diff --git a/internal/overlap/resolve.go b/internal/overlap/resolve.go index db680ed..a59419d 100644 --- a/internal/overlap/resolve.go +++ b/internal/overlap/resolve.go @@ -36,6 +36,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow overlapRefs[ref] = struct{}{} } } + claimedContextRefs := make(map[string]struct{}) removeRefs := make(map[string]struct{}) clearAnnotationRefs := make(map[string]struct{}) @@ -44,7 +45,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow replacementOrder := make(map[string]replacementOrder) for _, group := range in.OverlapGroups { - resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, wordRunGap, wordRunReorderWindow, contextWindow) + resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, claimedContextRefs, wordRunGap, wordRunReorderWindow, contextWindow) if err != nil { return model.MergedTranscript{}, ResolutionSummary{}, err } @@ -58,6 +59,9 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow for sourceRef, order := range resolved.replacementOrder { replacementOrder[sourceRef] = order } + for _, ref := range resolved.contextRefs { + claimedContextRefs[ref] = struct{}{} + } for _, ref := range group.Segments { clearAnnotationRefs[ref] = struct{}{} @@ -107,6 +111,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow type resolvedGroup struct { removeRefs []string + contextRefs []string replacements []model.Segment replacementOrder map[string]replacementOrder } @@ -131,9 +136,10 @@ type wordRun struct { end float64 } -func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) { +func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, claimedContextRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) { segmentsBySpeaker := make(map[string][]model.Segment) refsBySpeaker := make(map[string][]string) + contextRefs := make([]string, 0) groupRefs := make(map[string]struct{}, len(group.Segments)) groupSpeakers := make(map[string]struct{}) for _, ref := range group.Segments { @@ -154,6 +160,9 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde if _, exists := overlapRefs[ref]; exists { continue } + if _, exists := claimedContextRefs[ref]; exists { + continue + } if _, exists := groupSpeakers[segment.Speaker]; !exists { continue } @@ -166,6 +175,9 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde } segmentsBySpeaker[segment.Speaker] = append(segmentsBySpeaker[segment.Speaker], segment) refsBySpeaker[segment.Speaker] = append(refsBySpeaker[segment.Speaker], ref) + if _, exists := groupRefs[ref]; !exists { + contextRefs = append(contextRefs, ref) + } } speakers := groupSpeakerOrder(group, segmentsBySpeaker) @@ -187,6 +199,7 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde resolved.replacements = append(resolved.replacements, replacementSegment(group.ID, speakerIndex+1, runIndex+1, speaker, run)) } } + resolved.contextRefs = uniqueStrings(contextRefs) resolved.replacements, resolved.replacementOrder = reorderReplacementSegments(group.ID, resolved.replacements, wordRunReorderWindow) return resolved, nil @@ -466,3 +479,16 @@ func uniqueSortedStrings(values []string) []string { sort.Strings(unique) return unique } + +func uniqueStrings(values []string) []string { + seen := make(map[string]struct{}, len(values)) + unique := make([]string, 0, len(values)) + for _, value := range values { + if _, exists := seen[value]; exists { + continue + } + seen[value] = struct{}{} + unique = append(unique, value) + } + return unique +} diff --git a/internal/overlap/resolve_test.go b/internal/overlap/resolve_test.go index 9d1922b..050f729 100644 --- a/internal/overlap/resolve_test.go +++ b/internal/overlap/resolve_test.go @@ -333,7 +333,7 @@ func TestResolveSkipsContextSegmentReferencedByAnotherOverlapGroup(t *testing.T) "c.json#0": {}, } - resolved, err := resolveGroup(merged, merged.OverlapGroups[0], refToIndex, overlapRefs, 10, 0.4, 3) + resolved, err := resolveGroup(merged, merged.OverlapGroups[0], refToIndex, overlapRefs, map[string]struct{}{}, 10, 0.4, 3) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -597,6 +597,37 @@ func TestResolveDoesNotReorderWordRunsOutsideWindow(t *testing.T) { } } +func TestResolveDoesNotReuseContextSegmentAcrossGroups(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords("a.json", 0, "Alice", 1.0, 1.1, word("alpha", 1.0, 1.05)), + segmentWithWords("b.json", 0, "Bob", 1.05, 1.15, word("beta", 1.05, 1.1)), + segmentWithWords("a.json", 1, "Alice", 2.0, 2.1, word("shared", 2.0, 2.05)), + segmentWithWords("c.json", 0, "Carol", 4.0, 4.1, word("gamma", 4.0, 4.05)), + segmentWithWords("a.json", 2, "Alice", 4.2, 4.3, word("delta", 4.2, 4.25)), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 1.0, 1.15, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}), + group(2, 4.0, 4.3, []string{"c.json#0", "a.json#2"}, []string{"Carol", "Alice"}), + }, + } + + got, _, err := Resolve(merged, 0.75, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + + sharedCount := 0 + for _, segment := range got.Segments { + if segment.Text == "shared" { + sharedCount++ + } + } + if sharedCount != 1 { + t.Fatalf("shared context segment was reused %d time(s); want 1", sharedCount) + } +} + func TestResolveReordersTransitiveNearStartClustersByDuration(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{