Fixed a text duplication bug in the resolve-overlaps module

This commit is contained in:
2026-04-29 07:00:18 -05:00
parent cc80a123ef
commit cc02a7a01e
2 changed files with 60 additions and 3 deletions

View File

@@ -333,7 +333,7 @@ func TestResolveSkipsContextSegmentReferencedByAnotherOverlapGroup(t *testing.T)
"c.json#0": {},
}
resolved, err := resolveGroup(merged, merged.OverlapGroups[0], refToIndex, overlapRefs, 10, 0.4, 3)
resolved, err := resolveGroup(merged, merged.OverlapGroups[0], refToIndex, overlapRefs, map[string]struct{}{}, 10, 0.4, 3)
if err != nil {
t.Fatalf("resolve failed: %v", err)
}
@@ -597,6 +597,37 @@ func TestResolveDoesNotReorderWordRunsOutsideWindow(t *testing.T) {
}
}
// TestResolveDoesNotReuseContextSegmentAcrossGroups runs Resolve over a
// transcript that has two overlap groups plus one segment (a.json index 1,
// built from the single word "shared") that neither group lists in its refs.
// It passes only when exactly one segment of the resolved output has the
// text "shared".
//
// NOTE(review): presumably the unlisted segment is picked up as surrounding
// context for both groups, which is how it could end up duplicated; confirm
// against Resolve.
func TestResolveDoesNotReuseContextSegmentAcrossGroups(t *testing.T) {
	// Fixture: group 1 spans a.json#0 and b.json#0, group 2 spans c.json#0
	// and a.json#2; the "shared" segment sits between them in time.
	segments := []model.Segment{
		segmentWithWords("a.json", 0, "Alice", 1.0, 1.1, word("alpha", 1.0, 1.05)),
		segmentWithWords("b.json", 0, "Bob", 1.05, 1.15, word("beta", 1.05, 1.1)),
		segmentWithWords("a.json", 1, "Alice", 2.0, 2.1, word("shared", 2.0, 2.05)),
		segmentWithWords("c.json", 0, "Carol", 4.0, 4.1, word("gamma", 4.0, 4.05)),
		segmentWithWords("a.json", 2, "Alice", 4.2, 4.3, word("delta", 4.2, 4.25)),
	}
	overlaps := []model.OverlapGroup{
		group(1, 1.0, 1.15, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
		group(2, 4.0, 4.3, []string{"c.json#0", "a.json#2"}, []string{"Carol", "Alice"}),
	}
	merged := model.MergedTranscript{
		Segments:      segments,
		OverlapGroups: overlaps,
	}

	resolved, _, err := Resolve(merged, 0.75, 0.4, 3)
	if err != nil {
		t.Fatalf("resolve failed: %v", err)
	}

	// Tally how many output segments carry the shared text.
	const sharedText = "shared"
	occurrences := 0
	for _, seg := range resolved.Segments {
		if seg.Text != sharedText {
			continue
		}
		occurrences++
	}

	if occurrences == 1 {
		return
	}
	t.Fatalf("shared context segment was reused %d time(s); want 1", occurrences)
}
func TestResolveReordersTransitiveNearStartClustersByDuration(t *testing.T) {
merged := model.MergedTranscript{
Segments: []model.Segment{