package overlap import ( "reflect" "testing" "gitea.maximumdirect.net/eric/seriatim/internal/model" ) func TestResolveNoOverlapGroupsIsNoOp(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords("a.json", 0, "Alice", 1, 2, word("hello", 1.1, 1.2)), }, } got, summary, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if !reflect.DeepEqual(got, merged) { t.Fatalf("expected no-op result:\ngot %#v\nwant %#v", got, merged) } if summary.GroupsProcessed != 0 || summary.GroupsChanged != 0 { t.Fatalf("unexpected summary: %#v", summary) } } func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords("a.json", 0, "Alice", 1, 5, word("A1", 1.1, 1.2), word("A2", 1.8, 2.0)), segmentWithWords("b.json", 0, "Bob", 1.5, 4, word("B1", 1.55, 1.7), word("B2", 2.6, 2.8)), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}), }, } merged.Segments[0].OverlapGroupID = 1 merged.Segments[1].OverlapGroupID = 1 got, summary, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if summary.GroupsProcessed != 1 || summary.GroupsChanged != 1 || summary.OriginalsRemoved != 2 || summary.ReplacementsCreated != 3 { t.Fatalf("unexpected summary: %#v", summary) } if len(got.OverlapGroups) != 0 { t.Fatalf("expected resolved group to be removed, got %#v", got.OverlapGroups) } gotTexts := []string{got.Segments[0].Text, got.Segments[1].Text, got.Segments[2].Text} wantTexts := []string{"A1 A2", "B1", "B2"} if !reflect.DeepEqual(gotTexts, wantTexts) { t.Fatalf("texts = %v, want %v", gotTexts, wantTexts) } for _, segment := range got.Segments { if segment.ID != 0 { t.Fatalf("replacement segment has ID %d, want 0", segment.ID) } if segment.SourceSegmentIndex != nil { t.Fatalf("replacement segment source index = %d, want nil", *segment.SourceSegmentIndex) } if segment.OverlapGroupID != 0 { t.Fatalf("replacement segment overlap group ID = %d, want 0", segment.OverlapGroupID) } if segment.SourceRef == "" { t.Fatal("replacement segment missing source_ref") } } } func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords( "a.json", 0, "Alice", 9, 21, word("before", 9.5, 10), word("left-edge", 9.9, 10.1), word("inside", 11, 11.2), word("right-edge", 19.9, 20.1), word("after", 20, 20.2), ), }, OverlapGroups: []model.OverlapGroup{ group(1, 10, 20, []string{"a.json#0"}, []string{"Alice"}), }, } merged.Segments[0].OverlapGroupID = 1 got, _, err := Resolve(merged, 10) if err != nil { t.Fatalf("resolve failed: %v", err) } if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } if got.Segments[0].Text != "left-edge inside right-edge" { t.Fatalf("text = %q", got.Segments[0].Text) } } func TestResolveWordRunGapThreshold(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords("a.json", 0, "Alice", 1, 4, word("one", 1, 1.1), word("two", 1.85, 2), word("three", 2.8, 3)), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}), }, } merged.Segments[0].OverlapGroupID = 1 got, _, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if len(got.Segments) != 2 { t.Fatalf("segment count = %d, want 2", len(got.Segments)) } if got.Segments[0].Text != "one two" || got.Segments[1].Text != "three" { t.Fatalf("unexpected replacement texts: %#v", got.Segments) } } func TestResolvePartialResolutionKeepsNoWordSpeakerOriginals(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords("a.json", 0, "Alice", 1, 5, word("hello", 1.2, 1.4)), segmentWithWords("b.json", 0, "Bob", 2, 4), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}), }, } merged.Segments[0].OverlapGroupID = 1 merged.Segments[1].OverlapGroupID = 1 got, summary, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if summary.OriginalsRemoved != 1 || summary.ReplacementsCreated != 1 { t.Fatalf("unexpected summary: %#v", summary) } if len(got.OverlapGroups) != 0 { t.Fatalf("expected changed group to be removed, got %#v", got.OverlapGroups) } if len(got.Segments) != 2 { t.Fatalf("segment count = %d, want 2", len(got.Segments)) } if got.Segments[0].Text != "hello" || got.Segments[1].Text != "Bob" { t.Fatalf("unexpected segment texts: %#v", got.Segments) } if got.Segments[1].SourceSegmentIndex == nil { t.Fatal("kept original should retain source_segment_index") } if got.Segments[1].OverlapGroupID != 0 { t.Fatalf("kept original overlap group ID = %d, want 0", got.Segments[1].OverlapGroupID) } } func TestResolveGroupWithNoUsableWordsRemainsUnchanged(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords("a.json", 0, "Alice", 1, 5), segmentWithWords("b.json", 0, "Bob", 2, 4), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}), }, } merged.Segments[0].OverlapGroupID = 1 merged.Segments[1].OverlapGroupID = 1 got, summary, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if summary.GroupsChanged != 0 || summary.OriginalsRemoved != 0 || summary.ReplacementsCreated != 0 { t.Fatalf("unexpected summary: %#v", summary) } if !reflect.DeepEqual(got, merged) { t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged) } } func TestResolveReplacementProvenanceIsDeterministic(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords("a.json", 1, "Alice", 1, 3, word("second", 1.5, 1.6)), segmentWithWords("a.json", 0, "Alice", 1, 3, word("first", 1.1, 1.2)), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 3, []string{"a.json#1", "a.json#0"}, []string{"Alice"}), }, } got, _, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } segment := got.Segments[0] if segment.SourceRef != "word-run:1:1:1" { t.Fatalf("source_ref = %q", segment.SourceRef) } if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "a.json#1"}) { t.Fatalf("derived_from = %v", segment.DerivedFrom) } } func TestResolveIncludesUntimedWordsInTextWithoutChangingBounds(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords( "a.json", 0, "Alice", 1, 3, untimedWord("pre"), word("one", 1.1, 1.2), untimedWord("middle"), word("two", 1.4, 1.5), untimedWord("post"), ), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}), }, } got, _, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } segment := got.Segments[0] if segment.Text != "pre one middle two post" { t.Fatalf("text = %q", segment.Text) } if segment.Start != 1.1 || segment.End != 1.5 { t.Fatalf("bounds = %f-%f, want 1.1-1.5", segment.Start, segment.End) } } func TestResolveUntimedWordsDoNotBridgeWordRunGap(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords( "a.json", 0, "Alice", 1, 4, word("one", 1, 1.1), untimedWord("middle"), word("two", 2, 2.1), ), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}), }, } got, _, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if len(got.Segments) != 2 { t.Fatalf("segment count = %d, want 2", len(got.Segments)) } if got.Segments[0].Text != "one middle" || got.Segments[1].Text != "two" { t.Fatalf("unexpected texts: %#v", got.Segments) } if got.Segments[0].End != 1.1 || got.Segments[1].Start != 2 { t.Fatalf("untimed word changed bounds: %#v", got.Segments) } } func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords("a.json", 0, "Alice", 1, 3, untimedWord("hello")), }, OverlapGroups: []model.OverlapGroup{ group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}), }, } merged.Segments[0].OverlapGroupID = 1 got, summary, err := Resolve(merged, 0.75) if err != nil { t.Fatalf("resolve failed: %v", err) } if summary.GroupsChanged != 0 { t.Fatalf("unexpected summary: %#v", summary) } if !reflect.DeepEqual(got, merged) { t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged) } } func segmentWithWords(source string, sourceIndex int, speaker string, start float64, end float64, words ...model.Word) model.Segment { segment := segment(source, sourceIndex, speaker, start, end) segment.Words = words return segment } func word(text string, start float64, end float64) model.Word { return model.Word{ Text: text, Start: start, End: end, Timed: true, } } func untimedWord(text string) model.Word { return model.Word{ Text: text, } } func group(id int, start float64, end float64, refs []string, speakers []string) model.OverlapGroup { return model.OverlapGroup{ ID: id, Start: start, End: end, Segments: refs, Speakers: speakers, Class: defaultClass, Resolution: defaultResolution, } }