package coalesce

import (
	"reflect"
	"testing"

	"gitea.maximumdirect.net/eric/seriatim/internal/model"
)

// TestApplyMergesConsecutiveSameSpeakerWithinGap feeds Apply two adjacent
// Alice segments (ending at 2 and starting at 4) with a second argument of 3
// and expects a single merged segment. It asserts the whitespace-trimmed,
// space-joined text, the combined bounds, and the provenance fields (Source,
// SourceRef, SourceSegmentIndex, DerivedFrom) of the result.
func TestApplyMergesConsecutiveSameSpeakerWithinGap(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 2, " first "),
			segment("a.json", 1, "Alice", 4, 5, "second"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
		t.Fatalf("summary = %#v", summary)
	}
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	// Named out (not segment) so the package-level segment helper is not shadowed.
	out := got.Segments[0]
	if out.Text != "first second" {
		t.Fatalf("text = %q", out.Text)
	}
	if out.Start != 1 || out.End != 5 {
		t.Fatalf("bounds = %f-%f, want 1-5", out.Start, out.End)
	}
	if out.Source != "a.json" {
		t.Fatalf("source = %q, want a.json", out.Source)
	}
	if out.SourceRef != "coalesce:1" {
		t.Fatalf("source_ref = %q, want coalesce:1", out.SourceRef)
	}
	if out.SourceSegmentIndex != nil {
		t.Fatalf("source_segment_index = %d, want nil", *out.SourceSegmentIndex)
	}
	if !reflect.DeepEqual(out.DerivedFrom, []string{"a.json#0", "a.json#1"}) {
		t.Fatalf("derived_from = %v", out.DerivedFrom)
	}
}

// TestApplyDoesNotMergeSameSpeakerBeyondGap uses two Alice segments separated
// by 3.1 (2 -> 5.1) with a second argument of 3 and expects Apply to report
// nothing merged and to return segments deeply equal to the input.
func TestApplyDoesNotMergeSameSpeakerBeyondGap(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 2, "first"),
			segment("a.json", 1, "Alice", 5.1, 6, "second"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
		t.Fatalf("summary = %#v", summary)
	}
	if !reflect.DeepEqual(got.Segments, merged.Segments) {
		t.Fatalf("segments changed:\ngot %#v\nwant %#v", got.Segments, merged.Segments)
	}
}

// TestApplyDoesNotMergeAcrossDifferentSpeaker places an uncategorized Bob
// segment between two Alice segments and expects all three segments to
// survive with a zeroed summary.
func TestApplyDoesNotMergeAcrossDifferentSpeaker(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 2, "first"),
			segment("b.json", 0, "Bob", 2.5, 3, "bob"),
			segment("a.json", 1, "Alice", 3.5, 4, "second"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
		t.Fatalf("summary = %#v", summary)
	}
	if len(got.Segments) != 3 {
		t.Fatalf("segment count = %d, want 3", len(got.Segments))
	}
}

// TestApplyMergesNegativeGapOverlap uses two overlapping Alice segments (1-4
// and 3-5) with a second argument of 0 and expects them to be merged into one
// segment spanning 1-5.
func TestApplyMergesNegativeGapOverlap(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 4, "first"),
			segment("a.json", 1, "Alice", 3, 5, "second"),
		},
	}

	got, summary := Apply(merged, 0)
	if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
		t.Fatalf("summary = %#v", summary)
	}
	// Guard before indexing so a regression fails with a message instead of
	// an index-out-of-range panic.
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	out := got.Segments[0]
	if out.Start != 1 || out.End != 5 {
		t.Fatalf("bounds = %f-%f, want 1-5", out.Start, out.End)
	}
}

// TestApplyHonorsCurrentOrder passes segments whose slice order disagrees
// with their timestamps (10-11 before 1-2) and expects the merged text to
// follow slice order ("later earlier") while the bounds cover 1-11.
func TestApplyHonorsCurrentOrder(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 10, 11, "later"),
			segment("a.json", 1, "Alice", 1, 2, "earlier"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
		t.Fatalf("summary = %#v", summary)
	}
	// Guard before indexing so a regression fails with a message instead of
	// an index-out-of-range panic.
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	out := got.Segments[0]
	if out.Text != "later earlier" {
		t.Fatalf("text = %q, want current-order merge", out.Text)
	}
	if out.Start != 1 || out.End != 11 {
		t.Fatalf("bounds = %f-%f, want 1-11", out.Start, out.End)
	}
}

// TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs merges a plain
// a.json segment with an already-derived b.json segment (SourceRef
// "word-run:1:1:1"). It expects the result's Source to be "derived" and its
// DerivedFrom to list "a.json#0" followed by the second input's SourceRef.
func TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs(t *testing.T) {
	first := segment("a.json", 0, "Alice", 1, 2, "first")
	second := model.Segment{
		Source:      "b.json",
		SourceRef:   "word-run:1:1:1",
		DerivedFrom: []string{"b.json#0"},
		Speaker:     "Alice",
		Start:       2.5,
		End:         3,
		Text:        "second",
	}

	got, _ := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3)
	// Guard before indexing so a regression fails with a message instead of
	// an index-out-of-range panic.
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	// Named out (not segment) so the package-level segment helper is not shadowed.
	out := got.Segments[0]
	if out.Source != "derived" {
		t.Fatalf("source = %q, want derived", out.Source)
	}
	if !reflect.DeepEqual(out.DerivedFrom, []string{"a.json#0", "word-run:1:1:1"}) {
		t.Fatalf("derived_from = %v", out.DerivedFrom)
	}
}
func TestApplyDropsBackchannelCategoryFromMergedSameSpeakerRun(t *testing.T) { first := segment("a.json", 0, "Alice", 1, 2, "yeah") first.Categories = []string{"backchannel"} second := segment("a.json", 1, "Alice", 2.5, 3, "more") got, _ := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3) if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } if got.Segments[0].Categories != nil { t.Fatalf("categories = %v, want nil", got.Segments[0].Categories) } } func TestApplyDropsFillerCategoryFromMergedSameSpeakerRun(t *testing.T) { first := segment("a.json", 0, "Alice", 1, 2, "um") first.Categories = []string{"filler"} second := segment("a.json", 1, "Alice", 2.5, 3, "more") got, _ := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3) if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } if got.Segments[0].Categories != nil { t.Fatalf("categories = %v, want nil", got.Segments[0].Categories) } } func TestApplyMergesSameSpeakerBackchannelIntoDerivedRun(t *testing.T) { first := segment("zach.json", 110, "Zach", 7811.778, 7812.478, "That makes sense.") first.Categories = []string{"backchannel"} second := model.Segment{ Source: "zach.json", SourceRef: "coalesce:347", DerivedFrom: []string{"zach.json#111", "zach.json#112"}, Speaker: "Zach", Start: 7812.498, End: 7824.045, Text: "So, like, I'm above the silence field.", } got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3) if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 { t.Fatalf("summary = %#v", summary) } if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } if got.Segments[0].Text != "That makes sense. So, like, I'm above the silence field." 
{ t.Fatalf("text = %q", got.Segments[0].Text) } if got.Segments[0].SourceRef != "coalesce:1" { t.Fatalf("source_ref = %q, want coalesce:1", got.Segments[0].SourceRef) } if !reflect.DeepEqual(got.Segments[0].DerivedFrom, []string{"zach.json#110", "coalesce:347"}) { t.Fatalf("derived_from = %v", got.Segments[0].DerivedFrom) } if got.Segments[0].Categories != nil { t.Fatalf("categories = %v, want nil", got.Segments[0].Categories) } } func TestApplyMergesSameSpeakerFillerIntoDerivedRun(t *testing.T) { first := segment("zach.json", 110, "Zach", 1, 1.5, "um") first.Categories = []string{"filler"} second := model.Segment{ Source: "zach.json", SourceRef: "coalesce:12", DerivedFrom: []string{"zach.json#111", "zach.json#112"}, Speaker: "Zach", Start: 1.6, End: 4, Text: "next thought", } got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3) if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 { t.Fatalf("summary = %#v", summary) } if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } if got.Segments[0].Text != "um next thought" { t.Fatalf("text = %q", got.Segments[0].Text) } if !reflect.DeepEqual(got.Segments[0].DerivedFrom, []string{"zach.json#110", "coalesce:12"}) { t.Fatalf("derived_from = %v", got.Segments[0].DerivedFrom) } if got.Segments[0].Categories != nil { t.Fatalf("categories = %v, want nil", got.Segments[0].Categories) } } func TestApplyUsesSkippedBackchannelToSeedNextSameSpeakerRun(t *testing.T) { mike := segment("mike.json", 367, "Mike", 7803.57, 7810.719, "It's very easy to notice.") backchannel := segment("zach.json", 110, "Zach", 7811.778, 7812.478, "That makes sense.") backchannel.Categories = []string{"backchannel"} next := segment("zach.json", 111, "Zach", 7812.498, 7820, "So, like, next thought.") got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{mike, backchannel, next}}, 3) if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 
1 { t.Fatalf("summary = %#v", summary) } if len(got.Segments) != 2 { t.Fatalf("segment count = %d, want 2", len(got.Segments)) } if got.Segments[0].Text != "It's very easy to notice." { t.Fatalf("first text = %q", got.Segments[0].Text) } if got.Segments[1].Speaker != "Zach" || got.Segments[1].Text != "That makes sense. So, like, next thought." { t.Fatalf("second segment = %#v", got.Segments[1]) } if !reflect.DeepEqual(got.Segments[1].DerivedFrom, []string{"zach.json#110", "zach.json#111"}) { t.Fatalf("derived_from = %v", got.Segments[1].DerivedFrom) } if got.Segments[1].Categories != nil { t.Fatalf("categories = %v, want nil", got.Segments[1].Categories) } } func TestApplyUsesSkippedFillerToSeedNextSameSpeakerRun(t *testing.T) { alice := segment("alice.json", 0, "Alice", 1, 2, "first") filler := segment("bob.json", 0, "Bob", 2.1, 2.3, "um") filler.Categories = []string{"filler"} bob := segment("bob.json", 1, "Bob", 2.4, 4, "actual thought") got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{alice, filler, bob}}, 3) if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 { t.Fatalf("summary = %#v", summary) } if len(got.Segments) != 2 { t.Fatalf("segment count = %d, want 2", len(got.Segments)) } if got.Segments[1].Text != "um actual thought" { t.Fatalf("second text = %q", got.Segments[1].Text) } if got.Segments[1].Categories != nil { t.Fatalf("categories = %v, want nil", got.Segments[1].Categories) } } func TestApplySkipsDifferentSpeakerBackchannelAsMergeBlocker(t *testing.T) { first := segment("a.json", 0, "Alice", 1, 2, "first") backchannel := segment("b.json", 0, "Bob", 2.2, 2.5, "yeah") backchannel.Categories = []string{"backchannel"} second := segment("a.json", 1, "Alice", 3, 4, "second") got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{first, backchannel, second}}, 3) if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 { t.Fatalf("summary = %#v", summary) } if len(got.Segments) != 
2 { t.Fatalf("segment count = %d, want 2", len(got.Segments)) } if got.Segments[0].Text != "first second" { t.Fatalf("first output text = %q, want first second", got.Segments[0].Text) } if got.Segments[1].Text != "yeah" || !reflect.DeepEqual(got.Segments[1].Categories, []string{"backchannel"}) { t.Fatalf("second output segment = %#v", got.Segments[1]) } } func TestApplySkipsDifferentSpeakerFillerAsMergeBlocker(t *testing.T) { first := segment("a.json", 0, "Alice", 1, 2, "first") filler := segment("b.json", 0, "Bob", 2.2, 2.5, "um") filler.Categories = []string{"filler"} second := segment("a.json", 1, "Alice", 3, 4, "second") got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{first, filler, second}}, 3) if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 { t.Fatalf("summary = %#v", summary) } if len(got.Segments) != 2 { t.Fatalf("segment count = %d, want 2", len(got.Segments)) } if got.Segments[0].Text != "first second" { t.Fatalf("first output text = %q, want first second", got.Segments[0].Text) } if got.Segments[1].Text != "um" || !reflect.DeepEqual(got.Segments[1].Categories, []string{"filler"}) { t.Fatalf("second output segment = %#v", got.Segments[1]) } } func TestApplyDifferentSpeakerNonBackchannelStillBlocksMerge(t *testing.T) { first := segment("a.json", 0, "Alice", 1, 2, "first") bob := segment("b.json", 0, "Bob", 2.2, 2.5, "interruption") second := segment("a.json", 1, "Alice", 3, 4, "second") got, summary := Apply(model.MergedTranscript{Segments: []model.Segment{first, bob, second}}, 3) if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 { t.Fatalf("summary = %#v", summary) } if len(got.Segments) != 3 { t.Fatalf("segment count = %d, want 3", len(got.Segments)) } } func segment(source string, sourceIndex int, speaker string, start float64, end float64, text string) model.Segment { return model.Segment{ Source: source, SourceSegmentIndex: intPtr(sourceIndex), Speaker: speaker, Start: start, End: 
end, Text: text, Words: []model.Word{ {Text: text, Start: start, End: end, Timed: true}, }, } } func intPtr(value int) *int { return &value }