package coalesce

import (
	"reflect"
	"testing"

	"gitea.maximumdirect.net/eric/seriatim/internal/model"
)

// TestApplyMergesConsecutiveSameSpeakerWithinGap passes Apply two consecutive
// Alice segments (1-2 and 4-5) with a second argument of 3 and expects one
// output segment whose text is the trimmed inputs joined by a space, whose
// bounds span both inputs, and whose provenance fields describe the merge.
func TestApplyMergesConsecutiveSameSpeakerWithinGap(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 2, " first "),
			segment("a.json", 1, "Alice", 4, 5, "second"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
		t.Fatalf("summary = %#v", summary)
	}
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	// Named seg so the package-level segment helper is not shadowed.
	seg := got.Segments[0]
	if seg.Text != "first second" {
		t.Fatalf("text = %q", seg.Text)
	}
	if seg.Start != 1 || seg.End != 5 {
		t.Fatalf("bounds = %f-%f, want 1-5", seg.Start, seg.End)
	}
	if seg.Source != "a.json" {
		t.Fatalf("source = %q, want a.json", seg.Source)
	}
	if seg.SourceRef != "coalesce:1" {
		t.Fatalf("source_ref = %q, want coalesce:1", seg.SourceRef)
	}
	if seg.SourceSegmentIndex != nil {
		t.Fatalf("source_segment_index = %d, want nil", *seg.SourceSegmentIndex)
	}
	if !reflect.DeepEqual(seg.DerivedFrom, []string{"a.json#0", "a.json#1"}) {
		t.Fatalf("derived_from = %v", seg.DerivedFrom)
	}
}

// TestApplyDoesNotMergeSameSpeakerBeyondGap passes Apply two Alice segments
// whose separation (2 to 5.1) exceeds the second argument of 3 and expects the
// segments to come back unchanged with a zeroed summary.
func TestApplyDoesNotMergeSameSpeakerBeyondGap(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 2, "first"),
			segment("a.json", 1, "Alice", 5.1, 6, "second"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
		t.Fatalf("summary = %#v", summary)
	}
	if !reflect.DeepEqual(got.Segments, merged.Segments) {
		t.Fatalf("segments changed:\ngot %#v\nwant %#v", got.Segments, merged.Segments)
	}
}

// TestApplyDoesNotMergeAcrossDifferentSpeaker places a Bob segment between two
// Alice segments and expects all three segments to survive with a zeroed
// summary.
func TestApplyDoesNotMergeAcrossDifferentSpeaker(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 2, "first"),
			segment("b.json", 0, "Bob", 2.5, 3, "bob"),
			segment("a.json", 1, "Alice", 3.5, 4, "second"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
		t.Fatalf("summary = %#v", summary)
	}
	if len(got.Segments) != 3 {
		t.Fatalf("segment count = %d, want 3", len(got.Segments))
	}
}

// TestApplyMergesNegativeGapOverlap passes Apply two overlapping Alice
// segments (1-4 and 3-5) with a second argument of 0 and expects them to be
// merged into a single segment spanning 1-5.
func TestApplyMergesNegativeGapOverlap(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 1, 4, "first"),
			segment("a.json", 1, "Alice", 3, 5, "second"),
		},
	}

	got, summary := Apply(merged, 0)
	if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
		t.Fatalf("summary = %#v", summary)
	}
	// Guard the index below so an unexpected result fails with a message
	// instead of an index-out-of-range panic.
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	seg := got.Segments[0]
	if seg.Start != 1 || seg.End != 5 {
		t.Fatalf("bounds = %f-%f, want 1-5", seg.Start, seg.End)
	}
}

// TestApplyHonorsCurrentOrder passes Apply two Alice segments listed with the
// later one (10-11) before the earlier one (1-2) and expects the merged text
// to follow slice order ("later earlier") while the bounds cover 1-11.
func TestApplyHonorsCurrentOrder(t *testing.T) {
	merged := model.MergedTranscript{
		Segments: []model.Segment{
			segment("a.json", 0, "Alice", 10, 11, "later"),
			segment("a.json", 1, "Alice", 1, 2, "earlier"),
		},
	}

	got, summary := Apply(merged, 3)
	if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
		t.Fatalf("summary = %#v", summary)
	}
	// Guard the index below so an unexpected result fails with a message
	// instead of an index-out-of-range panic.
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	seg := got.Segments[0]
	if seg.Text != "later earlier" {
		t.Fatalf("text = %q, want current-order merge", seg.Text)
	}
	if seg.Start != 1 || seg.End != 11 {
		t.Fatalf("bounds = %f-%f, want 1-11", seg.Start, seg.End)
	}
}

// TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs merges an a.json
// segment with a b.json segment that already carries a SourceRef and
// DerivedFrom. It expects the result to have Source "derived" and a
// DerivedFrom list of "a.json#0" followed by the second input's SourceRef.
func TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs(t *testing.T) {
	first := segment("a.json", 0, "Alice", 1, 2, "first")
	second := model.Segment{
		Source:      "b.json",
		SourceRef:   "word-run:1:1:1",
		DerivedFrom: []string{"b.json#0"},
		Speaker:     "Alice",
		Start:       2.5,
		End:         3,
		Text:        "second",
	}

	got, _ := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3)
	// Guard the index below so an unexpected result fails with a message
	// instead of an index-out-of-range panic.
	if len(got.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(got.Segments))
	}

	// Named seg so the package-level segment helper is not shadowed.
	seg := got.Segments[0]
	if seg.Source != "derived" {
		t.Fatalf("source = %q, want derived", seg.Source)
	}
	if !reflect.DeepEqual(seg.DerivedFrom, []string{"a.json#0", "word-run:1:1:1"}) {
		t.Fatalf("derived_from = %v", seg.DerivedFrom)
	}
}
func segment(source string, sourceIndex int, speaker string, start float64, end float64, text string) model.Segment { return model.Segment{ Source: source, SourceSegmentIndex: intPtr(sourceIndex), Speaker: speaker, Start: start, End: end, Text: text, Words: []model.Word{ {Text: text, Start: start, End: end, Timed: true}, }, } } func intPtr(value int) *int { return &value }