Added a module to coalesce adjacent same-speaker segments
This commit is contained in:
156
internal/coalesce/coalesce_test.go
Normal file
156
internal/coalesce/coalesce_test.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package coalesce
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
|
||||
func TestApplyMergesConsecutiveSameSpeakerWithinGap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 2, " first "),
|
||||
segment("a.json", 1, "Alice", 4, 5, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
segment := got.Segments[0]
|
||||
if segment.Text != "first second" {
|
||||
t.Fatalf("text = %q", segment.Text)
|
||||
}
|
||||
if segment.Start != 1 || segment.End != 5 {
|
||||
t.Fatalf("bounds = %f-%f, want 1-5", segment.Start, segment.End)
|
||||
}
|
||||
if segment.Source != "a.json" {
|
||||
t.Fatalf("source = %q, want a.json", segment.Source)
|
||||
}
|
||||
if segment.SourceRef != "coalesce:1" {
|
||||
t.Fatalf("source_ref = %q, want coalesce:1", segment.SourceRef)
|
||||
}
|
||||
if segment.SourceSegmentIndex != nil {
|
||||
t.Fatalf("source_segment_index = %d, want nil", *segment.SourceSegmentIndex)
|
||||
}
|
||||
if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "a.json#1"}) {
|
||||
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyDoesNotMergeSameSpeakerBeyondGap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 2, "first"),
|
||||
segment("a.json", 1, "Alice", 5.1, 6, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if !reflect.DeepEqual(got.Segments, merged.Segments) {
|
||||
t.Fatalf("segments changed:\ngot %#v\nwant %#v", got.Segments, merged.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyDoesNotMergeAcrossDifferentSpeaker(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 2, "first"),
|
||||
segment("b.json", 0, "Bob", 2.5, 3, "bob"),
|
||||
segment("a.json", 1, "Alice", 3.5, 4, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if len(got.Segments) != 3 {
|
||||
t.Fatalf("segment count = %d, want 3", len(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMergesNegativeGapOverlap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 4, "first"),
|
||||
segment("a.json", 1, "Alice", 3, 5, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 0)
|
||||
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if got.Segments[0].Start != 1 || got.Segments[0].End != 5 {
|
||||
t.Fatalf("bounds = %f-%f, want 1-5", got.Segments[0].Start, got.Segments[0].End)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyHonorsCurrentOrder(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 10, 11, "later"),
|
||||
segment("a.json", 1, "Alice", 1, 2, "earlier"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if got.Segments[0].Text != "later earlier" {
|
||||
t.Fatalf("text = %q, want current-order merge", got.Segments[0].Text)
|
||||
}
|
||||
if got.Segments[0].Start != 1 || got.Segments[0].End != 11 {
|
||||
t.Fatalf("bounds = %f-%f, want 1-11", got.Segments[0].Start, got.Segments[0].End)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs(t *testing.T) {
|
||||
first := segment("a.json", 0, "Alice", 1, 2, "first")
|
||||
second := model.Segment{
|
||||
Source: "b.json",
|
||||
SourceRef: "word-run:1:1:1",
|
||||
DerivedFrom: []string{"b.json#0"},
|
||||
Speaker: "Alice",
|
||||
Start: 2.5,
|
||||
End: 3,
|
||||
Text: "second",
|
||||
}
|
||||
|
||||
got, _ := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3)
|
||||
segment := got.Segments[0]
|
||||
if segment.Source != "derived" {
|
||||
t.Fatalf("source = %q, want derived", segment.Source)
|
||||
}
|
||||
if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "word-run:1:1:1"}) {
|
||||
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
||||
}
|
||||
}
|
||||
|
||||
func segment(source string, sourceIndex int, speaker string, start float64, end float64, text string) model.Segment {
|
||||
return model.Segment{
|
||||
Source: source,
|
||||
SourceSegmentIndex: intPtr(sourceIndex),
|
||||
Speaker: speaker,
|
||||
Start: start,
|
||||
End: end,
|
||||
Text: text,
|
||||
Words: []model.Word{
|
||||
{Text: text, Start: start, End: end, Timed: true},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// intPtr returns a pointer to a newly allocated int holding value. Each call
// yields a distinct pointer, so callers can mutate the result independently.
func intPtr(value int) *int {
	p := new(int)
	*p = value
	return p
}
|
||||
Reference in New Issue
Block a user