Added initial segment overlap resolution logic
This commit is contained in:
345
internal/overlap/resolve_test.go
Normal file
345
internal/overlap/resolve_test.go
Normal file
@@ -0,0 +1,345 @@
|
||||
package overlap
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
|
||||
func TestResolveNoOverlapGroupsIsNoOp(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 2, word("hello", 1.1, 1.2)),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(got, merged) {
|
||||
t.Fatalf("expected no-op result:\ngot %#v\nwant %#v", got, merged)
|
||||
}
|
||||
if summary.GroupsProcessed != 0 || summary.GroupsChanged != 0 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 5, word("A1", 1.1, 1.2), word("A2", 1.8, 2.0)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1.5, 4, word("B1", 1.55, 1.7), word("B2", 2.6, 2.8)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.GroupsProcessed != 1 || summary.GroupsChanged != 1 || summary.OriginalsRemoved != 2 || summary.ReplacementsCreated != 3 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if len(got.OverlapGroups) != 0 {
|
||||
t.Fatalf("expected resolved group to be removed, got %#v", got.OverlapGroups)
|
||||
}
|
||||
|
||||
gotTexts := []string{got.Segments[0].Text, got.Segments[1].Text, got.Segments[2].Text}
|
||||
wantTexts := []string{"A1 A2", "B1", "B2"}
|
||||
if !reflect.DeepEqual(gotTexts, wantTexts) {
|
||||
t.Fatalf("texts = %v, want %v", gotTexts, wantTexts)
|
||||
}
|
||||
for _, segment := range got.Segments {
|
||||
if segment.ID != 0 {
|
||||
t.Fatalf("replacement segment has ID %d, want 0", segment.ID)
|
||||
}
|
||||
if segment.SourceSegmentIndex != nil {
|
||||
t.Fatalf("replacement segment source index = %d, want nil", *segment.SourceSegmentIndex)
|
||||
}
|
||||
if segment.OverlapGroupID != 0 {
|
||||
t.Fatalf("replacement segment overlap group ID = %d, want 0", segment.OverlapGroupID)
|
||||
}
|
||||
if segment.SourceRef == "" {
|
||||
t.Fatal("replacement segment missing source_ref")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
9,
|
||||
21,
|
||||
word("before", 9.5, 10),
|
||||
word("left-edge", 9.9, 10.1),
|
||||
word("inside", 11, 11.2),
|
||||
word("right-edge", 19.9, 20.1),
|
||||
word("after", 20, 20.2),
|
||||
),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 10, 20, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 10)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "left-edge inside right-edge" {
|
||||
t.Fatalf("text = %q", got.Segments[0].Text)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveWordRunGapThreshold(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 4, word("one", 1, 1.1), word("two", 1.85, 2), word("three", 2.8, 3)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "one two" || got.Segments[1].Text != "three" {
|
||||
t.Fatalf("unexpected replacement texts: %#v", got.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolvePartialResolutionKeepsNoWordSpeakerOriginals(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 5, word("hello", 1.2, 1.4)),
|
||||
segmentWithWords("b.json", 0, "Bob", 2, 4),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.OriginalsRemoved != 1 || summary.ReplacementsCreated != 1 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if len(got.OverlapGroups) != 0 {
|
||||
t.Fatalf("expected changed group to be removed, got %#v", got.OverlapGroups)
|
||||
}
|
||||
if len(got.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "hello" || got.Segments[1].Text != "Bob" {
|
||||
t.Fatalf("unexpected segment texts: %#v", got.Segments)
|
||||
}
|
||||
if got.Segments[1].SourceSegmentIndex == nil {
|
||||
t.Fatal("kept original should retain source_segment_index")
|
||||
}
|
||||
if got.Segments[1].OverlapGroupID != 0 {
|
||||
t.Fatalf("kept original overlap group ID = %d, want 0", got.Segments[1].OverlapGroupID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveGroupWithNoUsableWordsRemainsUnchanged(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 5),
|
||||
segmentWithWords("b.json", 0, "Bob", 2, 4),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.GroupsChanged != 0 || summary.OriginalsRemoved != 0 || summary.ReplacementsCreated != 0 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if !reflect.DeepEqual(got, merged) {
|
||||
t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReplacementProvenanceIsDeterministic(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 1, "Alice", 1, 3, word("second", 1.5, 1.6)),
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("first", 1.1, 1.2)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#1", "a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
segment := got.Segments[0]
|
||||
if segment.SourceRef != "word-run:1:1:1" {
|
||||
t.Fatalf("source_ref = %q", segment.SourceRef)
|
||||
}
|
||||
if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "a.json#1"}) {
|
||||
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveIncludesUntimedWordsInTextWithoutChangingBounds(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
1,
|
||||
3,
|
||||
untimedWord("pre"),
|
||||
word("one", 1.1, 1.2),
|
||||
untimedWord("middle"),
|
||||
word("two", 1.4, 1.5),
|
||||
untimedWord("post"),
|
||||
),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
segment := got.Segments[0]
|
||||
if segment.Text != "pre one middle two post" {
|
||||
t.Fatalf("text = %q", segment.Text)
|
||||
}
|
||||
if segment.Start != 1.1 || segment.End != 1.5 {
|
||||
t.Fatalf("bounds = %f-%f, want 1.1-1.5", segment.Start, segment.End)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveUntimedWordsDoNotBridgeWordRunGap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
1,
|
||||
4,
|
||||
word("one", 1, 1.1),
|
||||
untimedWord("middle"),
|
||||
word("two", 2, 2.1),
|
||||
),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "one middle" || got.Segments[1].Text != "two" {
|
||||
t.Fatalf("unexpected texts: %#v", got.Segments)
|
||||
}
|
||||
if got.Segments[0].End != 1.1 || got.Segments[1].Start != 2 {
|
||||
t.Fatalf("untimed word changed bounds: %#v", got.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, untimedWord("hello")),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.GroupsChanged != 0 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if !reflect.DeepEqual(got, merged) {
|
||||
t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged)
|
||||
}
|
||||
}
|
||||
|
||||
func segmentWithWords(source string, sourceIndex int, speaker string, start float64, end float64, words ...model.Word) model.Segment {
|
||||
segment := segment(source, sourceIndex, speaker, start, end)
|
||||
segment.Words = words
|
||||
return segment
|
||||
}
|
||||
|
||||
func word(text string, start float64, end float64) model.Word {
|
||||
return model.Word{
|
||||
Text: text,
|
||||
Start: start,
|
||||
End: end,
|
||||
Timed: true,
|
||||
}
|
||||
}
|
||||
|
||||
func untimedWord(text string) model.Word {
|
||||
return model.Word{
|
||||
Text: text,
|
||||
}
|
||||
}
|
||||
|
||||
func group(id int, start float64, end float64, refs []string, speakers []string) model.OverlapGroup {
|
||||
return model.OverlapGroup{
|
||||
ID: id,
|
||||
Start: start,
|
||||
End: end,
|
||||
Segments: refs,
|
||||
Speakers: speakers,
|
||||
Class: defaultClass,
|
||||
Resolution: defaultResolution,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user