681 lines
21 KiB
Go
681 lines
21 KiB
Go
package overlap
|
|
|
|
import (
|
|
"reflect"
|
|
"strings"
|
|
"testing"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
)
|
|
|
|
func TestResolveNoOverlapGroupsIsNoOp(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 2, word("hello", 1.1, 1.2)),
|
|
},
|
|
}
|
|
|
|
got, summary, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if !reflect.DeepEqual(got, merged) {
|
|
t.Fatalf("expected no-op result:\ngot %#v\nwant %#v", got, merged)
|
|
}
|
|
if summary.GroupsProcessed != 0 || summary.GroupsChanged != 0 {
|
|
t.Fatalf("unexpected summary: %#v", summary)
|
|
}
|
|
}
|
|
|
|
func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 5, word("A1", 1.1, 1.2), word("A2", 1.8, 2.0)),
|
|
segmentWithWords("b.json", 0, "Bob", 1.5, 4, word("B1", 1.55, 1.7), word("B2", 2.6, 2.8)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
merged.Segments[0].OverlapGroupID = 1
|
|
merged.Segments[1].OverlapGroupID = 1
|
|
|
|
got, summary, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if summary.GroupsProcessed != 1 || summary.GroupsChanged != 1 || summary.OriginalsRemoved != 2 || summary.ReplacementsCreated != 3 {
|
|
t.Fatalf("unexpected summary: %#v", summary)
|
|
}
|
|
if len(got.OverlapGroups) != 0 {
|
|
t.Fatalf("expected resolved group to be removed, got %#v", got.OverlapGroups)
|
|
}
|
|
|
|
gotTexts := []string{got.Segments[0].Text, got.Segments[1].Text, got.Segments[2].Text}
|
|
wantTexts := []string{"A1 A2", "B1", "B2"}
|
|
if !reflect.DeepEqual(gotTexts, wantTexts) {
|
|
t.Fatalf("texts = %v, want %v", gotTexts, wantTexts)
|
|
}
|
|
for _, segment := range got.Segments {
|
|
if segment.ID != 0 {
|
|
t.Fatalf("replacement segment has ID %d, want 0", segment.ID)
|
|
}
|
|
if segment.SourceSegmentIndex != nil {
|
|
t.Fatalf("replacement segment source index = %d, want nil", *segment.SourceSegmentIndex)
|
|
}
|
|
if segment.OverlapGroupID != 0 {
|
|
t.Fatalf("replacement segment overlap group ID = %d, want 0", segment.OverlapGroupID)
|
|
}
|
|
if segment.SourceRef == "" {
|
|
t.Fatal("replacement segment missing source_ref")
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestResolvePreservesAllWordsFromSelectedOverlapSegment(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords(
|
|
"a.json",
|
|
0,
|
|
"Alice",
|
|
9,
|
|
21,
|
|
word("before", 9.5, 10),
|
|
word("left-edge", 9.9, 10.1),
|
|
word("inside", 11, 11.2),
|
|
word("right-edge", 19.9, 20.1),
|
|
word("after", 20, 20.2),
|
|
),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 20, []string{"a.json#0"}, []string{"Alice"}),
|
|
},
|
|
}
|
|
merged.Segments[0].OverlapGroupID = 1
|
|
|
|
got, _, err := Resolve(merged, 10, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if len(got.Segments) != 1 {
|
|
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
|
}
|
|
if got.Segments[0].Text != "before left-edge inside right-edge after" {
|
|
t.Fatalf("text = %q", got.Segments[0].Text)
|
|
}
|
|
}
|
|
|
|
func TestResolvePreservesPrefixWordsFromAbsorbedContextSegment(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords(
|
|
"a.json",
|
|
0,
|
|
"Alice",
|
|
7,
|
|
9.5,
|
|
word("prefix", 7.2, 7.4),
|
|
word("near", 9.2, 9.4),
|
|
),
|
|
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 10, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "prefix near inside,bob" {
|
|
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
|
}
|
|
}
|
|
|
|
func TestResolvePreservesSuffixWordsFromAbsorbedContextSegment(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
|
segmentWithWords(
|
|
"a.json",
|
|
1,
|
|
"Alice",
|
|
12.5,
|
|
16,
|
|
word("near", 12.7, 12.9),
|
|
word("suffix", 15.6, 15.8),
|
|
),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 10, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "bob,inside near suffix" {
|
|
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
|
}
|
|
}
|
|
|
|
func TestResolveOutOfWindowWordsAffectWordRunBoundaries(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords(
|
|
"a.json",
|
|
0,
|
|
"Alice",
|
|
7,
|
|
9.5,
|
|
word("early", 7.2, 7.4),
|
|
word("near", 9.2, 9.4),
|
|
),
|
|
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "early,near,bob,inside" {
|
|
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
|
}
|
|
}
|
|
|
|
func TestResolveIncludesContextWordsAroundOverlapWindow(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 7.5, 9.5, word("before", 8.5, 8.7)),
|
|
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
|
segmentWithWords("a.json", 2, "Alice", 12.5, 13.5, word("after", 13, 13.2)),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
merged.Segments[1].OverlapGroupID = 1
|
|
merged.Segments[3].OverlapGroupID = 1
|
|
|
|
got, summary, err := Resolve(merged, 10, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if summary.GroupsChanged != 1 || summary.OriginalsRemoved != 4 || summary.ReplacementsCreated != 2 {
|
|
t.Fatalf("unexpected summary: %#v", summary)
|
|
}
|
|
if gotTexts(got.Segments) != "before inside after,bob" {
|
|
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
|
}
|
|
alice := got.Segments[0]
|
|
if alice.Start != 8.5 || alice.End != 13.2 {
|
|
t.Fatalf("context bounds = %f-%f, want 8.5-13.2", alice.Start, alice.End)
|
|
}
|
|
if !reflect.DeepEqual(alice.DerivedFrom, []string{"a.json#0", "a.json#1", "a.json#2"}) {
|
|
t.Fatalf("derived_from = %v", alice.DerivedFrom)
|
|
}
|
|
}
|
|
|
|
func TestResolveDoesNotIncludeContextOutsideWindow(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 5, 6.9, word("outside", 6, 6.2)),
|
|
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 10, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "Alice,bob,inside" {
|
|
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
|
}
|
|
if got.Segments[0].SourceSegmentIndex == nil || *got.Segments[0].SourceSegmentIndex != 0 {
|
|
t.Fatalf("outside context segment was not preserved: %#v", got.Segments[0])
|
|
}
|
|
}
|
|
|
|
func TestResolveDoesNotIncludeNearbyNonGroupSpeakerContext(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 10, 12, word("alice", 10.5, 10.7)),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
|
segmentWithWords("c.json", 0, "Carol", 12.5, 13.5, word("carol", 13, 13.2)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 10, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "bob,alice,Carol" {
|
|
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
|
}
|
|
if got.Segments[2].SourceSegmentIndex == nil || *got.Segments[2].SourceSegmentIndex != 0 {
|
|
t.Fatalf("non-group speaker context segment was not preserved: %#v", got.Segments[2])
|
|
}
|
|
}
|
|
|
|
func TestResolveRemovesIncludedContextSegmentsForReplacedSpeaker(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 8, 9, word("before", 8.5, 8.7)),
|
|
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
merged.Segments[1].OverlapGroupID = 1
|
|
merged.Segments[2].OverlapGroupID = 1
|
|
|
|
got, summary, err := Resolve(merged, 10, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if summary.OriginalsRemoved != 2 || summary.ReplacementsCreated != 1 {
|
|
t.Fatalf("unexpected summary: %#v", summary)
|
|
}
|
|
if gotTexts(got.Segments) != "before inside,Bob" {
|
|
t.Fatalf("segment texts = %s", gotTexts(got.Segments))
|
|
}
|
|
if got.Segments[1].OverlapGroupID != 0 {
|
|
t.Fatalf("kept original group annotation = %d, want 0", got.Segments[1].OverlapGroupID)
|
|
}
|
|
}
|
|
|
|
func TestResolveSkipsContextSegmentReferencedByAnotherOverlapGroup(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 8, 9, word("other-group", 8.5, 8.7)),
|
|
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)),
|
|
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)),
|
|
segmentWithWords("c.json", 0, "Carol", 8.5, 9.5),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
group(2, 8, 9.5, []string{"a.json#0", "c.json#0"}, []string{"Alice", "Carol"}),
|
|
},
|
|
}
|
|
merged.Segments[0].OverlapGroupID = 2
|
|
merged.Segments[1].OverlapGroupID = 1
|
|
merged.Segments[2].OverlapGroupID = 1
|
|
merged.Segments[3].OverlapGroupID = 2
|
|
|
|
refToIndex := map[string]int{}
|
|
for index, segment := range merged.Segments {
|
|
refToIndex[SegmentRef(segment)] = index
|
|
}
|
|
overlapRefs := map[string]struct{}{
|
|
"a.json#0": {},
|
|
"a.json#1": {},
|
|
"b.json#0": {},
|
|
"c.json#0": {},
|
|
}
|
|
|
|
resolved, err := resolveGroup(merged, merged.OverlapGroups[0], refToIndex, overlapRefs, 10, 0.4, 3)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if !reflect.DeepEqual(resolved.removeRefs, []string{"a.json#1", "b.json#0"}) {
|
|
t.Fatalf("remove refs = %v", resolved.removeRefs)
|
|
}
|
|
if gotTexts(resolved.replacements) != "bob,inside" {
|
|
t.Fatalf("replacement texts = %s", gotTexts(resolved.replacements))
|
|
}
|
|
}
|
|
|
|
func TestResolveWordRunGapThreshold(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 4, word("one", 1, 1.1), word("two", 1.85, 2), word("three", 2.8, 3)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}),
|
|
},
|
|
}
|
|
merged.Segments[0].OverlapGroupID = 1
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if len(got.Segments) != 2 {
|
|
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
|
}
|
|
if got.Segments[0].Text != "one two" || got.Segments[1].Text != "three" {
|
|
t.Fatalf("unexpected replacement texts: %#v", got.Segments)
|
|
}
|
|
}
|
|
|
|
func TestResolvePartialResolutionKeepsNoWordSpeakerOriginals(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 5, word("hello", 1.2, 1.4)),
|
|
segmentWithWords("b.json", 0, "Bob", 2, 4),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
merged.Segments[0].OverlapGroupID = 1
|
|
merged.Segments[1].OverlapGroupID = 1
|
|
|
|
got, summary, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if summary.OriginalsRemoved != 1 || summary.ReplacementsCreated != 1 {
|
|
t.Fatalf("unexpected summary: %#v", summary)
|
|
}
|
|
if len(got.OverlapGroups) != 0 {
|
|
t.Fatalf("expected changed group to be removed, got %#v", got.OverlapGroups)
|
|
}
|
|
if len(got.Segments) != 2 {
|
|
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
|
}
|
|
if got.Segments[0].Text != "hello" || got.Segments[1].Text != "Bob" {
|
|
t.Fatalf("unexpected segment texts: %#v", got.Segments)
|
|
}
|
|
if got.Segments[1].SourceSegmentIndex == nil {
|
|
t.Fatal("kept original should retain source_segment_index")
|
|
}
|
|
if got.Segments[1].OverlapGroupID != 0 {
|
|
t.Fatalf("kept original overlap group ID = %d, want 0", got.Segments[1].OverlapGroupID)
|
|
}
|
|
}
|
|
|
|
func TestResolveGroupWithNoUsableWordsRemainsUnchanged(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 5),
|
|
segmentWithWords("b.json", 0, "Bob", 2, 4),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
merged.Segments[0].OverlapGroupID = 1
|
|
merged.Segments[1].OverlapGroupID = 1
|
|
|
|
got, summary, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if summary.GroupsChanged != 0 || summary.OriginalsRemoved != 0 || summary.ReplacementsCreated != 0 {
|
|
t.Fatalf("unexpected summary: %#v", summary)
|
|
}
|
|
if !reflect.DeepEqual(got, merged) {
|
|
t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged)
|
|
}
|
|
}
|
|
|
|
func TestResolveReplacementProvenanceIsDeterministic(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 1, "Alice", 1, 3, word("second", 1.5, 1.6)),
|
|
segmentWithWords("a.json", 0, "Alice", 1, 3, word("first", 1.1, 1.2)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 3, []string{"a.json#1", "a.json#0"}, []string{"Alice"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if len(got.Segments) != 1 {
|
|
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
|
}
|
|
segment := got.Segments[0]
|
|
if segment.SourceRef != "word-run:1:1:1" {
|
|
t.Fatalf("source_ref = %q", segment.SourceRef)
|
|
}
|
|
if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "a.json#1"}) {
|
|
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
|
}
|
|
}
|
|
|
|
func TestResolveIncludesUntimedWordsInTextWithoutChangingBounds(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords(
|
|
"a.json",
|
|
0,
|
|
"Alice",
|
|
1,
|
|
3,
|
|
untimedWord("pre"),
|
|
word("one", 1.1, 1.2),
|
|
untimedWord("middle"),
|
|
word("two", 1.4, 1.5),
|
|
untimedWord("post"),
|
|
),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if len(got.Segments) != 1 {
|
|
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
|
}
|
|
segment := got.Segments[0]
|
|
if segment.Text != "pre one middle two post" {
|
|
t.Fatalf("text = %q", segment.Text)
|
|
}
|
|
if segment.Start != 1.1 || segment.End != 1.5 {
|
|
t.Fatalf("bounds = %f-%f, want 1.1-1.5", segment.Start, segment.End)
|
|
}
|
|
}
|
|
|
|
func TestResolveUntimedWordsDoNotBridgeWordRunGap(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords(
|
|
"a.json",
|
|
0,
|
|
"Alice",
|
|
1,
|
|
4,
|
|
word("one", 1, 1.1),
|
|
untimedWord("middle"),
|
|
word("two", 2, 2.1),
|
|
),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if len(got.Segments) != 2 {
|
|
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
|
}
|
|
if got.Segments[0].Text != "one middle" || got.Segments[1].Text != "two" {
|
|
t.Fatalf("unexpected texts: %#v", got.Segments)
|
|
}
|
|
if got.Segments[0].End != 1.1 || got.Segments[1].Start != 2 {
|
|
t.Fatalf("untimed word changed bounds: %#v", got.Segments)
|
|
}
|
|
}
|
|
|
|
func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 3, untimedWord("hello")),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}),
|
|
},
|
|
}
|
|
merged.Segments[0].OverlapGroupID = 1
|
|
|
|
got, summary, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if summary.GroupsChanged != 0 {
|
|
t.Fatalf("unexpected summary: %#v", summary)
|
|
}
|
|
if !reflect.DeepEqual(got, merged) {
|
|
t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged)
|
|
}
|
|
}
|
|
|
|
func TestResolveReordersNearStartWordRunsByDuration(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
|
segmentWithWords("b.json", 0, "Bob", 1, 3, word("short", 1.2, 1.3)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 3, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "short,long" {
|
|
t.Fatalf("segment order = %s, want short,long", gotTexts(got.Segments))
|
|
}
|
|
if got.Segments[0].Start != 1.2 || got.Segments[0].End != 1.3 {
|
|
t.Fatalf("short segment bounds changed: %#v", got.Segments[0])
|
|
}
|
|
if got.Segments[1].SourceRef != "word-run:1:1:1" || got.Segments[1].Text != "long" {
|
|
t.Fatalf("long segment provenance/text changed: %#v", got.Segments[1])
|
|
}
|
|
}
|
|
|
|
func TestResolveDoesNotReorderWordRunsOutsideWindow(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
|
segmentWithWords("b.json", 0, "Bob", 1, 3, word("short", 1.5, 1.6)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 3, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "long,short" {
|
|
t.Fatalf("segment order = %s, want long,short", gotTexts(got.Segments))
|
|
}
|
|
}
|
|
|
|
func TestResolveReordersTransitiveNearStartClustersByDuration(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
|
segmentWithWords("b.json", 0, "Bob", 1, 3, word("medium", 1.3, 1.8)),
|
|
segmentWithWords("c.json", 0, "Carol", 1, 3, word("short", 1.65, 1.75)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 3, []string{"a.json#0", "b.json#0", "c.json#0"}, []string{"Alice", "Bob", "Carol"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "short,medium,long" {
|
|
t.Fatalf("segment order = %s, want short,medium,long", gotTexts(got.Segments))
|
|
}
|
|
}
|
|
|
|
func TestResolveReorderFallsBackToDeterministicOrderForEqualDurations(t *testing.T) {
|
|
merged := model.MergedTranscript{
|
|
Segments: []model.Segment{
|
|
segmentWithWords("b.json", 0, "Bob", 1, 3, word("bob", 1, 1.5)),
|
|
segmentWithWords("a.json", 0, "Alice", 1, 3, word("alice", 1.2, 1.7)),
|
|
},
|
|
OverlapGroups: []model.OverlapGroup{
|
|
group(1, 1, 3, []string{"b.json#0", "a.json#0"}, []string{"Bob", "Alice"}),
|
|
},
|
|
}
|
|
|
|
got, _, err := Resolve(merged, 0.75, 0.4, 0)
|
|
if err != nil {
|
|
t.Fatalf("resolve failed: %v", err)
|
|
}
|
|
if gotTexts(got.Segments) != "bob,alice" {
|
|
t.Fatalf("segment order = %s, want bob,alice", gotTexts(got.Segments))
|
|
}
|
|
}
|
|
|
|
func segmentWithWords(source string, sourceIndex int, speaker string, start float64, end float64, words ...model.Word) model.Segment {
|
|
segment := segment(source, sourceIndex, speaker, start, end)
|
|
segment.Words = words
|
|
return segment
|
|
}
|
|
|
|
func word(text string, start float64, end float64) model.Word {
|
|
return model.Word{
|
|
Text: text,
|
|
Start: start,
|
|
End: end,
|
|
Timed: true,
|
|
}
|
|
}
|
|
|
|
func gotTexts(segments []model.Segment) string {
|
|
texts := make([]string, 0, len(segments))
|
|
for _, segment := range segments {
|
|
texts = append(texts, segment.Text)
|
|
}
|
|
return strings.Join(texts, ",")
|
|
}
|
|
|
|
func untimedWord(text string) model.Word {
|
|
return model.Word{
|
|
Text: text,
|
|
}
|
|
}
|
|
|
|
func group(id int, start float64, end float64, refs []string, speakers []string) model.OverlapGroup {
|
|
return model.OverlapGroup{
|
|
ID: id,
|
|
Start: start,
|
|
End: end,
|
|
Segments: refs,
|
|
Speakers: speakers,
|
|
Class: defaultClass,
|
|
Resolution: defaultResolution,
|
|
}
|
|
}
|