Added a module to coalesce adjacent same-speaker segments
This commit is contained in:
@@ -2,6 +2,7 @@ package overlap
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
@@ -14,7 +15,7 @@ func TestResolveNoOverlapGroupsIsNoOp(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -39,7 +40,7 @@ func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) {
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -93,7 +94,7 @@ func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) {
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 10)
|
||||
got, _, err := Resolve(merged, 10, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -116,7 +117,7 @@ func TestResolveWordRunGapThreshold(t *testing.T) {
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -141,7 +142,7 @@ func TestResolvePartialResolutionKeepsNoWordSpeakerOriginals(t *testing.T) {
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -178,7 +179,7 @@ func TestResolveGroupWithNoUsableWordsRemainsUnchanged(t *testing.T) {
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -201,7 +202,7 @@ func TestResolveReplacementProvenanceIsDeterministic(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -238,7 +239,7 @@ func TestResolveIncludesUntimedWordsInTextWithoutChangingBounds(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -273,7 +274,7 @@ func TestResolveUntimedWordsDoNotBridgeWordRunGap(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -299,7 +300,7 @@ func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) {
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -311,6 +312,93 @@ func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReordersNearStartWordRunsByDuration(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("short", 1.2, 1.3)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "short,long" {
|
||||
t.Fatalf("segment order = %s, want short,long", gotTexts(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Start != 1.2 || got.Segments[0].End != 1.3 {
|
||||
t.Fatalf("short segment bounds changed: %#v", got.Segments[0])
|
||||
}
|
||||
if got.Segments[1].SourceRef != "word-run:1:1:1" || got.Segments[1].Text != "long" {
|
||||
t.Fatalf("long segment provenance/text changed: %#v", got.Segments[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveDoesNotReorderWordRunsOutsideWindow(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("short", 1.5, 1.6)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "long,short" {
|
||||
t.Fatalf("segment order = %s, want long,short", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReordersTransitiveNearStartClustersByDuration(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("medium", 1.3, 1.8)),
|
||||
segmentWithWords("c.json", 0, "Carol", 1, 3, word("short", 1.65, 1.75)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0", "b.json#0", "c.json#0"}, []string{"Alice", "Bob", "Carol"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "short,medium,long" {
|
||||
t.Fatalf("segment order = %s, want short,medium,long", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReorderFallsBackToDeterministicOrderForEqualDurations(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("bob", 1, 1.5)),
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("alice", 1.2, 1.7)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"b.json#0", "a.json#0"}, []string{"Bob", "Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "bob,alice" {
|
||||
t.Fatalf("segment order = %s, want bob,alice", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func segmentWithWords(source string, sourceIndex int, speaker string, start float64, end float64, words ...model.Word) model.Segment {
|
||||
segment := segment(source, sourceIndex, speaker, start, end)
|
||||
segment.Words = words
|
||||
@@ -326,6 +414,14 @@ func word(text string, start float64, end float64) model.Word {
|
||||
}
|
||||
}
|
||||
|
||||
func gotTexts(segments []model.Segment) string {
|
||||
texts := make([]string, 0, len(segments))
|
||||
for _, segment := range segments {
|
||||
texts = append(texts, segment.Text)
|
||||
}
|
||||
return strings.Join(texts, ",")
|
||||
}
|
||||
|
||||
func untimedWord(text string) model.Word {
|
||||
return model.Word{
|
||||
Text: text,
|
||||
|
||||
Reference in New Issue
Block a user