Minor updates to overlap detection and segment coalescing logic

This commit is contained in:
2026-04-28 14:11:38 -05:00
parent 28c2eea340
commit a3ca6665a9
14 changed files with 662 additions and 95 deletions

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
@@ -317,7 +318,7 @@ func TestMergeResolvesOverlapGroupsWithWordRuns(t *testing.T) {
t.Fatalf("segment count = %d, want %d", got, want)
}
wantTexts := []string{"hello there", "bob reply", "later"}
wantTexts := []string{"outside hello there", "bob reply", "later"}
wantSpeakers := []string{"Alice", "Bob", "Alice"}
wantRefs := []string{"word-run:1:1:1", "word-run:1:2:1", "word-run:1:1:2"}
for index, segment := range transcript.Segments {
@@ -1496,6 +1497,86 @@ func TestMergeResolutionPreservesUntimedWordText(t *testing.T) {
}
}
// TestMergeResolveOverlapsAbsorbsNearbyContext merges two single-speaker
// inputs and checks how Alice's segments come out when only one of them
// overlaps Bob's speech.
//
// Alice (a.json) has three consecutive segments: "before" (9–9.95),
// "inside" (10–11) and "after" (11.1–12). Bob (b.json) has one segment
// (10.2–11) that overlaps only "inside". The test expects the merged
// transcript to contain exactly one Alice segment whose text is
// "before inside after" and whose DerivedFrom lists all three source
// segments of a.json, in order.
func TestMergeResolveOverlapsAbsorbsNearbyContext(t *testing.T) {
	dir := t.TempDir()

	// Alice: the middle segment overlaps Bob; its neighbours end 0.05s before
	// and start 0.1s after it without overlapping Bob themselves.
	inputA := writeJSONFile(t, dir, "a.json", `{
"segments": [
{
"start": 9,
"end": 9.95,
"text": "before",
"words": [
{"word": "before", "start": 9.7, "end": 9.9}
]
},
{
"start": 10,
"end": 11,
"text": "inside",
"words": [
{"word": "inside", "start": 10.5, "end": 10.7}
]
},
{
"start": 11.1,
"end": 12,
"text": "after",
"words": [
{"word": "after", "start": 11.2, "end": 11.3}
]
}
]
}`)

	// Bob: a single segment overlapping Alice's "inside" segment.
	inputB := writeJSONFile(t, dir, "b.json", `{
"segments": [
{
"start": 10.2,
"end": 11,
"text": "bob",
"words": [
{"word": "bob", "start": 10.4, "end": 10.6}
]
}
]
}`)

	// Each list item carries both the speaker name and its file patterns, so
	// the per-speaker "match" key must be nested under its "- speaker" entry;
	// at column 0 it would collide with the top-level "match" key.
	speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
  - speaker: Alice
    match: ["a.json"]
  - speaker: Bob
    match: ["b.json"]
`)

	output := filepath.Join(dir, "merged.json")
	err := executeMerge(
		"--input-file", inputA,
		"--input-file", inputB,
		"--speakers", speakers,
		"--output-file", output,
	)
	if err != nil {
		t.Fatalf("merge failed: %v", err)
	}

	var transcript model.FinalTranscript
	readJSON(t, output, &transcript)

	// Only Alice's segments are under test; Bob's output is ignored here.
	var aliceSegments []model.Segment
	for _, segment := range transcript.Segments {
		if segment.Speaker == "Alice" {
			aliceSegments = append(aliceSegments, segment)
		}
	}

	// Fatal: the checks below index the first (and only expected) segment.
	if len(aliceSegments) != 1 {
		t.Fatalf("Alice segment count = %d, want 1: %#v", len(aliceSegments), aliceSegments)
	}

	// The remaining checks are independent, so report both rather than
	// stopping at the first mismatch.
	alice := aliceSegments[0]
	if want := "before inside after"; alice.Text != want {
		t.Errorf("Alice text = %q, want %q", alice.Text, want)
	}
	wantDerived := []string{inputA + "#0", inputA + "#1", inputA + "#2"}
	if !reflect.DeepEqual(alice.DerivedFrom, wantDerived) {
		t.Errorf("Alice derived_from = %v, want %v", alice.DerivedFrom, wantDerived)
	}
}
func TestInvalidTimingFails(t *testing.T) {
tests := []struct {
name string