Implemented a new internal/danglers package with deterministic two-pass resolution: dangling ends first, then dangling starts
This commit is contained in:
@@ -95,6 +95,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
"resolve-overlaps",
|
||||
"backchannel",
|
||||
"filler",
|
||||
"resolve-danglers",
|
||||
"coalesce",
|
||||
"detect-overlaps",
|
||||
"autocorrect",
|
||||
@@ -701,6 +702,128 @@ func TestMergeCoalesceGapOverridePreventsMerge(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeResolveDanglersMergesDanglingEnd(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1,
|
||||
"end": 4,
|
||||
"text": "main tail.",
|
||||
"words": [
|
||||
{"word": "main", "start": 1, "end": 1.1},
|
||||
{"word": "tail.", "start": 3, "end": 3.1}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
inputB := writeJSONFile(t, dir, "b.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1.5,
|
||||
"end": 2,
|
||||
"text": "interruption",
|
||||
"words": [
|
||||
{"word": "interruption", "start": 1.5, "end": 2}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||
- speaker: Alice
|
||||
match: ["a.json"]
|
||||
- speaker: Bob
|
||||
match: ["b.json"]
|
||||
`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
reportPath := filepath.Join(dir, "report.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", inputA,
|
||||
"--input-file", inputB,
|
||||
"--speakers", speakers,
|
||||
"--output-file", output,
|
||||
"--report-file", reportPath,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript model.FinalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
if transcript.Segments[0].Speaker != "Alice" || transcript.Segments[0].Text != "main tail." {
|
||||
t.Fatalf("first segment = %#v, want Alice merged dangling end", transcript.Segments[0])
|
||||
}
|
||||
if transcript.Segments[0].ID != 1 || transcript.Segments[1].ID != 2 {
|
||||
t.Fatalf("ids not sequential after resolve-danglers: %#v", transcript.Segments)
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
if !hasReportEvent(rpt, "postprocessing", "resolve-danglers", "merged 1 dangling segment(s) into 1 target segment(s)") {
|
||||
t.Fatal("expected resolve-danglers report event")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeResolveDanglersMergesDanglingStart(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1,
|
||||
"end": 4,
|
||||
"text": "start target words",
|
||||
"words": [
|
||||
{"word": "start", "start": 1, "end": 1.1},
|
||||
{"word": "target", "start": 3, "end": 3.1},
|
||||
{"word": "words", "start": 3.2, "end": 3.3}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
inputB := writeJSONFile(t, dir, "b.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1.5,
|
||||
"end": 2,
|
||||
"text": "interruption",
|
||||
"words": [
|
||||
{"word": "interruption", "start": 1.5, "end": 2}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||
- speaker: Alice
|
||||
match: ["a.json"]
|
||||
- speaker: Bob
|
||||
match: ["b.json"]
|
||||
`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", inputA,
|
||||
"--input-file", inputB,
|
||||
"--speakers", speakers,
|
||||
"--output-file", output,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript model.FinalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
found := false
|
||||
for _, segment := range transcript.Segments {
|
||||
if segment.Speaker == "Alice" && segment.Text == "start target words" {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatalf("expected resolved dangling start in output, got %#v", transcript.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeTagsBackchannelSegments(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "input.json", `{
|
||||
|
||||
Reference in New Issue
Block a user