Implemented an overlap detection module in the postprocessing chain

This commit is contained in:
2026-04-26 20:39:49 -05:00
parent f9ca80f2e8
commit e42a2326e8
8 changed files with 464 additions and 4 deletions

View File

@@ -149,6 +149,74 @@ func TestMergeTieBreakOrder(t *testing.T) {
}
}
// TestMergeDetectsOverlapGroups merges two input files whose segments overlap
// in time and verifies the overlap metadata written to the merged transcript
// and to the report file.
//
// Fixture layout: a.json holds segments 1-5 and 2-3, b.json holds segment 4-6.
// The test expects a single overlap group with ID 1 spanning 1-6, referencing
// all three source segments, listing speakers Alice and Bob, classified as
// "unknown"/"unresolved", and expects every merged segment to carry that
// group's ID.
func TestMergeDetectsOverlapGroups(t *testing.T) {
	dir := t.TempDir()
	inputA := writeJSONFile(t, dir, "a.json", `{
"segments": [
{"start": 1, "end": 5, "text": "alice long"},
{"start": 2, "end": 3, "text": "alice nested"}
]
}`)
	inputB := writeJSONFile(t, dir, "b.json", `{
"segments": [
{"start": 4, "end": 6, "text": "bob overlap"}
]
}`)
	speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
- speaker: Alice
match: ["a.json"]
- speaker: Bob
match: ["b.json"]
`)
	output := filepath.Join(dir, "merged.json")
	reportPath := filepath.Join(dir, "report.json")

	// The inputs are passed B-then-A on purpose: wantRefs below lists A's
	// segments first, so the expected ordering does not follow flag order.
	err := executeMerge(
		"--input-file", inputB,
		"--input-file", inputA,
		"--speakers", speakers,
		"--output-file", output,
		"--report-file", reportPath,
	)
	if err != nil {
		t.Fatalf("merge failed: %v", err)
	}

	var transcript model.FinalTranscript
	readJSON(t, output, &transcript)

	if len(transcript.OverlapGroups) != 1 {
		t.Fatalf("overlap group count = %d, want 1", len(transcript.OverlapGroups))
	}
	group := transcript.OverlapGroups[0]
	if group.ID != 1 {
		t.Fatalf("group ID = %d, want 1", group.ID)
	}
	if group.Start != 1 || group.End != 6 {
		t.Fatalf("group bounds = %f-%f, want 1-6", group.Start, group.End)
	}
	wantRefs := []string{inputA + "#0", inputA + "#1", inputB + "#0"}
	if !equalStrings(group.Segments, wantRefs) {
		t.Fatalf("group refs = %v, want %v", group.Segments, wantRefs)
	}
	if !equalStrings(group.Speakers, []string{"Alice", "Bob"}) {
		t.Fatalf("group speakers = %v, want [Alice Bob]", group.Speakers)
	}
	if group.Class != "unknown" || group.Resolution != "unresolved" {
		t.Fatalf("unexpected group class/resolution: %q/%q", group.Class, group.Resolution)
	}

	// Without this guard the loop below would pass vacuously if the merge
	// emitted no segments at all.
	if len(transcript.Segments) == 0 {
		t.Fatal("merged transcript has no segments; cannot verify overlap group IDs")
	}
	for index, segment := range transcript.Segments {
		if segment.OverlapGroupID != 1 {
			t.Fatalf("segment %d overlap group ID = %d, want 1", index, segment.OverlapGroupID)
		}
	}

	var rpt report.Report
	readJSON(t, reportPath, &rpt)
	if !hasReportEvent(rpt, "postprocessing", "detect-overlaps", "detected 1 overlap group(s)") {
		t.Fatal("expected detect-overlaps report event")
	}
}
func TestSpeakerMatchingUsesFirstMatchingRuleCaseInsensitive(t *testing.T) {
dir := t.TempDir()
input := writeJSONFile(t, dir, "2026-04-19-Adam_Rakestraw.json", `{