Bugfix in the coalesce module

This commit is contained in:
2026-04-27 20:29:55 -05:00
parent fb0519c561
commit 8a95dba276
4 changed files with 248 additions and 6 deletions

View File

@@ -665,6 +665,94 @@ func TestMergeTagsFillerSegments(t *testing.T) {
}
}
// TestMergeCoalescesSameSpeakerBackchannelWithFollowingSegment runs merge on a
// single input file whose short "That makes sense." segment (1-1.7) is followed
// almost immediately (1.72-4) by a longer segment, and expects one combined
// segment in the output: joined text, source_ref "coalesce:1", both input
// segments listed in derived_from, and no categories.
func TestMergeCoalescesSameSpeakerBackchannelWithFollowingSegment(t *testing.T) {
	workDir := t.TempDir()
	inputPath := writeJSONFile(t, workDir, "zach.json", `{
"segments": [
{"start": 1, "end": 1.7, "text": "That makes sense."},
{"start": 1.72, "end": 4, "text": "So, like, next thought."}
]
}`)
	mergedPath := filepath.Join(workDir, "merged.json")

	if err := executeMerge("--input-file", inputPath, "--output-file", mergedPath); err != nil {
		t.Fatalf("merge failed: %v", err)
	}

	var got model.FinalTranscript
	readJSON(t, mergedPath, &got)

	// Stop here on a wrong count so the index below cannot go out of range.
	if n := len(got.Segments); n != 1 {
		t.Fatalf("segment count = %d, want 1", n)
	}
	merged := got.Segments[0]

	const wantText = "That makes sense. So, like, next thought."
	if merged.Text != wantText {
		t.Fatalf("text = %q", merged.Text)
	}

	if ref := merged.SourceRef; ref != "coalesce:1" {
		t.Fatalf("source_ref = %q, want coalesce:1", ref)
	}

	// Both original segments of the input file must be recorded, in order.
	wantDerived := []string{inputPath + "#0", inputPath + "#1"}
	if !equalStrings(merged.DerivedFrom, wantDerived) {
		t.Fatalf("derived_from = %v", merged.DerivedFrom)
	}

	if len(merged.Categories) > 0 {
		t.Fatalf("categories = %v, want none", merged.Categories)
	}
}
// TestMergeCoalescesBackchannelAfterDifferentSpeakerIntoFollowingSameSpeakerSegment
// merges two input files that a speakers file maps to Mike and Zach. Mike has
// one segment (1-2); Zach has a short "That makes sense." segment (2.5-3)
// followed almost immediately (3.02-6) by a longer one. The output is expected
// to hold Mike's segment unchanged, then a single Zach segment whose text joins
// both of Zach's inputs, whose derived_from lists both, and which carries no
// categories.
func TestMergeCoalescesBackchannelAfterDifferentSpeakerIntoFollowingSameSpeakerSegment(t *testing.T) {
	dir := t.TempDir()
	inputA := writeJSONFile(t, dir, "mike.json", `{
"segments": [
{"start": 1, "end": 2, "text": "previous speaker"}
]
}`)
	inputB := writeJSONFile(t, dir, "zach.json", `{
"segments": [
{"start": 2.5, "end": 3, "text": "That makes sense."},
{"start": 3.02, "end": 6, "text": "So, like, next thought."}
]
}`)
	// Each list item carries its own speaker/match pair. The per-speaker
	// "match" key has to be indented under its "- speaker" entry; at column 0
	// it would sit beside the top-level "match" key instead of inside the item.
	speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
  - speaker: Mike
    match: ["mike.json"]
  - speaker: Zach
    match: ["zach.json"]
`)
	output := filepath.Join(dir, "merged.json")

	err := executeMerge(
		"--input-file", inputA,
		"--input-file", inputB,
		"--speakers", speakers,
		"--output-file", output,
	)
	if err != nil {
		t.Fatalf("merge failed: %v", err)
	}

	var transcript model.FinalTranscript
	readJSON(t, output, &transcript)

	// Stop on a wrong count so the indexing below cannot go out of range.
	if len(transcript.Segments) != 2 {
		t.Fatalf("segment count = %d, want 2", len(transcript.Segments))
	}

	first, second := transcript.Segments[0], transcript.Segments[1]
	if first.Speaker != "Mike" || first.Text != "previous speaker" {
		t.Fatalf("first segment = %#v, want Mike original", first)
	}
	if second.Speaker != "Zach" || second.Text != "That makes sense. So, like, next thought." {
		t.Fatalf("second segment = %#v, want coalesced Zach", second)
	}
	// Only Zach's two input segments feed the coalesced segment.
	if !equalStrings(second.DerivedFrom, []string{inputB + "#0", inputB + "#1"}) {
		t.Fatalf("derived_from = %v", second.DerivedFrom)
	}
	if len(second.Categories) != 0 {
		t.Fatalf("categories = %v, want none", second.Categories)
	}
}
func TestMergeCoalescesAroundDifferentSpeakerBackchannel(t *testing.T) {
dir := t.TempDir()
inputA := writeJSONFile(t, dir, "a.json", `{