Implemented a module to detect backchannel segments, and updated the coalesce module to ignore them when coalescing same-speaker turns

This commit is contained in:
2026-04-27 19:49:25 -05:00
parent aab6d12730
commit bbfb8aba44
10 changed files with 360 additions and 6 deletions

View File

@@ -137,6 +137,55 @@ func TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs(t *testing.T) {
}
}
// TestApplyDropsBackchannelCategoryFromMergedSameSpeakerRun feeds Apply two
// consecutive Alice segments, the first tagged "backchannel", and expects a
// single output segment whose Categories field is nil.
func TestApplyDropsBackchannelCategoryFromMergedSameSpeakerRun(t *testing.T) {
	tagged := segment("a.json", 0, "Alice", 1, 2, "yeah")
	tagged.Categories = []string{"backchannel"}
	untagged := segment("a.json", 1, "Alice", 2.5, 3, "more")
	input := model.MergedTranscript{Segments: []model.Segment{tagged, untagged}}

	// Only the resulting transcript is inspected; the summary is discarded.
	got, _ := Apply(input, 3)

	if n := len(got.Segments); n != 1 {
		t.Fatalf("segment count = %d, want 1", n)
	}
	if cats := got.Segments[0].Categories; cats != nil {
		t.Fatalf("categories = %v, want nil", cats)
	}
}
// TestApplySkipsDifferentSpeakerBackchannelAsMergeBlocker places a Bob segment
// tagged "backchannel" between two Alice segments and expects Apply to join
// the Alice segments into "first second" while the Bob segment is emitted
// afterwards with its text and categories intact.
func TestApplySkipsDifferentSpeakerBackchannelAsMergeBlocker(t *testing.T) {
	first := segment("a.json", 0, "Alice", 1, 2, "first")
	backchannel := segment("b.json", 0, "Bob", 2.2, 2.5, "yeah")
	backchannel.Categories = []string{"backchannel"}
	second := segment("a.json", 1, "Alice", 3, 4, "second")
	input := model.MergedTranscript{Segments: []model.Segment{first, backchannel, second}}

	got, summary := Apply(input, 3)

	// Every failure message states the expected value so a failing run can be
	// diagnosed without opening the test source.
	if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
		t.Fatalf("summary = %#v, want OriginalSegmentsMerged = 2 and CoalescedSegments = 1", summary)
	}
	// The length check must be fatal: the assertions below index into the slice.
	if len(got.Segments) != 2 {
		t.Fatalf("segment count = %d, want 2", len(got.Segments))
	}
	if got.Segments[0].Text != "first second" {
		t.Fatalf("first output text = %q, want %q", got.Segments[0].Text, "first second")
	}
	wantCategories := []string{"backchannel"}
	if got.Segments[1].Text != "yeah" || !reflect.DeepEqual(got.Segments[1].Categories, wantCategories) {
		t.Fatalf("second output segment = %#v, want Text %q and Categories %v", got.Segments[1], "yeah", wantCategories)
	}
}
// TestApplyDifferentSpeakerNonBackchannelStillBlocksMerge places a Bob segment
// with no categories between two Alice segments and expects Apply to report no
// merges and to return all three segments.
func TestApplyDifferentSpeakerNonBackchannelStillBlocksMerge(t *testing.T) {
	first := segment("a.json", 0, "Alice", 1, 2, "first")
	bob := segment("b.json", 0, "Bob", 2.2, 2.5, "interruption")
	second := segment("a.json", 1, "Alice", 3, 4, "second")
	input := model.MergedTranscript{Segments: []model.Segment{first, bob, second}}

	got, summary := Apply(input, 3)

	// The failure message names the expected counters so a failing run can be
	// diagnosed without opening the test source.
	if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
		t.Fatalf("summary = %#v, want OriginalSegmentsMerged = 0 and CoalescedSegments = 0", summary)
	}
	if len(got.Segments) != 3 {
		t.Fatalf("segment count = %d, want 3", len(got.Segments))
	}
}
func segment(source string, sourceIndex int, speaker string, start float64, end float64, text string) model.Segment {
return model.Segment{
Source: source,