Implemented a module to detect backchannel segments, and updated the coalesce module to ignore them when coalescing same-speaker turns

This commit is contained in:
2026-04-27 19:49:25 -05:00
parent aab6d12730
commit bbfb8aba44
10 changed files with 360 additions and 6 deletions

View File

@@ -27,16 +27,24 @@ func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summ
coalescedID := 0
current := newRun(in.Segments[0])
pendingBackchannels := make([]model.Segment, 0)
for _, segment := range in.Segments[1:] {
if current.canMerge(segment, gap) {
current.add(segment)
continue
}
if segment.Speaker != current.speaker() && hasCategory(segment, "backchannel") {
pendingBackchannels = append(pendingBackchannels, segment)
continue
}
coalescedID = appendRun(&out, current, coalescedID, &summary)
out.Segments = append(out.Segments, pendingBackchannels...)
pendingBackchannels = pendingBackchannels[:0]
current = newRun(segment)
}
appendRun(&out, current, coalescedID, &summary)
coalescedID = appendRun(&out, current, coalescedID, &summary)
out.Segments = append(out.Segments, pendingBackchannels...)
return out, summary
}
@@ -56,6 +64,10 @@ func (r run) canMerge(next model.Segment, gap float64) bool {
return current.Speaker == next.Speaker && next.Start-current.End <= gap
}
// speaker reports the Speaker of the first segment held by the run.
//
// It indexes r.segments[0] directly, so the run is expected to hold at
// least one segment when this is called.
func (r run) speaker() string {
	first := r.segments[0]
	return first.Speaker
}
// add appends segment to the end of the run's segment list, storing the
// grown list back on the receiver.
func (r *run) add(segment model.Segment) {
	extended := append(r.segments, segment)
	r.segments = extended
}
@@ -116,3 +128,12 @@ func segmentRef(segment model.Segment) string {
}
return segment.Source
}
// hasCategory reports whether category appears among segment.Categories.
// The comparison is an exact string match; a segment with no categories
// yields false.
func hasCategory(segment model.Segment, category string) bool {
	found := false
	for _, candidate := range segment.Categories {
		if candidate != category {
			continue
		}
		// First match is enough; stop scanning.
		found = true
		break
	}
	return found
}