Implemented a module to detect filler segments and skip them when coalescing same-speaker segments

This commit is contained in:
2026-04-27 19:58:55 -05:00
parent bbfb8aba44
commit fb0519c561
9 changed files with 319 additions and 14 deletions

View File

@@ -27,24 +27,24 @@ func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summ
coalescedID := 0
current := newRun(in.Segments[0])
pendingBackchannels := make([]model.Segment, 0)
pendingSkipped := make([]model.Segment, 0)
for _, segment := range in.Segments[1:] {
if current.canMerge(segment, gap) {
current.add(segment)
continue
}
if segment.Speaker != current.speaker() && hasCategory(segment, "backchannel") {
pendingBackchannels = append(pendingBackchannels, segment)
if segment.Speaker != current.speaker() && hasAnyCategory(segment, "backchannel", "filler") {
pendingSkipped = append(pendingSkipped, segment)
continue
}
coalescedID = appendRun(&out, current, coalescedID, &summary)
out.Segments = append(out.Segments, pendingBackchannels...)
pendingBackchannels = pendingBackchannels[:0]
out.Segments = append(out.Segments, pendingSkipped...)
pendingSkipped = pendingSkipped[:0]
current = newRun(segment)
}
coalescedID = appendRun(&out, current, coalescedID, &summary)
out.Segments = append(out.Segments, pendingBackchannels...)
out.Segments = append(out.Segments, pendingSkipped...)
return out, summary
}
@@ -129,10 +129,12 @@ func segmentRef(segment model.Segment) string {
return segment.Source
}
func hasCategory(segment model.Segment, category string) bool {
func hasAnyCategory(segment model.Segment, categories ...string) bool {
for _, existing := range segment.Categories {
if existing == category {
return true
for _, category := range categories {
if existing == category {
return true
}
}
}
return false