Bugfix in the coalesce module
This commit is contained in:
@@ -29,16 +29,22 @@ func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summ
|
||||
current := newRun(in.Segments[0])
|
||||
pendingSkipped := make([]model.Segment, 0)
|
||||
for _, segment := range in.Segments[1:] {
|
||||
if current.canMerge(segment, gap) {
|
||||
current.add(segment)
|
||||
continue
|
||||
}
|
||||
if segment.Speaker != current.speaker() && hasAnyCategory(segment, "backchannel", "filler") {
|
||||
if segment.Speaker == current.speaker() {
|
||||
if current.canMerge(segment, gap) {
|
||||
current.add(segment)
|
||||
continue
|
||||
}
|
||||
} else if isSkippableInterjection(segment) {
|
||||
pendingSkipped = append(pendingSkipped, segment)
|
||||
continue
|
||||
}
|
||||
|
||||
coalescedID = appendRun(&out, current, coalescedID, &summary)
|
||||
if seeded, remaining, ok := seedRunFromPending(pendingSkipped, segment, gap); ok {
|
||||
pendingSkipped = remaining
|
||||
current = seeded
|
||||
continue
|
||||
}
|
||||
out.Segments = append(out.Segments, pendingSkipped...)
|
||||
pendingSkipped = pendingSkipped[:0]
|
||||
current = newRun(segment)
|
||||
@@ -72,6 +78,37 @@ func (r *run) add(segment model.Segment) {
|
||||
r.segments = append(r.segments, segment)
|
||||
}
|
||||
|
||||
func seedRunFromPending(pending []model.Segment, segment model.Segment, gap float64) (run, []model.Segment, bool) {
|
||||
for start := range pending {
|
||||
if pending[start].Speaker != segment.Speaker {
|
||||
continue
|
||||
}
|
||||
|
||||
candidate := newRun(pending[start])
|
||||
selected := map[int]struct{}{start: {}}
|
||||
for index := start + 1; index < len(pending); index++ {
|
||||
if pending[index].Speaker == segment.Speaker && candidate.canMerge(pending[index], gap) {
|
||||
candidate.add(pending[index])
|
||||
selected[index] = struct{}{}
|
||||
}
|
||||
}
|
||||
if !candidate.canMerge(segment, gap) {
|
||||
continue
|
||||
}
|
||||
|
||||
candidate.add(segment)
|
||||
remaining := make([]model.Segment, 0, len(pending)-len(selected))
|
||||
for index, skipped := range pending {
|
||||
if _, ok := selected[index]; ok {
|
||||
continue
|
||||
}
|
||||
remaining = append(remaining, skipped)
|
||||
}
|
||||
return candidate, remaining, true
|
||||
}
|
||||
return run{}, pending, false
|
||||
}
|
||||
|
||||
func appendRun(out *model.MergedTranscript, current run, coalescedID int, summary *Summary) int {
|
||||
if len(current.segments) == 1 {
|
||||
out.Segments = append(out.Segments, current.segments[0])
|
||||
@@ -129,6 +166,10 @@ func segmentRef(segment model.Segment) string {
|
||||
return segment.Source
|
||||
}
|
||||
|
||||
func isSkippableInterjection(segment model.Segment) bool {
|
||||
return hasAnyCategory(segment, "backchannel", "filler")
|
||||
}
|
||||
|
||||
func hasAnyCategory(segment model.Segment, categories ...string) bool {
|
||||
for _, existing := range segment.Categories {
|
||||
for _, category := range categories {
|
||||
|
||||
Reference in New Issue
Block a user