183 lines
4.7 KiB
Go
183 lines
4.7 KiB
Go
package coalesce
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
)
|
|
|
|
// Summary reports the deterministic counters gathered during one coalesce
// pass.
type Summary struct {
	// OriginalSegmentsMerged counts the input segments that were folded into
	// coalesced segments.
	OriginalSegmentsMerged int
	// CoalescedSegments counts the merged segments the pass produced.
	CoalescedSegments int
}
|
|
|
|
// Apply merges adjacent same-speaker segments in the transcript's current order.
|
|
func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summary) {
|
|
if len(in.Segments) < 2 {
|
|
return in, Summary{}
|
|
}
|
|
|
|
out := model.MergedTranscript{
|
|
Segments: make([]model.Segment, 0, len(in.Segments)),
|
|
OverlapGroups: in.OverlapGroups,
|
|
}
|
|
summary := Summary{}
|
|
coalescedID := 0
|
|
|
|
current := newRun(in.Segments[0])
|
|
pendingSkipped := make([]model.Segment, 0)
|
|
for _, segment := range in.Segments[1:] {
|
|
if segment.Speaker == current.speaker() {
|
|
if current.canMerge(segment, gap) {
|
|
current.add(segment)
|
|
continue
|
|
}
|
|
} else if isSkippableInterjection(segment) {
|
|
pendingSkipped = append(pendingSkipped, segment)
|
|
continue
|
|
}
|
|
|
|
coalescedID = appendRun(&out, current, coalescedID, &summary)
|
|
if seeded, remaining, ok := seedRunFromPending(pendingSkipped, segment, gap); ok {
|
|
pendingSkipped = remaining
|
|
current = seeded
|
|
continue
|
|
}
|
|
out.Segments = append(out.Segments, pendingSkipped...)
|
|
pendingSkipped = pendingSkipped[:0]
|
|
current = newRun(segment)
|
|
}
|
|
coalescedID = appendRun(&out, current, coalescedID, &summary)
|
|
out.Segments = append(out.Segments, pendingSkipped...)
|
|
|
|
return out, summary
|
|
}
|
|
|
|
type run struct {
|
|
segments []model.Segment
|
|
}
|
|
|
|
func newRun(segment model.Segment) run {
|
|
return run{
|
|
segments: []model.Segment{segment},
|
|
}
|
|
}
|
|
|
|
func (r run) canMerge(next model.Segment, gap float64) bool {
|
|
current := r.segments[len(r.segments)-1]
|
|
return current.Speaker == next.Speaker && next.Start-current.End <= gap
|
|
}
|
|
|
|
func (r run) speaker() string {
|
|
return r.segments[0].Speaker
|
|
}
|
|
|
|
func (r *run) add(segment model.Segment) {
|
|
r.segments = append(r.segments, segment)
|
|
}
|
|
|
|
func seedRunFromPending(pending []model.Segment, segment model.Segment, gap float64) (run, []model.Segment, bool) {
|
|
for start := range pending {
|
|
if pending[start].Speaker != segment.Speaker {
|
|
continue
|
|
}
|
|
|
|
candidate := newRun(pending[start])
|
|
selected := map[int]struct{}{start: {}}
|
|
for index := start + 1; index < len(pending); index++ {
|
|
if pending[index].Speaker == segment.Speaker && candidate.canMerge(pending[index], gap) {
|
|
candidate.add(pending[index])
|
|
selected[index] = struct{}{}
|
|
}
|
|
}
|
|
if !candidate.canMerge(segment, gap) {
|
|
continue
|
|
}
|
|
|
|
candidate.add(segment)
|
|
remaining := make([]model.Segment, 0, len(pending)-len(selected))
|
|
for index, skipped := range pending {
|
|
if _, ok := selected[index]; ok {
|
|
continue
|
|
}
|
|
remaining = append(remaining, skipped)
|
|
}
|
|
return candidate, remaining, true
|
|
}
|
|
return run{}, pending, false
|
|
}
|
|
|
|
func appendRun(out *model.MergedTranscript, current run, coalescedID int, summary *Summary) int {
|
|
if len(current.segments) == 1 {
|
|
out.Segments = append(out.Segments, current.segments[0])
|
|
return coalescedID
|
|
}
|
|
|
|
coalescedID++
|
|
out.Segments = append(out.Segments, current.coalescedSegment(coalescedID))
|
|
summary.OriginalSegmentsMerged += len(current.segments)
|
|
summary.CoalescedSegments++
|
|
return coalescedID
|
|
}
|
|
|
|
func (r run) coalescedSegment(id int) model.Segment {
|
|
first := r.segments[0]
|
|
merged := model.Segment{
|
|
Source: first.Source,
|
|
SourceRef: fmt.Sprintf("coalesce:%d", id),
|
|
DerivedFrom: make([]string, 0, len(r.segments)),
|
|
Speaker: first.Speaker,
|
|
Start: first.Start,
|
|
End: first.End,
|
|
Words: make([]model.Word, 0),
|
|
}
|
|
|
|
text := make([]string, 0, len(r.segments))
|
|
for _, segment := range r.segments {
|
|
if segment.Start < merged.Start {
|
|
merged.Start = segment.Start
|
|
}
|
|
if segment.End > merged.End {
|
|
merged.End = segment.End
|
|
}
|
|
if segment.Source != merged.Source {
|
|
merged.Source = "derived"
|
|
}
|
|
if trimmed := strings.TrimSpace(segment.Text); trimmed != "" {
|
|
text = append(text, trimmed)
|
|
}
|
|
merged.Words = append(merged.Words, segment.Words...)
|
|
merged.DerivedFrom = append(merged.DerivedFrom, segmentRef(segment))
|
|
}
|
|
|
|
merged.Text = strings.Join(text, " ")
|
|
return merged
|
|
}
|
|
|
|
func segmentRef(segment model.Segment) string {
|
|
if segment.SourceSegmentIndex != nil {
|
|
return fmt.Sprintf("%s#%d", segment.Source, *segment.SourceSegmentIndex)
|
|
}
|
|
if segment.SourceRef != "" {
|
|
return segment.SourceRef
|
|
}
|
|
return segment.Source
|
|
}
|
|
|
|
func isSkippableInterjection(segment model.Segment) bool {
|
|
return hasAnyCategory(segment, "backchannel", "filler")
|
|
}
|
|
|
|
func hasAnyCategory(segment model.Segment, categories ...string) bool {
|
|
for _, existing := range segment.Categories {
|
|
for _, category := range categories {
|
|
if existing == category {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|