Minor updates to overlap detection and segment coalescing logic

This commit is contained in:
2026-04-28 14:11:38 -05:00
parent 28c2eea340
commit a3ca6665a9
14 changed files with 662 additions and 95 deletions

View File

@@ -57,17 +57,20 @@ func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summ
// run is a group of segments being accumulated together, along with the
// overall time span they cover.
type run struct {
	// segments holds the members of the run in the order they were added.
	segments []model.Segment
	// start is the smallest Start and end the largest End seen among the
	// segments added so far.
	start, end float64
}
// newRun returns a run whose only member is segment. The run's span is
// initialised to that segment's own Start and End.
func newRun(segment model.Segment) run {
	var r run
	r.segments = []model.Segment{segment}
	r.start, r.end = segment.Start, segment.End
	return r
}
// canMerge reports whether next may join the run: its Speaker must equal the
// value returned by r.speaker(), and it must start no more than gap after the
// run's end.
//
// The distance is measured from r.end, the largest End among all segments
// added so far, rather than from the End of the most recently appended
// segment. An earlier segment that outlasts the later ones therefore still
// counts toward the run's extent.
func (r run) canMerge(next model.Segment, gap float64) bool {
	return r.speaker() == next.Speaker && next.Start-r.end <= gap
}
func (r run) speaker() string {
@@ -76,6 +79,12 @@ func (r run) speaker() string {
// add appends segment to the run and widens the run's span so that it still
// covers every member: start only ever moves earlier, end only ever later.
func (r *run) add(segment model.Segment) {
	if begin := segment.Start; begin < r.start {
		r.start = begin
	}
	if finish := segment.End; finish > r.end {
		r.end = finish
	}
	r.segments = append(r.segments, segment)
}
func seedRunFromPending(pending []model.Segment, segment model.Segment, gap float64) (run, []model.Segment, bool) {
@@ -129,19 +138,13 @@ func (r run) coalescedSegment(id int) model.Segment {
SourceRef: fmt.Sprintf("coalesce:%d", id),
DerivedFrom: make([]string, 0, len(r.segments)),
Speaker: first.Speaker,
Start: first.Start,
End: first.End,
Start: r.start,
End: r.end,
Words: make([]model.Word, 0),
}
text := make([]string, 0, len(r.segments))
for _, segment := range r.segments {
if segment.Start < merged.Start {
merged.Start = segment.Start
}
if segment.End > merged.End {
merged.End = segment.End
}
if segment.Source != merged.Source {
merged.Source = "derived"
}