Added a module to coalesce adjacent same-speaker segments

This commit is contained in:
2026-04-27 19:30:00 -05:00
parent 13d972cb24
commit aab6d12730
12 changed files with 919 additions and 28 deletions

View File

@@ -18,7 +18,7 @@ type ResolutionSummary struct {
// Resolve replaces detected overlap-group segments with word-run segments when
// word-level timing is available.
func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscript, ResolutionSummary, error) {
func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64) (model.MergedTranscript, ResolutionSummary, error) {
summary := ResolutionSummary{
GroupsProcessed: len(in.OverlapGroups),
}
@@ -35,9 +35,10 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
clearAnnotationRefs := make(map[string]struct{})
removeGroupIDs := make(map[int]struct{})
replacements := make([]model.Segment, 0)
replacementOrder := make(map[string]replacementOrder)
for _, group := range in.OverlapGroups {
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap)
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap, wordRunReorderWindow)
if err != nil {
return model.MergedTranscript{}, ResolutionSummary{}, err
}
@@ -48,6 +49,9 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
summary.GroupsChanged++
removeGroupIDs[group.ID] = struct{}{}
replacements = append(replacements, resolved.replacements...)
for sourceRef, order := range resolved.replacementOrder {
replacementOrder[sourceRef] = order
}
for _, ref := range group.Segments {
clearAnnotationRefs[ref] = struct{}{}
@@ -78,7 +82,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
}
segments = append(segments, replacements...)
sort.SliceStable(segments, func(i, j int) bool {
return model.SegmentLess(segments[i], segments[j])
return resolvedSegmentLess(segments[i], segments[j], replacementOrder)
})
overlapGroups := make([]model.OverlapGroup, 0, len(in.OverlapGroups)-len(removeGroupIDs))
@@ -96,8 +100,15 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
}
type resolvedGroup struct {
removeRefs []string
replacements []model.Segment
removeRefs []string
replacements []model.Segment
replacementOrder map[string]replacementOrder
}
// replacementOrder records where a replacement segment sits inside the
// start-time cluster it was assigned to by reorderReplacementSegments. It is
// consulted by resolvedSegmentLess when the final segment list is sorted.
type replacementOrder struct {
// cluster identifies the cluster as "<groupID>:<clusterIndex>", where the
// cluster index counts from 1 within an overlap group.
cluster string
// rank is the segment's zero-based position inside its cluster after the
// cluster has been sorted by ascending duration (ties broken by
// model.SegmentLess).
rank int
// anchor is the Start of the cluster's first segment in model.SegmentLess
// order, captured before the duration sort; every member of a cluster shares
// the same anchor.
anchor float64
}
type resolutionWord struct {
@@ -114,7 +125,7 @@ type wordRun struct {
end float64
}
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64) (resolvedGroup, error) {
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64, wordRunReorderWindow float64) (resolvedGroup, error) {
segmentsBySpeaker := make(map[string][]model.Segment)
refsBySpeaker := make(map[string][]string)
for _, ref := range group.Segments {
@@ -147,9 +158,76 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
}
}
resolved.replacements, resolved.replacementOrder = reorderReplacementSegments(group.ID, resolved.replacements, wordRunReorderWindow)
return resolved, nil
}
// reorderReplacementSegments groups the replacement segments of one overlap
// group into clusters of near-simultaneous starts and, inside each cluster,
// puts the shortest segment first.
//
// The input slice is not modified: a copy is sorted with model.SegmentLess and
// then walked left to right. A segment joins the current cluster while its
// Start is at most wordRunReorderWindow after the Start of the segment directly
// before it, so a cluster can chain across more than one window. Each cluster
// is then stably re-sorted by ascending duration (End - Start), with
// model.SegmentLess as the tie-breaker.
//
// It returns the reordered copy together with a map from SourceRef to the
// segment's replacementOrder (cluster key "<groupID>:<n>" with n counting from
// 1, rank within the cluster, and the cluster's earliest Start as anchor).
// Segments that share a SourceRef overwrite one another in the map. An empty
// input is returned unchanged with a nil map.
func reorderReplacementSegments(groupID int, replacements []model.Segment, wordRunReorderWindow float64) ([]model.Segment, map[string]replacementOrder) {
	if len(replacements) == 0 {
		return replacements, nil
	}

	// Work on a private copy so the caller's slice keeps its order.
	sorted := make([]model.Segment, len(replacements))
	copy(sorted, replacements)
	sort.SliceStable(sorted, func(a, b int) bool {
		return model.SegmentLess(sorted[a], sorted[b])
	})

	orderByRef := make(map[string]replacementOrder, len(sorted))
	clusterNumber := 0
	for lo := 0; lo < len(sorted); {
		// Extend the cluster while consecutive starts stay within the window.
		// The boundary is found before the cluster is re-sorted, so the gaps
		// are always measured in model.SegmentLess order.
		hi := lo + 1
		for ; hi < len(sorted); hi++ {
			// Negated "<=" (rather than ">") so a NaN gap ends the cluster.
			if gap := sorted[hi].Start - sorted[hi-1].Start; !(gap <= wordRunReorderWindow) {
				break
			}
		}

		members := sorted[lo:hi]
		// Capture the earliest start now; the duration sort below may move
		// another segment into position 0.
		earliest := members[0].Start
		sort.SliceStable(members, func(a, b int) bool {
			spanA := members[a].End - members[a].Start
			spanB := members[b].End - members[b].Start
			if spanA == spanB {
				return model.SegmentLess(members[a], members[b])
			}
			return spanA < spanB
		})

		clusterNumber++
		key := fmt.Sprintf("%d:%d", groupID, clusterNumber)
		for position, segment := range members {
			orderByRef[segment.SourceRef] = replacementOrder{
				cluster: key,
				rank:    position,
				anchor:  earliest,
			}
		}

		lo = hi
	}

	return sorted, orderByRef
}
// resolvedSegmentLess reports whether left sorts before right in the resolved
// transcript, taking replacement clusters into account.
//
// Two segments that both have an entry in replacementOrder, belong to the same
// cluster, and have different ranks are ordered by rank. Otherwise each
// segment is positioned by its cluster anchor when it has an entry, or by its
// own Start when it does not; if those positions are equal the decision falls
// back to model.SegmentLess.
func resolvedSegmentLess(left model.Segment, right model.Segment, replacementOrder map[string]replacementOrder) bool {
	lhs, lhsRanked := replacementOrder[left.SourceRef]
	rhs, rhsRanked := replacementOrder[right.SourceRef]

	// Members of one cluster keep the order chosen when the cluster was built.
	if lhsRanked && rhsRanked && lhs.cluster == rhs.cluster {
		if lhs.rank != rhs.rank {
			return lhs.rank < rhs.rank
		}
	}

	// A clustered segment is placed at its cluster's anchor rather than at its
	// own start, so the whole cluster moves through the sort as one unit.
	lhsKey, rhsKey := left.Start, right.Start
	if lhsRanked {
		lhsKey = lhs.anchor
	}
	if rhsRanked {
		rhsKey = rhs.anchor
	}

	if lhsKey == rhsKey {
		return model.SegmentLess(left, right)
	}
	return lhsKey < rhsKey
}
func groupSpeakerOrder(group model.OverlapGroup, segmentsBySpeaker map[string][]model.Segment) []string {
seen := make(map[string]struct{}, len(group.Speakers))
speakers := make([]string, 0, len(group.Speakers))