Minor updates to overlap detection and segment coalescing logic

This commit is contained in:
2026-04-28 14:11:38 -05:00
parent 28c2eea340
commit a3ca6665a9
14 changed files with 662 additions and 95 deletions

View File

@@ -18,7 +18,7 @@ type ResolutionSummary struct {
// Resolve replaces detected overlap-group segments with word-run segments when
// word-level timing is available.
func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64) (model.MergedTranscript, ResolutionSummary, error) {
func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (model.MergedTranscript, ResolutionSummary, error) {
summary := ResolutionSummary{
GroupsProcessed: len(in.OverlapGroups),
}
@@ -30,6 +30,12 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
for index, segment := range in.Segments {
refToIndex[SegmentRef(segment)] = index
}
overlapRefs := make(map[string]struct{})
for _, group := range in.OverlapGroups {
for _, ref := range group.Segments {
overlapRefs[ref] = struct{}{}
}
}
removeRefs := make(map[string]struct{})
clearAnnotationRefs := make(map[string]struct{})
@@ -38,7 +44,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
replacementOrder := make(map[string]replacementOrder)
for _, group := range in.OverlapGroups {
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap, wordRunReorderWindow)
resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, wordRunGap, wordRunReorderWindow, contextWindow)
if err != nil {
return model.MergedTranscript{}, ResolutionSummary{}, err
}
@@ -125,15 +131,39 @@ type wordRun struct {
end float64
}
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64, wordRunReorderWindow float64) (resolvedGroup, error) {
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) {
segmentsBySpeaker := make(map[string][]model.Segment)
refsBySpeaker := make(map[string][]string)
groupRefs := make(map[string]struct{}, len(group.Segments))
groupSpeakers := make(map[string]struct{})
for _, ref := range group.Segments {
index, exists := refToIndex[ref]
if !exists {
return resolvedGroup{}, fmt.Errorf("overlap group %d references missing segment %q", group.ID, ref)
}
groupRefs[ref] = struct{}{}
segment := in.Segments[index]
groupSpeakers[segment.Speaker] = struct{}{}
}
expandedStart := group.Start - contextWindow
expandedEnd := group.End + contextWindow
for _, segment := range in.Segments {
ref := SegmentRef(segment)
if _, exists := groupRefs[ref]; !exists {
if _, exists := overlapRefs[ref]; exists {
continue
}
if _, exists := groupSpeakers[segment.Speaker]; !exists {
continue
}
if !intervalIntersects(segment.Start, segment.End, expandedStart, expandedEnd) {
continue
}
if !segmentNearGroupBoundary(segment, group, contextWindow) {
continue
}
}
segmentsBySpeaker[segment.Speaker] = append(segmentsBySpeaker[segment.Speaker], segment)
refsBySpeaker[segment.Speaker] = append(refsBySpeaker[segment.Speaker], ref)
}
@@ -141,7 +171,7 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
speakers := groupSpeakerOrder(group, segmentsBySpeaker)
resolved := resolvedGroup{}
for speakerIndex, speaker := range speakers {
timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], group.Start, group.End)
timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], expandedStart, expandedEnd)
if len(timedWords) == 0 {
continue
}
@@ -162,6 +192,25 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
return resolved, nil
}
// intervalIntersects reports whether the interval [start, end] overlaps the
// window [windowStart, windowEnd]. Both comparisons are strict, so intervals
// that merely touch at an endpoint do not count as intersecting.
func intervalIntersects(start float64, end float64, windowStart float64, windowEnd float64) bool {
	beginsBeforeWindowCloses := start < windowEnd
	endsAfterWindowOpens := end > windowStart
	return beginsBeforeWindowCloses && endsAfterWindowOpens
}
// segmentNearGroupBoundary reports whether either endpoint of segment
// (Start or End) lies within window of either edge of group (Start or End).
func segmentNearGroupBoundary(segment model.Segment, group model.OverlapGroup, window float64) bool {
	// Check each group edge against both segment endpoints; any single hit
	// is enough.
	for _, edge := range [...]float64{group.Start, group.End} {
		if withinWindow(segment.Start, edge, window) || withinWindow(segment.End, edge, window) {
			return true
		}
	}
	return false
}
// withinWindow reports whether value is at most window away from boundary,
// measured as an absolute distance. The comparison is inclusive, so a
// distance exactly equal to window counts as within it.
func withinWindow(value float64, boundary float64, window float64) bool {
	// Subtract in whichever order yields the non-negative distance.
	if value < boundary {
		return boundary-value <= window
	}
	return value-boundary <= window
}
func reorderReplacementSegments(groupID int, replacements []model.Segment, wordRunReorderWindow float64) ([]model.Segment, map[string]replacementOrder) {
if len(replacements) == 0 {
return replacements, nil