Fix text duplication in the resolve-overlaps module: context segments already claimed by one overlap group are now skipped when resolving later groups

This commit is contained in:
2026-04-29 07:00:18 -05:00
parent cc80a123ef
commit cc02a7a01e
2 changed files with 60 additions and 3 deletions

View File

@@ -36,6 +36,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
overlapRefs[ref] = struct{}{}
}
}
claimedContextRefs := make(map[string]struct{})
removeRefs := make(map[string]struct{})
clearAnnotationRefs := make(map[string]struct{})
@@ -44,7 +45,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
replacementOrder := make(map[string]replacementOrder)
for _, group := range in.OverlapGroups {
resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, wordRunGap, wordRunReorderWindow, contextWindow)
resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, claimedContextRefs, wordRunGap, wordRunReorderWindow, contextWindow)
if err != nil {
return model.MergedTranscript{}, ResolutionSummary{}, err
}
@@ -58,6 +59,9 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
for sourceRef, order := range resolved.replacementOrder {
replacementOrder[sourceRef] = order
}
for _, ref := range resolved.contextRefs {
claimedContextRefs[ref] = struct{}{}
}
for _, ref := range group.Segments {
clearAnnotationRefs[ref] = struct{}{}
@@ -107,6 +111,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
// resolvedGroup is the result resolveGroup returns to Resolve for a single
// overlap group.
type resolvedGroup struct {
// removeRefs is a list of segment refs produced for the group.
// NOTE(review): presumably the refs Resolve drops from the transcript; the
// code that fills and consumes it is outside this view — confirm.
removeRefs []string
// contextRefs holds the de-duplicated refs that resolveGroup gathered but
// that were not found in the group's own ref set. Resolve records them as
// claimed so that later groups skip them.
contextRefs []string
// replacements holds the segments built by replacementSegment for the group,
// as returned by reorderReplacementSegments.
replacements []model.Segment
// replacementOrder is the second result of reorderReplacementSegments;
// Resolve merges its entries into its own map, keyed by source ref.
replacementOrder map[string]replacementOrder
}
@@ -131,9 +136,10 @@ type wordRun struct {
end float64
}
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) {
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, claimedContextRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) {
segmentsBySpeaker := make(map[string][]model.Segment)
refsBySpeaker := make(map[string][]string)
contextRefs := make([]string, 0)
groupRefs := make(map[string]struct{}, len(group.Segments))
groupSpeakers := make(map[string]struct{})
for _, ref := range group.Segments {
@@ -154,6 +160,9 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
if _, exists := overlapRefs[ref]; exists {
continue
}
if _, exists := claimedContextRefs[ref]; exists {
continue
}
if _, exists := groupSpeakers[segment.Speaker]; !exists {
continue
}
@@ -166,6 +175,9 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
}
segmentsBySpeaker[segment.Speaker] = append(segmentsBySpeaker[segment.Speaker], segment)
refsBySpeaker[segment.Speaker] = append(refsBySpeaker[segment.Speaker], ref)
if _, exists := groupRefs[ref]; !exists {
contextRefs = append(contextRefs, ref)
}
}
speakers := groupSpeakerOrder(group, segmentsBySpeaker)
@@ -187,6 +199,7 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
resolved.replacements = append(resolved.replacements, replacementSegment(group.ID, speakerIndex+1, runIndex+1, speaker, run))
}
}
resolved.contextRefs = uniqueStrings(contextRefs)
resolved.replacements, resolved.replacementOrder = reorderReplacementSegments(group.ID, resolved.replacements, wordRunReorderWindow)
return resolved, nil
@@ -466,3 +479,16 @@ func uniqueSortedStrings(values []string) []string {
sort.Strings(unique)
return unique
}
// uniqueStrings returns the distinct elements of values, keeping each one at
// the position of its first occurrence. The input slice is left untouched and
// the result is always non-nil, even when values is nil or empty.
func uniqueStrings(values []string) []string {
	result := make([]string, 0, len(values))
	visited := make(map[string]struct{}, len(values))
	for i := range values {
		current := values[i]
		// Only the first sighting of a string is kept; repeats are dropped.
		if _, dup := visited[current]; !dup {
			visited[current] = struct{}{}
			result = append(result, current)
		}
	}
	return result
}