Fixed a text duplication bug in the resolve-overlaps module: context segments claimed by one overlap group are now skipped when resolving later groups.
This commit is contained in:
@@ -36,6 +36,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
|
||||
overlapRefs[ref] = struct{}{}
|
||||
}
|
||||
}
|
||||
claimedContextRefs := make(map[string]struct{})
|
||||
|
||||
removeRefs := make(map[string]struct{})
|
||||
clearAnnotationRefs := make(map[string]struct{})
|
||||
@@ -44,7 +45,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
|
||||
replacementOrder := make(map[string]replacementOrder)
|
||||
|
||||
for _, group := range in.OverlapGroups {
|
||||
resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, wordRunGap, wordRunReorderWindow, contextWindow)
|
||||
resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, claimedContextRefs, wordRunGap, wordRunReorderWindow, contextWindow)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, ResolutionSummary{}, err
|
||||
}
|
||||
@@ -58,6 +59,9 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
|
||||
for sourceRef, order := range resolved.replacementOrder {
|
||||
replacementOrder[sourceRef] = order
|
||||
}
|
||||
for _, ref := range resolved.contextRefs {
|
||||
claimedContextRefs[ref] = struct{}{}
|
||||
}
|
||||
|
||||
for _, ref := range group.Segments {
|
||||
clearAnnotationRefs[ref] = struct{}{}
|
||||
@@ -107,6 +111,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow
|
||||
|
||||
type resolvedGroup struct {
|
||||
removeRefs []string
|
||||
contextRefs []string
|
||||
replacements []model.Segment
|
||||
replacementOrder map[string]replacementOrder
|
||||
}
|
||||
@@ -131,9 +136,10 @@ type wordRun struct {
|
||||
end float64
|
||||
}
|
||||
|
||||
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) {
|
||||
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, claimedContextRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) {
|
||||
segmentsBySpeaker := make(map[string][]model.Segment)
|
||||
refsBySpeaker := make(map[string][]string)
|
||||
contextRefs := make([]string, 0)
|
||||
groupRefs := make(map[string]struct{}, len(group.Segments))
|
||||
groupSpeakers := make(map[string]struct{})
|
||||
for _, ref := range group.Segments {
|
||||
@@ -154,6 +160,9 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
|
||||
if _, exists := overlapRefs[ref]; exists {
|
||||
continue
|
||||
}
|
||||
if _, exists := claimedContextRefs[ref]; exists {
|
||||
continue
|
||||
}
|
||||
if _, exists := groupSpeakers[segment.Speaker]; !exists {
|
||||
continue
|
||||
}
|
||||
@@ -166,6 +175,9 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
|
||||
}
|
||||
segmentsBySpeaker[segment.Speaker] = append(segmentsBySpeaker[segment.Speaker], segment)
|
||||
refsBySpeaker[segment.Speaker] = append(refsBySpeaker[segment.Speaker], ref)
|
||||
if _, exists := groupRefs[ref]; !exists {
|
||||
contextRefs = append(contextRefs, ref)
|
||||
}
|
||||
}
|
||||
|
||||
speakers := groupSpeakerOrder(group, segmentsBySpeaker)
|
||||
@@ -187,6 +199,7 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
|
||||
resolved.replacements = append(resolved.replacements, replacementSegment(group.ID, speakerIndex+1, runIndex+1, speaker, run))
|
||||
}
|
||||
}
|
||||
resolved.contextRefs = uniqueStrings(contextRefs)
|
||||
|
||||
resolved.replacements, resolved.replacementOrder = reorderReplacementSegments(group.ID, resolved.replacements, wordRunReorderWindow)
|
||||
return resolved, nil
|
||||
@@ -466,3 +479,16 @@ func uniqueSortedStrings(values []string) []string {
|
||||
sort.Strings(unique)
|
||||
return unique
|
||||
}
|
||||
|
||||
// uniqueStrings returns the distinct elements of values in order of first
// occurrence. The input slice is not modified and the result is not sorted.
// The returned slice is always freshly allocated and non-nil, even when values
// is nil or empty.
func uniqueStrings(values []string) []string {
	seen := make(map[string]struct{}, len(values))
	unique := make([]string, 0, len(values))
	for _, value := range values {
		// Keep only the first occurrence so the original ordering is preserved.
		if _, exists := seen[value]; exists {
			continue
		}
		seen[value] = struct{}{}
		unique = append(unique, value)
	}
	return unique
}
|
||||
|
||||
Reference in New Issue
Block a user