Added a module to coalesce adjacent same-speaker segments
This commit is contained in:
@@ -18,7 +18,7 @@ type ResolutionSummary struct {
|
||||
|
||||
// Resolve replaces detected overlap-group segments with word-run segments when
|
||||
// word-level timing is available.
|
||||
func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscript, ResolutionSummary, error) {
|
||||
func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64) (model.MergedTranscript, ResolutionSummary, error) {
|
||||
summary := ResolutionSummary{
|
||||
GroupsProcessed: len(in.OverlapGroups),
|
||||
}
|
||||
@@ -35,9 +35,10 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
clearAnnotationRefs := make(map[string]struct{})
|
||||
removeGroupIDs := make(map[int]struct{})
|
||||
replacements := make([]model.Segment, 0)
|
||||
replacementOrder := make(map[string]replacementOrder)
|
||||
|
||||
for _, group := range in.OverlapGroups {
|
||||
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap)
|
||||
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap, wordRunReorderWindow)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, ResolutionSummary{}, err
|
||||
}
|
||||
@@ -48,6 +49,9 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
summary.GroupsChanged++
|
||||
removeGroupIDs[group.ID] = struct{}{}
|
||||
replacements = append(replacements, resolved.replacements...)
|
||||
for sourceRef, order := range resolved.replacementOrder {
|
||||
replacementOrder[sourceRef] = order
|
||||
}
|
||||
|
||||
for _, ref := range group.Segments {
|
||||
clearAnnotationRefs[ref] = struct{}{}
|
||||
@@ -78,7 +82,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
}
|
||||
segments = append(segments, replacements...)
|
||||
sort.SliceStable(segments, func(i, j int) bool {
|
||||
return model.SegmentLess(segments[i], segments[j])
|
||||
return resolvedSegmentLess(segments[i], segments[j], replacementOrder)
|
||||
})
|
||||
|
||||
overlapGroups := make([]model.OverlapGroup, 0, len(in.OverlapGroups)-len(removeGroupIDs))
|
||||
@@ -96,8 +100,15 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
}
|
||||
|
||||
type resolvedGroup struct {
|
||||
removeRefs []string
|
||||
replacements []model.Segment
|
||||
removeRefs []string
|
||||
replacements []model.Segment
|
||||
replacementOrder map[string]replacementOrder
|
||||
}
|
||||
|
||||
// replacementOrder records how a replacement segment sorts relative to the
// other replacement segments it was clustered with by
// reorderReplacementSegments.
type replacementOrder struct {
	// cluster identifies the cluster the segment belongs to, formatted as
	// "<group ID>:<cluster index>".
	cluster string
	// rank is the segment's index inside its cluster after the cluster has been
	// sorted shortest-duration-first.
	rank int
	// anchor is the Start of the cluster's first segment in model.SegmentLess
	// order; resolvedSegmentLess uses it in place of the segment's own Start
	// when comparing against segments outside the cluster.
	anchor float64
}
|
||||
|
||||
type resolutionWord struct {
|
||||
@@ -114,7 +125,7 @@ type wordRun struct {
|
||||
end float64
|
||||
}
|
||||
|
||||
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64) (resolvedGroup, error) {
|
||||
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64, wordRunReorderWindow float64) (resolvedGroup, error) {
|
||||
segmentsBySpeaker := make(map[string][]model.Segment)
|
||||
refsBySpeaker := make(map[string][]string)
|
||||
for _, ref := range group.Segments {
|
||||
@@ -147,9 +158,76 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
|
||||
}
|
||||
}
|
||||
|
||||
resolved.replacements, resolved.replacementOrder = reorderReplacementSegments(group.ID, resolved.replacements, wordRunReorderWindow)
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
func reorderReplacementSegments(groupID int, replacements []model.Segment, wordRunReorderWindow float64) ([]model.Segment, map[string]replacementOrder) {
|
||||
if len(replacements) == 0 {
|
||||
return replacements, nil
|
||||
}
|
||||
|
||||
ordered := append([]model.Segment(nil), replacements...)
|
||||
sort.SliceStable(ordered, func(i, j int) bool {
|
||||
return model.SegmentLess(ordered[i], ordered[j])
|
||||
})
|
||||
|
||||
ranks := make(map[string]replacementOrder, len(ordered))
|
||||
clusterStart := 0
|
||||
clusterIndex := 1
|
||||
for clusterStart < len(ordered) {
|
||||
clusterEnd := clusterStart + 1
|
||||
for clusterEnd < len(ordered) && ordered[clusterEnd].Start-ordered[clusterEnd-1].Start <= wordRunReorderWindow {
|
||||
clusterEnd++
|
||||
}
|
||||
|
||||
cluster := ordered[clusterStart:clusterEnd]
|
||||
anchor := cluster[0].Start
|
||||
sort.SliceStable(cluster, func(i, j int) bool {
|
||||
leftDuration := cluster[i].End - cluster[i].Start
|
||||
rightDuration := cluster[j].End - cluster[j].Start
|
||||
if leftDuration != rightDuration {
|
||||
return leftDuration < rightDuration
|
||||
}
|
||||
return model.SegmentLess(cluster[i], cluster[j])
|
||||
})
|
||||
|
||||
clusterKey := fmt.Sprintf("%d:%d", groupID, clusterIndex)
|
||||
for index := range cluster {
|
||||
ranks[cluster[index].SourceRef] = replacementOrder{
|
||||
cluster: clusterKey,
|
||||
rank: index,
|
||||
anchor: anchor,
|
||||
}
|
||||
}
|
||||
|
||||
clusterStart = clusterEnd
|
||||
clusterIndex++
|
||||
}
|
||||
|
||||
return ordered, ranks
|
||||
}
|
||||
|
||||
func resolvedSegmentLess(left model.Segment, right model.Segment, replacementOrder map[string]replacementOrder) bool {
|
||||
leftOrder, leftHasOrder := replacementOrder[left.SourceRef]
|
||||
rightOrder, rightHasOrder := replacementOrder[right.SourceRef]
|
||||
if leftHasOrder && rightHasOrder && leftOrder.cluster == rightOrder.cluster && leftOrder.rank != rightOrder.rank {
|
||||
return leftOrder.rank < rightOrder.rank
|
||||
}
|
||||
leftStart := left.Start
|
||||
if leftHasOrder {
|
||||
leftStart = leftOrder.anchor
|
||||
}
|
||||
rightStart := right.Start
|
||||
if rightHasOrder {
|
||||
rightStart = rightOrder.anchor
|
||||
}
|
||||
if leftStart != rightStart {
|
||||
return leftStart < rightStart
|
||||
}
|
||||
return model.SegmentLess(left, right)
|
||||
}
|
||||
|
||||
func groupSpeakerOrder(group model.OverlapGroup, segmentsBySpeaker map[string][]model.Segment) []string {
|
||||
seen := make(map[string]struct{}, len(group.Speakers))
|
||||
speakers := make([]string, 0, len(group.Speakers))
|
||||
|
||||
Reference in New Issue
Block a user