472 lines
13 KiB
Go
472 lines
13 KiB
Go
package overlap
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
)
|
|
|
|
// ResolutionSummary records deterministic counters for a resolve-overlaps pass.
type ResolutionSummary struct {
	// GroupsProcessed is the number of overlap groups present in the input.
	GroupsProcessed int
	// GroupsChanged counts the groups that produced at least one replacement
	// segment.
	GroupsChanged int
	// OriginalsRemoved counts the distinct original segment references that
	// were removed in favor of replacements.
	OriginalsRemoved int
	// ReplacementsCreated is the total number of replacement segments created
	// across all changed groups.
	ReplacementsCreated int
}
|
|
|
|
// Resolve replaces detected overlap-group segments with word-run segments when
|
|
// word-level timing is available.
|
|
func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (model.MergedTranscript, ResolutionSummary, error) {
|
|
summary := ResolutionSummary{
|
|
GroupsProcessed: len(in.OverlapGroups),
|
|
}
|
|
if len(in.OverlapGroups) == 0 {
|
|
return in, summary, nil
|
|
}
|
|
|
|
refToIndex := make(map[string]int, len(in.Segments))
|
|
for index, segment := range in.Segments {
|
|
refToIndex[SegmentRef(segment)] = index
|
|
}
|
|
overlapRefs := make(map[string]struct{})
|
|
for _, group := range in.OverlapGroups {
|
|
for _, ref := range group.Segments {
|
|
overlapRefs[ref] = struct{}{}
|
|
}
|
|
}
|
|
|
|
removeRefs := make(map[string]struct{})
|
|
clearAnnotationRefs := make(map[string]struct{})
|
|
removeGroupIDs := make(map[int]struct{})
|
|
replacements := make([]model.Segment, 0)
|
|
replacementOrder := make(map[string]replacementOrder)
|
|
|
|
for _, group := range in.OverlapGroups {
|
|
resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, wordRunGap, wordRunReorderWindow, contextWindow)
|
|
if err != nil {
|
|
return model.MergedTranscript{}, ResolutionSummary{}, err
|
|
}
|
|
if len(resolved.replacements) == 0 {
|
|
continue
|
|
}
|
|
|
|
summary.GroupsChanged++
|
|
removeGroupIDs[group.ID] = struct{}{}
|
|
replacements = append(replacements, resolved.replacements...)
|
|
for sourceRef, order := range resolved.replacementOrder {
|
|
replacementOrder[sourceRef] = order
|
|
}
|
|
|
|
for _, ref := range group.Segments {
|
|
clearAnnotationRefs[ref] = struct{}{}
|
|
}
|
|
for _, ref := range resolved.removeRefs {
|
|
if _, exists := removeRefs[ref]; !exists {
|
|
summary.OriginalsRemoved++
|
|
}
|
|
removeRefs[ref] = struct{}{}
|
|
}
|
|
summary.ReplacementsCreated += len(resolved.replacements)
|
|
}
|
|
|
|
if summary.GroupsChanged == 0 {
|
|
return in, summary, nil
|
|
}
|
|
|
|
segments := make([]model.Segment, 0, len(in.Segments)-len(removeRefs)+len(replacements))
|
|
for _, segment := range in.Segments {
|
|
ref := SegmentRef(segment)
|
|
if _, remove := removeRefs[ref]; remove {
|
|
continue
|
|
}
|
|
if _, clear := clearAnnotationRefs[ref]; clear {
|
|
segment.OverlapGroupID = 0
|
|
}
|
|
segments = append(segments, segment)
|
|
}
|
|
segments = append(segments, replacements...)
|
|
sort.SliceStable(segments, func(i, j int) bool {
|
|
return resolvedSegmentLess(segments[i], segments[j], replacementOrder)
|
|
})
|
|
|
|
overlapGroups := make([]model.OverlapGroup, 0, len(in.OverlapGroups)-len(removeGroupIDs))
|
|
for _, group := range in.OverlapGroups {
|
|
if _, remove := removeGroupIDs[group.ID]; remove {
|
|
continue
|
|
}
|
|
overlapGroups = append(overlapGroups, group)
|
|
}
|
|
|
|
return model.MergedTranscript{
|
|
Segments: segments,
|
|
OverlapGroups: overlapGroups,
|
|
}, summary, nil
|
|
}
|
|
|
|
type resolvedGroup struct {
|
|
removeRefs []string
|
|
replacements []model.Segment
|
|
replacementOrder map[string]replacementOrder
|
|
}
|
|
|
|
// replacementOrder is the ordering hint attached to one replacement segment.
type replacementOrder struct {
	// cluster identifies the reorder cluster, formatted as "groupID:clusterIndex".
	cluster string
	// rank is the segment's position inside its cluster.
	rank int
	// anchor is the start time used to place the whole cluster relative to
	// other segments.
	anchor float64
}
|
|
|
|
type resolutionWord struct {
|
|
word model.Word
|
|
source string
|
|
ref string
|
|
sequence int
|
|
}
|
|
|
|
type wordRun struct {
|
|
timedWords []resolutionWord
|
|
untimedWords []resolutionWord
|
|
start float64
|
|
end float64
|
|
}
|
|
|
|
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) {
|
|
segmentsBySpeaker := make(map[string][]model.Segment)
|
|
refsBySpeaker := make(map[string][]string)
|
|
groupRefs := make(map[string]struct{}, len(group.Segments))
|
|
groupSpeakers := make(map[string]struct{})
|
|
for _, ref := range group.Segments {
|
|
index, exists := refToIndex[ref]
|
|
if !exists {
|
|
return resolvedGroup{}, fmt.Errorf("overlap group %d references missing segment %q", group.ID, ref)
|
|
}
|
|
groupRefs[ref] = struct{}{}
|
|
segment := in.Segments[index]
|
|
groupSpeakers[segment.Speaker] = struct{}{}
|
|
}
|
|
|
|
expandedStart := group.Start - contextWindow
|
|
expandedEnd := group.End + contextWindow
|
|
for _, segment := range in.Segments {
|
|
ref := SegmentRef(segment)
|
|
if _, exists := groupRefs[ref]; !exists {
|
|
if _, exists := overlapRefs[ref]; exists {
|
|
continue
|
|
}
|
|
if _, exists := groupSpeakers[segment.Speaker]; !exists {
|
|
continue
|
|
}
|
|
if !intervalIntersects(segment.Start, segment.End, expandedStart, expandedEnd) {
|
|
continue
|
|
}
|
|
if !segmentNearGroupBoundary(segment, group, contextWindow) {
|
|
continue
|
|
}
|
|
}
|
|
segmentsBySpeaker[segment.Speaker] = append(segmentsBySpeaker[segment.Speaker], segment)
|
|
refsBySpeaker[segment.Speaker] = append(refsBySpeaker[segment.Speaker], ref)
|
|
}
|
|
|
|
speakers := groupSpeakerOrder(group, segmentsBySpeaker)
|
|
resolved := resolvedGroup{}
|
|
for speakerIndex, speaker := range speakers {
|
|
timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], expandedStart, expandedEnd)
|
|
if len(timedWords) == 0 {
|
|
continue
|
|
}
|
|
|
|
runs := buildWordRuns(timedWords, wordRunGap)
|
|
if len(runs) == 0 {
|
|
continue
|
|
}
|
|
attachUntimedWords(runs, untimedWords)
|
|
|
|
resolved.removeRefs = append(resolved.removeRefs, refsBySpeaker[speaker]...)
|
|
for runIndex, run := range runs {
|
|
resolved.replacements = append(resolved.replacements, replacementSegment(group.ID, speakerIndex+1, runIndex+1, speaker, run))
|
|
}
|
|
}
|
|
|
|
resolved.replacements, resolved.replacementOrder = reorderReplacementSegments(group.ID, resolved.replacements, wordRunReorderWindow)
|
|
return resolved, nil
|
|
}
|
|
|
|
// intervalIntersects reports whether [start, end] and [windowStart, windowEnd]
// share more than a single boundary point: intervals that merely touch do not
// intersect.
func intervalIntersects(start float64, end float64, windowStart float64, windowEnd float64) bool {
	if end > windowStart {
		return start < windowEnd
	}
	return false
}
|
|
|
|
func segmentNearGroupBoundary(segment model.Segment, group model.OverlapGroup, window float64) bool {
|
|
return withinWindow(segment.Start, group.Start, window) ||
|
|
withinWindow(segment.End, group.Start, window) ||
|
|
withinWindow(segment.Start, group.End, window) ||
|
|
withinWindow(segment.End, group.End, window)
|
|
}
|
|
|
|
// withinWindow reports whether the absolute distance between value and
// boundary is at most window.
func withinWindow(value float64, boundary float64, window float64) bool {
	distance := boundary - value
	if value > boundary {
		distance = value - boundary
	}
	return distance <= window
}
|
|
|
|
func reorderReplacementSegments(groupID int, replacements []model.Segment, wordRunReorderWindow float64) ([]model.Segment, map[string]replacementOrder) {
|
|
if len(replacements) == 0 {
|
|
return replacements, nil
|
|
}
|
|
|
|
ordered := append([]model.Segment(nil), replacements...)
|
|
sort.SliceStable(ordered, func(i, j int) bool {
|
|
return model.SegmentLess(ordered[i], ordered[j])
|
|
})
|
|
|
|
ranks := make(map[string]replacementOrder, len(ordered))
|
|
clusterStart := 0
|
|
clusterIndex := 1
|
|
for clusterStart < len(ordered) {
|
|
clusterEnd := clusterStart + 1
|
|
for clusterEnd < len(ordered) && ordered[clusterEnd].Start-ordered[clusterEnd-1].Start <= wordRunReorderWindow {
|
|
clusterEnd++
|
|
}
|
|
|
|
cluster := ordered[clusterStart:clusterEnd]
|
|
anchor := cluster[0].Start
|
|
sort.SliceStable(cluster, func(i, j int) bool {
|
|
leftDuration := cluster[i].End - cluster[i].Start
|
|
rightDuration := cluster[j].End - cluster[j].Start
|
|
if leftDuration != rightDuration {
|
|
return leftDuration < rightDuration
|
|
}
|
|
return model.SegmentLess(cluster[i], cluster[j])
|
|
})
|
|
|
|
clusterKey := fmt.Sprintf("%d:%d", groupID, clusterIndex)
|
|
for index := range cluster {
|
|
ranks[cluster[index].SourceRef] = replacementOrder{
|
|
cluster: clusterKey,
|
|
rank: index,
|
|
anchor: anchor,
|
|
}
|
|
}
|
|
|
|
clusterStart = clusterEnd
|
|
clusterIndex++
|
|
}
|
|
|
|
return ordered, ranks
|
|
}
|
|
|
|
func resolvedSegmentLess(left model.Segment, right model.Segment, replacementOrder map[string]replacementOrder) bool {
|
|
leftOrder, leftHasOrder := replacementOrder[left.SourceRef]
|
|
rightOrder, rightHasOrder := replacementOrder[right.SourceRef]
|
|
if leftHasOrder && rightHasOrder && leftOrder.cluster == rightOrder.cluster && leftOrder.rank != rightOrder.rank {
|
|
return leftOrder.rank < rightOrder.rank
|
|
}
|
|
leftStart := left.Start
|
|
if leftHasOrder {
|
|
leftStart = leftOrder.anchor
|
|
}
|
|
rightStart := right.Start
|
|
if rightHasOrder {
|
|
rightStart = rightOrder.anchor
|
|
}
|
|
if leftStart != rightStart {
|
|
return leftStart < rightStart
|
|
}
|
|
return model.SegmentLess(left, right)
|
|
}
|
|
|
|
func groupSpeakerOrder(group model.OverlapGroup, segmentsBySpeaker map[string][]model.Segment) []string {
|
|
seen := make(map[string]struct{}, len(group.Speakers))
|
|
speakers := make([]string, 0, len(group.Speakers))
|
|
for _, speaker := range group.Speakers {
|
|
if _, exists := segmentsBySpeaker[speaker]; !exists {
|
|
continue
|
|
}
|
|
if _, exists := seen[speaker]; exists {
|
|
continue
|
|
}
|
|
seen[speaker] = struct{}{}
|
|
speakers = append(speakers, speaker)
|
|
}
|
|
|
|
extra := make([]string, 0)
|
|
for speaker := range segmentsBySpeaker {
|
|
if _, exists := seen[speaker]; exists {
|
|
continue
|
|
}
|
|
extra = append(extra, speaker)
|
|
}
|
|
sort.Strings(extra)
|
|
speakers = append(speakers, extra...)
|
|
return speakers
|
|
}
|
|
|
|
func gatherResolutionWords(segments []model.Segment, groupStart float64, groupEnd float64) ([]resolutionWord, []resolutionWord) {
|
|
timedWords := make([]resolutionWord, 0)
|
|
untimedWords := make([]resolutionWord, 0)
|
|
sequence := 0
|
|
for _, segment := range segments {
|
|
ref := SegmentRef(segment)
|
|
for _, word := range segment.Words {
|
|
candidate := resolutionWord{
|
|
word: word,
|
|
source: segment.Source,
|
|
ref: ref,
|
|
sequence: sequence,
|
|
}
|
|
sequence++
|
|
if !word.Timed {
|
|
untimedWords = append(untimedWords, candidate)
|
|
continue
|
|
}
|
|
if word.End <= groupStart || word.Start >= groupEnd {
|
|
continue
|
|
}
|
|
timedWords = append(timedWords, candidate)
|
|
}
|
|
}
|
|
|
|
sort.SliceStable(timedWords, func(i, j int) bool {
|
|
left := timedWords[i].word
|
|
right := timedWords[j].word
|
|
if left.Start != right.Start {
|
|
return left.Start < right.Start
|
|
}
|
|
if left.End != right.End {
|
|
return left.End < right.End
|
|
}
|
|
return left.Text < right.Text
|
|
})
|
|
return timedWords, untimedWords
|
|
}
|
|
|
|
func buildWordRuns(words []resolutionWord, wordRunGap float64) []wordRun {
|
|
if len(words) == 0 {
|
|
return nil
|
|
}
|
|
|
|
runs := make([]wordRun, 0)
|
|
current := newWordRun(words[0])
|
|
previousEnd := words[0].word.End
|
|
for _, word := range words[1:] {
|
|
if word.word.Start-previousEnd <= wordRunGap {
|
|
current.add(word)
|
|
} else {
|
|
runs = append(runs, current.finish())
|
|
current = newWordRun(word)
|
|
}
|
|
previousEnd = word.word.End
|
|
}
|
|
runs = append(runs, current.finish())
|
|
return runs
|
|
}
|
|
|
|
func newWordRun(word resolutionWord) wordRun {
|
|
return wordRun{
|
|
timedWords: []resolutionWord{word},
|
|
start: word.word.Start,
|
|
end: word.word.End,
|
|
}
|
|
}
|
|
|
|
func (r *wordRun) add(word resolutionWord) {
|
|
r.timedWords = append(r.timedWords, word)
|
|
if word.word.Start < r.start {
|
|
r.start = word.word.Start
|
|
}
|
|
if word.word.End > r.end {
|
|
r.end = word.word.End
|
|
}
|
|
}
|
|
|
|
func (r wordRun) finish() wordRun {
|
|
return r
|
|
}
|
|
|
|
func attachUntimedWords(runs []wordRun, untimedWords []resolutionWord) {
|
|
if len(runs) == 0 || len(untimedWords) == 0 {
|
|
return
|
|
}
|
|
|
|
for _, word := range untimedWords {
|
|
target := 0
|
|
for index, run := range runs {
|
|
if word.sequence < run.firstSequence() {
|
|
if index == 0 {
|
|
target = 0
|
|
} else {
|
|
target = index - 1
|
|
}
|
|
break
|
|
}
|
|
target = index
|
|
}
|
|
runs[target].untimedWords = append(runs[target].untimedWords, word)
|
|
}
|
|
}
|
|
|
|
func (r wordRun) firstSequence() int {
|
|
first := r.timedWords[0].sequence
|
|
for _, word := range r.timedWords[1:] {
|
|
if word.sequence < first {
|
|
first = word.sequence
|
|
}
|
|
}
|
|
return first
|
|
}
|
|
|
|
func (r wordRun) allWordsInTextOrder() []resolutionWord {
|
|
words := make([]resolutionWord, 0, len(r.timedWords)+len(r.untimedWords))
|
|
words = append(words, r.timedWords...)
|
|
words = append(words, r.untimedWords...)
|
|
sort.SliceStable(words, func(i, j int) bool {
|
|
return words[i].sequence < words[j].sequence
|
|
})
|
|
return words
|
|
}
|
|
|
|
func replacementSegment(groupID int, speakerIndex int, runIndex int, speaker string, run wordRun) model.Segment {
|
|
orderedWords := run.allWordsInTextOrder()
|
|
words := make([]model.Word, 0, len(orderedWords))
|
|
text := make([]string, 0, len(orderedWords))
|
|
refs := make([]string, 0, len(orderedWords))
|
|
source := ""
|
|
for _, word := range orderedWords {
|
|
words = append(words, word.word)
|
|
text = append(text, word.word.Text)
|
|
refs = append(refs, word.ref)
|
|
if source == "" {
|
|
source = word.source
|
|
} else if source != word.source {
|
|
source = "derived"
|
|
}
|
|
}
|
|
|
|
return model.Segment{
|
|
Source: source,
|
|
SourceRef: fmt.Sprintf("word-run:%d:%d:%d", groupID, speakerIndex, runIndex),
|
|
DerivedFrom: uniqueSortedStrings(refs),
|
|
Speaker: speaker,
|
|
Start: run.start,
|
|
End: run.end,
|
|
Text: strings.Join(text, " "),
|
|
Words: words,
|
|
}
|
|
}
|
|
|
|
// uniqueSortedStrings returns the distinct strings of values in ascending
// order. The input slice is not modified, and the result is never nil.
func uniqueSortedStrings(values []string) []string {
	sorted := make([]string, len(values))
	copy(sorted, values)
	sort.Strings(sorted)

	// After sorting, duplicates are adjacent; compact them in place.
	distinct := sorted[:0]
	for _, value := range sorted {
		if len(distinct) > 0 && distinct[len(distinct)-1] == value {
			continue
		}
		distinct = append(distinct, value)
	}
	return distinct
}
|