139 lines
3.2 KiB
Go
139 lines
3.2 KiB
Go
package overlap
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
)
|
|
|
|
// Placeholder labels written into every overlap group built in this file.
const (
	// defaultClass is the Class value given to a newly created overlap group.
	defaultClass = "unknown"

	// defaultResolution is the Resolution value given to a newly created
	// overlap group.
	defaultResolution = "unresolved"
)
|
|
|
|
// Detect annotates overlapping segment groups in an already sorted merged transcript.
|
|
func Detect(in model.MergedTranscript) model.MergedTranscript {
|
|
clearExisting(&in)
|
|
if len(in.Segments) < 2 {
|
|
return in
|
|
}
|
|
|
|
order := sortedSegmentIndices(in.Segments)
|
|
var groupID int
|
|
var candidate overlapCandidate
|
|
for _, index := range order {
|
|
segment := in.Segments[index]
|
|
if !candidate.active {
|
|
candidate = newCandidate(index, segment)
|
|
continue
|
|
}
|
|
|
|
if segment.Start < candidate.end {
|
|
candidate.add(index, segment)
|
|
continue
|
|
}
|
|
|
|
groupID = finalizeCandidate(&in, candidate, groupID)
|
|
candidate = newCandidate(index, segment)
|
|
}
|
|
|
|
finalizeCandidate(&in, candidate, groupID)
|
|
return in
|
|
}
|
|
|
|
func sortedSegmentIndices(segments []model.Segment) []int {
|
|
indices := make([]int, len(segments))
|
|
for index := range segments {
|
|
indices[index] = index
|
|
}
|
|
sort.SliceStable(indices, func(i, j int) bool {
|
|
return model.SegmentLess(segments[indices[i]], segments[indices[j]])
|
|
})
|
|
return indices
|
|
}
|
|
|
|
// overlapCandidate accumulates a run of segments that may form an overlap
// group.
type overlapCandidate struct {
	// active is true once the candidate has been seeded with a segment; the
	// zero value is an inactive candidate.
	active bool
	// indices holds the positions (into the transcript's segment slice) of
	// the segments collected so far, in the order they were added.
	indices []int
	// start is the Start of the seeding segment; end is the largest End seen
	// among the collected segments.
	start, end float64
}
|
|
|
|
func newCandidate(index int, segment model.Segment) overlapCandidate {
|
|
return overlapCandidate{
|
|
active: true,
|
|
indices: []int{index},
|
|
start: segment.Start,
|
|
end: segment.End,
|
|
}
|
|
}
|
|
|
|
func (c *overlapCandidate) add(index int, segment model.Segment) {
|
|
c.indices = append(c.indices, index)
|
|
if segment.End > c.end {
|
|
c.end = segment.End
|
|
}
|
|
}
|
|
|
|
func finalizeCandidate(in *model.MergedTranscript, candidate overlapCandidate, currentGroupID int) int {
|
|
if !candidate.active || len(candidate.indices) < 2 {
|
|
return currentGroupID
|
|
}
|
|
|
|
speakers := distinctSpeakers(in.Segments, candidate.indices)
|
|
if len(speakers) < 2 {
|
|
return currentGroupID
|
|
}
|
|
|
|
groupID := currentGroupID + 1
|
|
refs := make([]string, 0, len(candidate.indices))
|
|
for _, index := range candidate.indices {
|
|
in.Segments[index].OverlapGroupID = groupID
|
|
refs = append(refs, SegmentRef(in.Segments[index]))
|
|
}
|
|
|
|
in.OverlapGroups = append(in.OverlapGroups, model.OverlapGroup{
|
|
ID: groupID,
|
|
Start: candidate.start,
|
|
End: candidate.end,
|
|
Segments: refs,
|
|
Speakers: speakers,
|
|
Class: defaultClass,
|
|
Resolution: defaultResolution,
|
|
})
|
|
return groupID
|
|
}
|
|
|
|
func distinctSpeakers(segments []model.Segment, indices []int) []string {
|
|
seen := make(map[string]struct{}, len(indices))
|
|
speakers := make([]string, 0, len(indices))
|
|
for _, index := range indices {
|
|
speaker := segments[index].Speaker
|
|
if _, exists := seen[speaker]; exists {
|
|
continue
|
|
}
|
|
seen[speaker] = struct{}{}
|
|
speakers = append(speakers, speaker)
|
|
}
|
|
return speakers
|
|
}
|
|
|
|
// SegmentRef returns the stable overlap reference for a segment.
|
|
func SegmentRef(segment model.Segment) string {
|
|
if segment.SourceSegmentIndex != nil {
|
|
return fmt.Sprintf("%s#%d", segment.Source, *segment.SourceSegmentIndex)
|
|
}
|
|
if segment.SourceRef != "" {
|
|
return segment.SourceRef
|
|
}
|
|
return segment.Source
|
|
}
|
|
|
|
func clearExisting(in *model.MergedTranscript) {
|
|
in.OverlapGroups = make([]model.OverlapGroup, 0)
|
|
for index := range in.Segments {
|
|
in.Segments[index].OverlapGroupID = 0
|
|
}
|
|
}
|