package overlap

import (
	"fmt"

	"gitea.maximumdirect.net/eric/seriatim/internal/model"
)

// Placeholder values stamped on every overlap group created by this package.
const (
	defaultClass      = "unknown"
	defaultResolution = "unresolved"
)

// Detect annotates overlapping segment groups in an already sorted merged
// transcript.
//
// Any existing overlap annotations are discarded first. The segments are then
// swept in order: a segment whose Start is strictly before the running end of
// the current run joins that run, otherwise the run is closed and a new one
// begins. A closed run becomes an overlap group only when it holds at least
// two segments from at least two distinct speakers. Group IDs are assigned
// sequentially starting at 1; 0 is the cleared (no group) value.
//
// NOTE: although the transcript is passed by value, in.Segments shares its
// backing array with the caller's slice, so OverlapGroupID is rewritten on
// the caller's segments as well.
func Detect(in model.MergedTranscript) model.MergedTranscript {
	clearExisting(&in)
	if len(in.Segments) < 2 {
		return in
	}

	// The length guard above guarantees a first segment to seed the sweep.
	lastGroupID := 0
	current := newCandidate(0, in.Segments[0])
	for i := 1; i < len(in.Segments); i++ {
		seg := in.Segments[i]
		if seg.Start < current.end {
			current.add(i, seg)
		} else {
			lastGroupID = finalizeCandidate(&in, current, lastGroupID)
			current = newCandidate(i, seg)
		}
	}

	// Flush the run that was still open when the input ended.
	finalizeCandidate(&in, current, lastGroupID)
	return in
}

// overlapCandidate accumulates a run of segments that may form an overlap
// group.
type overlapCandidate struct {
	active  bool    // false only for the zero value, i.e. no run started
	indices []int   // positions of the member segments within the transcript
	start   float64 // Start of the segment that opened the run
	end     float64 // largest End seen among the member segments
}

// newCandidate opens a run containing only the given segment.
func newCandidate(index int, segment model.Segment) overlapCandidate {
	var c overlapCandidate
	c.active = true
	c.indices = []int{index}
	c.start = segment.Start
	c.end = segment.End
	return c
}

// add appends the segment to the run and extends the run's end when the
// segment finishes later than everything seen so far.
func (c *overlapCandidate) add(index int, segment model.Segment) {
	c.indices = append(c.indices, index)
	if c.end < segment.End {
		c.end = segment.End
	}
}

// finalizeCandidate turns a finished run into an overlap group when it
// qualifies, and returns the most recently assigned group ID.
//
// A run qualifies when it is active, holds at least two segments, and those
// segments come from at least two distinct speakers. For a qualifying run the
// next ID (currentGroupID + 1) is written to each member segment and a
// matching group is appended to in.OverlapGroups. Otherwise nothing is
// modified and currentGroupID is returned unchanged.
func finalizeCandidate(in *model.MergedTranscript, candidate overlapCandidate, currentGroupID int) int {
	if !candidate.active || len(candidate.indices) < 2 {
		return currentGroupID
	}

	// Several segments from a single speaker do not count as an overlap.
	speakers := distinctSpeakers(in.Segments, candidate.indices)
	if len(speakers) < 2 {
		return currentGroupID
	}

	nextID := currentGroupID + 1
	members := make([]string, 0, len(candidate.indices))
	for _, i := range candidate.indices {
		seg := &in.Segments[i]
		seg.OverlapGroupID = nextID
		members = append(members, segmentRef(*seg))
	}

	group := model.OverlapGroup{
		ID:         nextID,
		Start:      candidate.start,
		End:        candidate.end,
		Segments:   members,
		Speakers:   speakers,
		Class:      defaultClass,
		Resolution: defaultResolution,
	}
	in.OverlapGroups = append(in.OverlapGroups, group)
	return nextID
}

// distinctSpeakers returns the speakers of the segments at the given indices,
// de-duplicated and in order of first appearance.
func distinctSpeakers(segments []model.Segment, indices []int) []string {
	unique := make([]string, 0, len(indices))
	seen := make(map[string]struct{}, len(indices))
	for _, i := range indices {
		name := segments[i].Speaker
		if _, dup := seen[name]; !dup {
			seen[name] = struct{}{}
			unique = append(unique, name)
		}
	}
	return unique
}

// segmentRef builds the "<source>#<source segment index>" reference string
// used to identify a segment inside an overlap group.
func segmentRef(segment model.Segment) string {
	ref := fmt.Sprintf("%s#%d", segment.Source, segment.SourceSegmentIndex)
	return ref
}

// clearExisting drops all previously recorded overlap groups and resets every
// segment's group ID to 0. OverlapGroups is left as an empty, non-nil slice.
func clearExisting(in *model.MergedTranscript) {
	in.OverlapGroups = []model.OverlapGroup{}
	for i := range in.Segments {
		in.Segments[i].OverlapGroupID = 0
	}
}