Minor updates to overlap detection and segment coalescing logic

This commit is contained in:
2026-04-28 14:11:38 -05:00
parent 28c2eea340
commit a3ca6665a9
14 changed files with 662 additions and 95 deletions

View File

@@ -3,6 +3,7 @@ package backchannel
import (
"regexp"
"strings"
"unicode"
"gitea.maximumdirect.net/eric/seriatim/internal/model"
)
@@ -10,16 +11,16 @@ import (
const Category = "backchannel"
var patterns = []*regexp.Regexp{
regexp.MustCompile(`(?i)^(yeah|yep|yes|right|okay|ok|sure|mm+h?m+|uh[- ]huh|mhm|mm-hmm)\.?$`),
regexp.MustCompile(`(?i)^(yeah|yep|yes|right|okay|ok|sure|mm+h?m+|mm+\s+hmm|uh[- ]huh|mhm|mm-hmm)\.?$`),
regexp.MustCompile(`(?i)^(yeah|yep|right|okay|ok)([,.\s]+(yeah|yep|right|okay|ok))*\.?$`),
regexp.MustCompile(`(?i)^(i see|got it|makes sense|that makes sense|fair enough|sounds good)\.?$`),
regexp.MustCompile(`(?i)^(i see|got it|makes sense|that makes sense|fair enough|sounds good|there you go)\.?$`),
}
// Apply tags matching short acknowledgement segments.
func Apply(in model.MergedTranscript) (model.MergedTranscript, int) {
func Apply(in model.MergedTranscript, maxDuration float64) (model.MergedTranscript, int) {
tagged := 0
for index := range in.Segments {
if !matches(in.Segments[index]) {
if !matches(in.Segments[index], maxDuration) {
continue
}
if hasCategory(in.Segments[index], Category) {
@@ -31,15 +32,15 @@ func Apply(in model.MergedTranscript) (model.MergedTranscript, int) {
return in, tagged
}
func matches(segment model.Segment) bool {
text := strings.TrimSpace(segment.Text)
func matches(segment model.Segment, maxDuration float64) bool {
text := normalizeForMatching(segment.Text)
if text == "" {
return false
}
if len(strings.Fields(text)) > 3 {
return false
}
if segment.End-segment.Start > 1.0 {
if segment.End-segment.Start > maxDuration {
return false
}
for _, pattern := range patterns {
@@ -50,6 +51,16 @@ func matches(segment model.Segment) bool {
return false
}
// normalizeForMatching canonicalizes text before it is compared against the
// acknowledgement patterns. Every rune that unicode.IsPunct reports as
// punctuation is treated as a word separator, and the remaining words are
// re-joined with single spaces, so the result has no leading, trailing, or
// repeated whitespace.
func normalizeForMatching(text string) string {
	var cleaned strings.Builder
	cleaned.Grow(len(text))
	for _, r := range text {
		if unicode.IsPunct(r) {
			// Punctuation becomes a space so "uh-huh" splits into two words.
			cleaned.WriteByte(' ')
			continue
		}
		cleaned.WriteRune(r)
	}
	words := strings.Fields(cleaned.String())
	return strings.Join(words, " ")
}
func hasCategory(segment model.Segment, category string) bool {
for _, existing := range segment.Categories {
if existing == category {