// Package backchannel tags short acknowledgement segments (for example
// "yeah", "uh-huh" or "got it") in a merged transcript.
package backchannel

import (
	"regexp"
	"strings"
	"unicode"

	"gitea.maximumdirect.net/eric/seriatim/internal/model"
)

// Category is the label appended to a segment's Categories when the segment
// is recognised as a backchannel.
const Category = "backchannel"

// patterns lists the expressions tried, in order, against a segment's
// normalized text (see normalizeForMatching). Every pattern is anchored at
// both ends and case-insensitive, so the whole text has to be an
// acknowledgement phrase, not merely contain one.
//
// Normalization replaces punctuation with spaces before matching, so the
// punctuation alternatives inside the expressions (such as the trailing
// `\.?`) are not expected to fire for text that comes through matches.
var patterns = []*regexp.Regexp{
	// A single acknowledgement token.
	regexp.MustCompile(`(?i)^(yeah|yep|yes|right|okay|ok|sure|mm+h?m+|mm+\s+hmm|uh[- ]huh|mhm|mm-hmm)\.?$`),
	// A run of acknowledgement tokens, e.g. "yeah yeah" or "ok right".
	regexp.MustCompile(`(?i)^(yeah|yep|right|okay|ok)([,.\s]+(yeah|yep|right|okay|ok))*\.?$`),
	// Short fixed acknowledgement phrases.
	regexp.MustCompile(`(?i)^(i see|got it|makes sense|that makes sense|fair enough|sounds good|there you go)\.?$`),
}

// Apply appends Category to each segment of in that looks like a backchannel
// (see matches) and does not already carry that category.
//
// It returns the transcript together with the number of segments that were
// newly tagged. Segments are updated in place through in.Segments; assuming
// that field is a slice, the caller's underlying segment storage observes
// the same changes.
func Apply(in model.MergedTranscript, maxDuration float64) (model.MergedTranscript, int) {
	count := 0
	for i := range in.Segments {
		seg := &in.Segments[i]
		// Skip segments that are already tagged or that do not qualify.
		if hasCategory(*seg, Category) || !matches(*seg, maxDuration) {
			continue
		}
		seg.Categories = append(seg.Categories, Category)
		count++
	}
	return in, count
}

// matches reports whether segment qualifies as a backchannel: its span
// (End - Start) must not exceed maxDuration, its normalized text must have
// between one and three words, and that text must match one of patterns.
func matches(segment model.Segment, maxDuration float64) bool {
	// Upper bound on the number of words a backchannel may contain.
	const maxWords = 3

	if segment.End-segment.Start > maxDuration {
		return false
	}

	text := normalizeForMatching(segment.Text)
	// Normalized text is empty exactly when it contains no words.
	if words := len(strings.Fields(text)); words == 0 || words > maxWords {
		return false
	}

	for i := range patterns {
		if patterns[i].MatchString(text) {
			return true
		}
	}
	return false
}

// normalizeForMatching prepares text for pattern matching: every punctuation
// rune (per unicode.IsPunct) becomes a space, then runs of whitespace are
// collapsed to single spaces and leading/trailing whitespace is dropped.
func normalizeForMatching(text string) string {
	punctToSpace := func(r rune) rune {
		if unicode.IsPunct(r) {
			return ' '
		}
		return r
	}
	words := strings.Fields(strings.Map(punctToSpace, text))
	return strings.Join(words, " ")
}

// hasCategory reports whether segment.Categories already contains category.
func hasCategory(segment model.Segment, category string) bool {
	for i := 0; i < len(segment.Categories); i++ {
		if segment.Categories[i] == category {
			return true
		}
	}
	return false
}