72 lines
1.7 KiB
Go
72 lines
1.7 KiB
Go
package backchannel
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
)
|
|
|
|
// Category is the category name that Apply appends to each segment it
// recognizes as a backchannel acknowledgement.
const Category = "backchannel"
|
|
|
|
// patterns lists the case-insensitive, fully anchored expressions that
// identify a backchannel utterance. matches evaluates them against text that
// normalizeForMatching has already stripped of punctuation and collapsed to
// single spaces, so a spoken "Uh-huh." reaches them as "Uh huh".
var patterns = []*regexp.Regexp{
	// A single acknowledgement token: "yeah", "ok", "mmhmm", "uh huh", ...
	regexp.MustCompile(`(?i)^(yeah|yep|yes|right|okay|ok|sure|mm+h?m+|mm+\s+hmm|uh[- ]huh|mhm|mm-hmm)\.?$`),
	// A run of repeated acknowledgement tokens: "yeah yeah", "ok right", ...
	regexp.MustCompile(`(?i)^(yeah|yep|right|okay|ok)([,.\s]+(yeah|yep|right|okay|ok))*\.?$`),
	// Short stock acknowledgement phrases: "i see", "got it", ...
	regexp.MustCompile(`(?i)^(i see|got it|makes sense|that makes sense|fair enough|sounds good|there you go)\.?$`),
}
|
|
|
|
// Apply tags matching short acknowledgement segments.
|
|
func Apply(in model.MergedTranscript, maxDuration float64) (model.MergedTranscript, int) {
|
|
tagged := 0
|
|
for index := range in.Segments {
|
|
if !matches(in.Segments[index], maxDuration) {
|
|
continue
|
|
}
|
|
if hasCategory(in.Segments[index], Category) {
|
|
continue
|
|
}
|
|
in.Segments[index].Categories = append(in.Segments[index].Categories, Category)
|
|
tagged++
|
|
}
|
|
return in, tagged
|
|
}
|
|
|
|
func matches(segment model.Segment, maxDuration float64) bool {
|
|
text := normalizeForMatching(segment.Text)
|
|
if text == "" {
|
|
return false
|
|
}
|
|
if len(strings.Fields(text)) > 3 {
|
|
return false
|
|
}
|
|
if segment.End-segment.Start > maxDuration {
|
|
return false
|
|
}
|
|
for _, pattern := range patterns {
|
|
if pattern.MatchString(text) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// normalizeForMatching prepares transcript text for pattern matching.
//
// Every Unicode punctuation rune (as reported by unicode.IsPunct) is replaced
// by a space; leading and trailing whitespace is then dropped and each run of
// whitespace collapses to a single space. Letter case is left untouched.
// Text consisting only of punctuation and whitespace yields "".
func normalizeForMatching(text string) string {
	spaced := strings.Map(func(r rune) rune {
		if unicode.IsPunct(r) {
			// Use a space rather than deleting the rune so that
			// "uh-huh" becomes two words instead of fusing into "uhhuh".
			return ' '
		}
		return r
	}, text)
	return strings.Join(strings.Fields(spaced), " ")
}
|
|
|
|
func hasCategory(segment model.Segment, category string) bool {
|
|
for _, existing := range segment.Categories {
|
|
if existing == category {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|