123 lines
3.4 KiB
Go
123 lines
3.4 KiB
Go
package filler
|
|
|
|
import (
|
|
"reflect"
|
|
"testing"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
)
|
|
|
|
func TestApplyTagsVerySafeFillers(t *testing.T) {
|
|
for _, text := range []string{"um", "uhhh", "ER", "ermm", "ah", "eh", "hmmm", "mm", "mmm"} {
|
|
t.Run(text, func(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment(text, 1, 1.5)), 1.0)
|
|
if tagged != 1 {
|
|
t.Fatalf("tagged = %d, want 1", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], []string{Category})
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestApplyTagsRepeatedFillers(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment("um uh hmm", 1, 1.8)), 1.0)
|
|
if tagged != 1 {
|
|
t.Fatalf("tagged = %d, want 1", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], []string{Category})
|
|
}
|
|
|
|
func TestApplyMatchesTrimAwareCaseInsensitive(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment(" UM uh ", 1, 1.5)), 1.0)
|
|
if tagged != 1 {
|
|
t.Fatalf("tagged = %d, want 1", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], []string{Category})
|
|
}
|
|
|
|
func TestApplyIgnoresPunctuationWhenMatching(t *testing.T) {
|
|
for _, text := range []string{"um.", "uh?!", "um, uh... hmm!", "hmm--mm"} {
|
|
t.Run(text, func(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment(text, 1, 1.8)), 1.0)
|
|
if tagged != 1 {
|
|
t.Fatalf("tagged = %d, want 1", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], []string{Category})
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestApplyDoesNotTagNonMatches(t *testing.T) {
|
|
for _, text := range []string{"um okay", "uh-huh", "hmm, okay"} {
|
|
t.Run(text, func(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment(text, 1, 1.5)), 1.0)
|
|
if tagged != 0 {
|
|
t.Fatalf("tagged = %d, want 0", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], nil)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestApplyRejectsWordCountOverThree(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment("um uh er ah", 1, 1.5)), 1.0)
|
|
if tagged != 0 {
|
|
t.Fatalf("tagged = %d, want 0", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], nil)
|
|
}
|
|
|
|
func TestApplyUsesConfiguredMaxDuration(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment("um", 1, 2.2)), 1.25)
|
|
if tagged != 1 {
|
|
t.Fatalf("tagged = %d, want 1", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], []string{Category})
|
|
|
|
got, tagged = Apply(transcript(segment("um", 1, 2.3)), 1.25)
|
|
if tagged != 0 {
|
|
t.Fatalf("tagged = %d, want 0", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], nil)
|
|
}
|
|
|
|
func TestApplyRejectsDurationOverConfiguredMax(t *testing.T) {
|
|
got, tagged := Apply(transcript(segment("um", 1, 2.1)), 1.0)
|
|
if tagged != 0 {
|
|
t.Fatalf("tagged = %d, want 0", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], nil)
|
|
}
|
|
|
|
func TestApplyPreservesExistingCategoriesAndAvoidsDuplicate(t *testing.T) {
|
|
existing := segment("um", 1, 1.2)
|
|
existing.Categories = []string{"manual", Category}
|
|
|
|
got, tagged := Apply(transcript(existing), 1.0)
|
|
if tagged != 0 {
|
|
t.Fatalf("tagged = %d, want 0", tagged)
|
|
}
|
|
assertCategories(t, got.Segments[0], []string{"manual", Category})
|
|
}
|
|
|
|
func transcript(segments ...model.Segment) model.MergedTranscript {
|
|
return model.MergedTranscript{Segments: segments}
|
|
}
|
|
|
|
func segment(text string, start float64, end float64) model.Segment {
|
|
return model.Segment{
|
|
Source: "input.json",
|
|
Speaker: "Alice",
|
|
Start: start,
|
|
End: end,
|
|
Text: text,
|
|
}
|
|
}
|
|
|
|
func assertCategories(t *testing.T, segment model.Segment, want []string) {
|
|
t.Helper()
|
|
if !reflect.DeepEqual(segment.Categories, want) {
|
|
t.Fatalf("categories = %v, want %v", segment.Categories, want)
|
|
}
|
|
}
|