package filler

import (
	"reflect"
	"testing"

	"gitea.maximumdirect.net/eric/seriatim/internal/model"
)

// TestApplyTagsVerySafeFillers runs Apply on one-segment transcripts whose
// text is a single filler token (several spellings and cases) and expects the
// segment to be counted as tagged and to carry exactly Category.
func TestApplyTagsVerySafeFillers(t *testing.T) {
	inputs := []string{"um", "uhhh", "ER", "ermm", "ah", "eh", "hmmm", "mm", "mmm"}
	for _, word := range inputs {
		t.Run(word, func(t *testing.T) {
			result, count := Apply(transcript(segment(word, 1, 1.5)))
			if count != 1 {
				t.Fatalf("tagged = %d, want 1", count)
			}
			assertCategories(t, result.Segments[0], []string{Category})
		})
	}
}

// TestApplyTagsRepeatedFillers expects a segment made of three consecutive
// filler tokens to be tagged.
func TestApplyTagsRepeatedFillers(t *testing.T) {
	input := transcript(segment("um uh hmm", 1, 1.8))

	result, count := Apply(input)
	if count != 1 {
		t.Fatalf("tagged = %d, want 1", count)
	}
	assertCategories(t, result.Segments[0], []string{Category})
}

// TestApplyMatchesTrimAwareCaseInsensitive expects surrounding whitespace and
// mixed letter case in the segment text not to prevent tagging.
func TestApplyMatchesTrimAwareCaseInsensitive(t *testing.T) {
	input := transcript(segment(" UM uh ", 1, 1.5))

	result, count := Apply(input)
	if count != 1 {
		t.Fatalf("tagged = %d, want 1", count)
	}
	assertCategories(t, result.Segments[0], []string{Category})
}

// TestApplyDoesNotTagNonMatches expects text that mixes a filler with other
// content (an extra word, a hyphenated form, punctuation) to stay untagged
// with nil categories.
func TestApplyDoesNotTagNonMatches(t *testing.T) {
	inputs := []string{"um okay", "uh-huh", "hmm, okay"}
	for _, phrase := range inputs {
		t.Run(phrase, func(t *testing.T) {
			result, count := Apply(transcript(segment(phrase, 1, 1.5)))
			if count != 0 {
				t.Fatalf("tagged = %d, want 0", count)
			}
			assertCategories(t, result.Segments[0], nil)
		})
	}
}

// TestApplyRejectsWordCountOverThree expects a segment of four filler tokens
// to stay untagged.
func TestApplyRejectsWordCountOverThree(t *testing.T) {
	input := transcript(segment("um uh er ah", 1, 1.5))

	result, count := Apply(input)
	if count != 0 {
		t.Fatalf("tagged = %d, want 0", count)
	}
	assertCategories(t, result.Segments[0], nil)
}

// TestApplyRejectsDurationOverOneSecond expects a single-filler segment that
// spans 1 to 2.1 (longer than one second) to stay untagged.
func TestApplyRejectsDurationOverOneSecond(t *testing.T) {
	input := transcript(segment("um", 1, 2.1))

	result, count := Apply(input)
	if count != 0 {
		t.Fatalf("tagged = %d, want 0", count)
	}
	assertCategories(t, result.Segments[0], nil)
}

// TestApplyPreservesExistingCategoriesAndAvoidsDuplicate starts from a filler
// segment that already lists "manual" and Category, and expects Apply to
// report zero newly tagged segments and leave the categories as they were.
func TestApplyPreservesExistingCategoriesAndAvoidsDuplicate(t *testing.T) {
	preTagged := segment("um", 1, 1.2)
	preTagged.Categories = []string{"manual", Category}

	result, count := Apply(transcript(preTagged))
	if count != 0 {
		t.Fatalf("tagged = %d, want 0", count)
	}
	// A fresh literal is used for the expectation so it never shares a
	// backing array with the slice handed to Apply.
	assertCategories(t, result.Segments[0], []string{"manual", Category})
}

// transcript wraps the given segments in a MergedTranscript.
func transcript(segments ...model.Segment) model.MergedTranscript {
	var merged model.MergedTranscript
	merged.Segments = segments
	return merged
}

// segment builds a test Segment with the given text and time bounds, using a
// fixed source ("input.json") and speaker ("Alice").
func segment(text string, start float64, end float64) model.Segment {
	seg := model.Segment{Source: "input.json", Speaker: "Alice"}
	seg.Start, seg.End = start, end
	seg.Text = text
	return seg
}

// assertCategories fails the test immediately unless the segment's Categories
// are deeply equal to want. The comparison uses reflect.DeepEqual, so a nil
// slice and an empty non-nil slice are treated as different.
func assertCategories(t *testing.T, segment model.Segment, want []string) {
	t.Helper()
	if reflect.DeepEqual(segment.Categories, want) {
		return
	}
	t.Fatalf("categories = %v, want %v", segment.Categories, want)
}