package builtin

import (
	"context"
	"strings"
	"testing"

	"gitea.maximumdirect.net/eric/seriatim/internal/config"
	"gitea.maximumdirect.net/eric/seriatim/internal/model"
	"gitea.maximumdirect.net/eric/seriatim/internal/pipeline"
)

// TestValidateRawAcceptsValidRawTranscripts feeds validateRaw a raw-state
// input holding one transcript whose single segment carries a timed word and
// an untimed word. It expects no error, an output that is still in the raw
// state with one transcript, and exactly one event mentioning the validated
// transcript count.
func TestValidateRawAcceptsValidRawTranscripts(t *testing.T) {
	words := []model.Word{
		{Text: "hello", Start: 1, End: 1.5, Timed: true},
		{Text: "untimed"},
	}
	segment := model.RawSegment{Start: 1, End: 2, Text: "hello", Words: words}
	transcript := model.RawTranscript{
		Source:   "input.json",
		Segments: []model.RawSegment{segment},
	}
	input := pipeline.PreprocessState{
		State: pipeline.StateRaw,
		Raw:   []model.RawTranscript{transcript},
	}

	step := validateRaw{}
	out, evs, err := step.Process(context.Background(), input, config.Config{})
	if err != nil {
		t.Fatalf("validate raw: %v", err)
	}

	if out.State != pipeline.StateRaw {
		t.Fatalf("unexpected state: %#v", out)
	}
	if len(out.Raw) != 1 {
		t.Fatalf("unexpected state: %#v", out)
	}

	// The length guard comes first so the index below cannot go out of range.
	if len(evs) != 1 {
		t.Fatalf("events = %#v", evs)
	}
	if msg := evs[0].Message; !strings.Contains(msg, "validated 1 raw transcript(s)") {
		t.Fatalf("events = %#v", evs)
	}
}

// TestValidateRawRejectsInvalidState passes a state marked as canonical and
// expects an error that names the required "raw" state.
func TestValidateRawRejectsInvalidState(t *testing.T) {
	input := pipeline.PreprocessState{State: pipeline.StateCanonical}

	step := validateRaw{}
	_, _, err := step.Process(context.Background(), input, config.Config{})

	assertPreprocessError(t, err, `requires state "raw"`)
}

// TestValidateRawRejectsNoRawTranscripts passes a raw state that carries no
// transcripts and expects an error saying so.
func TestValidateRawRejectsNoRawTranscripts(t *testing.T) {
	input := pipeline.PreprocessState{State: pipeline.StateRaw}

	step := validateRaw{}
	_, _, err := step.Process(context.Background(), input, config.Config{})

	assertPreprocessError(t, err, "no raw transcript(s)")
}

// TestValidateRawRejectsEmptySource replaces the fixture's source with a
// single space and expects the "empty source" error for transcript 0.
func TestValidateRawRejectsEmptySource(t *testing.T) {
	input := validRawState()
	input.Raw[0].Source = " "

	step := validateRaw{}
	_, _, err := step.Process(context.Background(), input, config.Config{})

	assertPreprocessError(t, err, "raw transcript 0 has empty source")
}

// TestValidateRawRejectsInvalidSegmentTiming mutates the fixture's only
// segment into each invalid timing shape and expects the matching error text.
func TestValidateRawRejectsInvalidSegmentTiming(t *testing.T) {
	cases := []struct {
		name   string
		mutate func(*model.RawSegment)
		want   string
	}{
		{
			name:   "negative start",
			mutate: func(seg *model.RawSegment) { seg.Start = -1 },
			want:   "segment 0 has negative start",
		},
		{
			name: "end before start",
			mutate: func(seg *model.RawSegment) {
				seg.Start = 2
				seg.End = 1
			},
			want: "segment 0 has end before start",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			input := validRawState()
			target := &input.Raw[0].Segments[0]
			tc.mutate(target)

			step := validateRaw{}
			_, _, err := step.Process(context.Background(), input, config.Config{})

			assertPreprocessError(t, err, tc.want)
		})
	}
}

// TestValidateRawRejectsInvalidTimedWordTiming installs a single timed word
// with invalid timing on the fixture's segment and expects the matching error
// text.
func TestValidateRawRejectsInvalidTimedWordTiming(t *testing.T) {
	cases := []struct {
		name string
		word model.Word
		want string
	}{
		{
			name: "negative start",
			word: model.Word{Text: "bad", Start: -1, End: 1, Timed: true},
			want: "word 0 has negative start",
		},
		{
			name: "end before start",
			word: model.Word{Text: "bad", Start: 2, End: 1, Timed: true},
			want: "word 0 has end before start",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			input := validRawState()
			input.Raw[0].Segments[0].Words = []model.Word{tc.word}

			step := validateRaw{}
			_, _, err := step.Process(context.Background(), input, config.Config{})

			assertPreprocessError(t, err, tc.want)
		})
	}
}

// validRawState returns a fresh raw-state fixture: one transcript sourced
// from "input.json" containing one segment spanning 1 to 2 with empty text
// and no words. Each call builds new slices, so callers may mutate the result.
func validRawState() pipeline.PreprocessState {
	segment := model.RawSegment{Start: 1, End: 2, Text: ""}
	transcript := model.RawTranscript{
		Source:   "input.json",
		Segments: []model.RawSegment{segment},
	}

	return pipeline.PreprocessState{
		State: pipeline.StateRaw,
		Raw:   []model.RawTranscript{transcript},
	}
}

// assertPreprocessError fails the test immediately unless err is non-nil and
// its message contains want.
func assertPreprocessError(t *testing.T, err error, want string) {
	t.Helper()

	switch {
	case err == nil:
		t.Fatalf("expected error containing %q", want)
	case !strings.Contains(err.Error(), want):
		t.Fatalf("expected error containing %q, got %v", want, err)
	}
}