package schema

import (
	"strings"
	"testing"
)

// TestValidateTranscriptAcceptsValidTranscript checks that the fixture built by
// validTranscript passes ValidateTranscript without an error.
func TestValidateTranscriptAcceptsValidTranscript(t *testing.T) {
	err := ValidateTranscript(validTranscript())
	if err != nil {
		t.Fatalf("validate transcript: %v", err)
	}
}

// TestValidateJSONRejectsMissingRequiredField feeds ValidateJSON a document
// that has "metadata" and "segments" but no "overlap_groups" key, and expects
// the returned error to mention "overlap_groups".
func TestValidateJSONRejectsMissingRequiredField(t *testing.T) {
	payload := []byte(`{ "metadata": { "application": "seriatim", "version": "dev", "input_reader": "json-files", "input_files": [], "preprocessing_modules": [], "postprocessing_modules": [], "output_modules": [] }, "segments": [] }`)

	assertErrorContains(t, ValidateJSON(payload), "overlap_groups")
}

// TestValidateJSONRejectsWrongFieldType feeds ValidateJSON a segment whose
// "id" is the JSON string "1" and expects the returned error to mention "id".
func TestValidateJSONRejectsWrongFieldType(t *testing.T) {
	payload := []byte(`{ "metadata": { "application": "seriatim", "version": "dev", "input_reader": "json-files", "input_files": [], "preprocessing_modules": [], "postprocessing_modules": [], "output_modules": [] }, "segments": [ { "id": "1", "source": "input.json", "speaker": "Alice", "start": 1, "end": 2, "text": "hello" } ], "overlap_groups": [] }`)

	assertErrorContains(t, ValidateJSON(payload), "id")
}

// TestValidateJSONRejectsUnexpectedInternalFields splices one extra property
// into an otherwise complete segment and expects ValidateJSON to return an
// error containing "additional properties".
func TestValidateJSONRejectsUnexpectedInternalFields(t *testing.T) {
	cases := []struct {
		name  string
		field string
	}{
		{name: "internal ref", field: `"internal_ref": "internal-1",`},
		{name: "words", field: `"words": [],`},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// tc.field already ends with a comma, so it slots in between
			// the "id" and "source" properties of the segment object.
			payload := `{ "metadata": { "application": "seriatim", "version": "dev", "input_reader": "json-files", "input_files": [], "preprocessing_modules": [], "postprocessing_modules": [], "output_modules": [] }, "segments": [ { "id": 1, ` + tc.field + ` "source": "input.json", "speaker": "Alice", "start": 1, "end": 2, "text": "hello" } ], "overlap_groups": [] }`

			err := ValidateJSON([]byte(payload))
			assertErrorContains(t, err, "additional properties")
		})
	}
}

// TestValidateTranscriptRejectsMissingOrNonSequentialIDs replaces the fixture's
// segments with ones carrying the listed IDs and expects ValidateTranscript to
// return an error naming the offending segment index, its ID, and the wanted ID.
func TestValidateTranscriptRejectsMissingOrNonSequentialIDs(t *testing.T) {
	cases := []struct {
		name string
		ids  []int
		want string
	}{
		{name: "missing zero id", ids: []int{0}, want: "segment 0 has id 0; want 1"},
		{name: "does not start at one", ids: []int{2}, want: "segment 0 has id 2; want 1"},
		{name: "gap", ids: []int{1, 3}, want: "segment 1 has id 3; want 2"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			segments := make([]Segment, 0, len(tc.ids))
			for i, id := range tc.ids {
				// Each segment occupies [i, i+1] so only the ID differs
				// between segments.
				begin := float64(i)
				segments = append(segments, Segment{
					ID:      id,
					Source:  "input.json",
					Speaker: "Alice",
					Start:   begin,
					End:     begin + 1,
					Text:    "hello",
				})
			}

			transcript := validTranscript()
			transcript.Segments = segments

			assertErrorContains(t, ValidateTranscript(transcript), tc.want)
		})
	}
}

// TestValidateTranscriptRejectsInvalidTiming sets the first segment's End
// before its Start and expects an error containing "segment 0 has end".
func TestValidateTranscriptRejectsInvalidTiming(t *testing.T) {
	transcript := validTranscript()

	first := &transcript.Segments[0]
	first.Start, first.End = 2, 1

	assertErrorContains(t, ValidateTranscript(transcript), "segment 0 has end")
}

// TestValidateTranscriptRejectsInvalidOverlapGroupTiming adds an overlap group
// whose End precedes its Start and expects an error containing
// "overlap_group 0 has end".
func TestValidateTranscriptRejectsInvalidOverlapGroupTiming(t *testing.T) {
	backwards := OverlapGroup{
		ID:         1,
		Start:      3,
		End:        2,
		Segments:   []string{"input.json#0"},
		Speakers:   []string{"Alice"},
		Class:      "unknown",
		Resolution: "unresolved",
	}

	transcript := validTranscript()
	transcript.OverlapGroups = []OverlapGroup{backwards}

	assertErrorContains(t, ValidateTranscript(transcript), "overlap_group 0 has end")
}

// validTranscript returns a fresh Transcript fixture with one segment (ID 1,
// spanning 1 to 2) and an empty, non-nil OverlapGroups slice. Tests mutate the
// returned value to build their failing variants.
func validTranscript() Transcript {
	// new(int) yields a pointer to 0, the index recorded for the only segment.
	sourceIndex := new(int)

	metadata := Metadata{
		Application:           "seriatim",
		Version:               "dev",
		InputReader:           "json-files",
		InputFiles:            []string{"input.json"},
		PreprocessingModules:  []string{"validate-raw", "normalize-speakers", "trim-text"},
		PostprocessingModules: []string{"assign-ids", "validate-output"},
		OutputModules:         []string{"json"},
	}

	segment := Segment{
		ID:                 1,
		Source:             "input.json",
		SourceSegmentIndex: sourceIndex,
		Speaker:            "Alice",
		Start:              1,
		End:                2,
		Text:               "hello",
	}

	return Transcript{
		Metadata:      metadata,
		Segments:      []Segment{segment},
		OverlapGroups: []OverlapGroup{},
	}
}

// assertErrorContains fails the test immediately when err is nil or when its
// message does not contain want.
func assertErrorContains(t *testing.T, err error, want string) {
	t.Helper()

	switch {
	case err == nil:
		t.Fatalf("expected error containing %q", want)
	case !strings.Contains(err.Error(), want):
		t.Fatalf("expected error containing %q, got %v", want, err)
	}
}