package normalize

import (
	"strings"
	"testing"
)

// TestParseReaderObjectWithSegmentsParses checks that an object carrying a
// "segments" array is reported as ShapeObjectWithSegments, that the padded
// speaker " Alice " comes back as "Alice", and that the segment's "id" value
// is exposed through OriginalID.
func TestParseReaderObjectWithSegmentsParses(t *testing.T) {
	input := `{ "segments": [ {"start": 1.0, "end": 2.0, "speaker": " Alice ", "text": "hello", "id": 100} ] }`

	parsed, err := ParseReader(strings.NewReader(input))
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if parsed.Shape != ShapeObjectWithSegments {
		t.Fatalf("shape = %q, want %q", parsed.Shape, ShapeObjectWithSegments)
	}
	if len(parsed.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(parsed.Segments))
	}

	segment := parsed.Segments[0]
	if segment.Speaker != "Alice" {
		t.Fatalf("speaker = %q, want %q", segment.Speaker, "Alice")
	}

	// Report nil and mismatch separately: formatting the pointer itself with
	// %v would print an address instead of the offending id.
	if segment.OriginalID == nil {
		t.Fatal("original id = <nil>, want 100")
	}
	if got := *segment.OriginalID; got != 100 {
		t.Fatalf("original id = %v, want 100", got)
	}
}

// TestParseReaderBareSegmentArrayParses checks that a top-level JSON array of
// two segments is reported as ShapeBareSegmentsArray with both segments kept.
func TestParseReaderBareSegmentArrayParses(t *testing.T) {
	input := `[ {"start": 1.0, "end": 2.0, "speaker": "Alice", "text": "hello"}, {"start": 3.0, "end": 4.0, "speaker": "Bob", "text": "world"} ]`

	parsed, err := ParseReader(strings.NewReader(input))
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if parsed.Shape != ShapeBareSegmentsArray {
		t.Fatalf("shape = %q, want %q", parsed.Shape, ShapeBareSegmentsArray)
	}
	if len(parsed.Segments) != 2 {
		t.Fatalf("segment count = %d, want 2", len(parsed.Segments))
	}
}

// TestParseReaderInvalidJSONFails checks that truncated JSON yields an error
// mentioning "decode normalize input JSON".
func TestParseReaderInvalidJSONFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`{"segments":`))
	if err == nil {
		t.Fatal("expected parse error")
	}
	if !strings.Contains(err.Error(), "decode normalize input JSON") {
		t.Fatalf("unexpected error: %v", err)
	}
}

// TestParseReaderObjectMissingSegmentsFails checks that an object without a
// "segments" key is rejected with a message naming the missing field.
func TestParseReaderObjectMissingSegmentsFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`{"items":[]}`))
	if err == nil {
		t.Fatal("expected missing segments error")
	}
	if !strings.Contains(err.Error(), "must contain a \"segments\" field") {
		t.Fatalf("unexpected error: %v", err)
	}
}

// TestParseReaderSegmentsNotArrayFails checks that a non-array "segments"
// value is rejected.
func TestParseReaderSegmentsNotArrayFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`{"segments": {}}`))
	if err == nil {
		t.Fatal("expected segments not array error")
	}
	if !strings.Contains(err.Error(), "\"segments\" must be an array") {
		t.Fatalf("unexpected error: %v", err)
	}
}

// TestParseReaderTopLevelScalarShapesFail checks that top-level scalars
// (string, number, null, boolean) are rejected with an error mentioning
// "top-level object". Each input runs as its own subtest so that one failing
// input does not hide the results for the others.
func TestParseReaderTopLevelScalarShapesFail(t *testing.T) {
	tests := []string{`"text"`, `42`, `null`, `true`}
	for _, input := range tests {
		t.Run(input, func(t *testing.T) {
			_, err := ParseReader(strings.NewReader(input))
			if err == nil {
				t.Fatalf("expected top-level shape error for %s", input)
			}
			if !strings.Contains(err.Error(), "top-level object") {
				t.Fatalf("unexpected error for %s: %v", input, err)
			}
		})
	}
}

// TestParseReaderMissingStartFails checks that a segment without "start" is
// rejected.
func TestParseReaderMissingStartFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`[{"end":2,"speaker":"A","text":"t"}]`))
	assertContains(t, err, `missing required field "start"`)
}

// TestParseReaderMissingEndFails checks that a segment without "end" is
// rejected.
func TestParseReaderMissingEndFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`[{"start":1,"speaker":"A","text":"t"}]`))
	assertContains(t, err, `missing required field "end"`)
}

// TestParseReaderMissingSpeakerFails checks that a segment without "speaker"
// is rejected.
func TestParseReaderMissingSpeakerFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"text":"t"}]`))
	assertContains(t, err, `missing required field "speaker"`)
}

// TestParseReaderEmptySpeakerFails checks that a speaker consisting of a
// single space is rejected as empty.
func TestParseReaderEmptySpeakerFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"speaker":" ","text":"t"}]`))
	assertContains(t, err, `speaker must be non-empty`)
}

// TestParseReaderMissingTextFails checks that a segment without "text" is
// rejected.
func TestParseReaderMissingTextFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"speaker":"A"}]`))
	assertContains(t, err, `missing required field "text"`)
}

// TestParseReaderEndBeforeStartFails checks that a segment whose end (2)
// precedes its start (3) is rejected.
func TestParseReaderEndBeforeStartFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`[{"start":3,"end":2,"speaker":"A","text":"t"}]`))
	assertContains(t, err, "before start")
}

// TestParseReaderNegativeStartFails checks that a negative start is rejected.
func TestParseReaderNegativeStartFails(t *testing.T) {
	_, err := ParseReader(strings.NewReader(`[{"start":-1,"end":2,"speaker":"A","text":"t"}]`))
	assertContains(t, err, "start must be >= 0")
}

// TestParseReaderEmptySegmentsArrayAccepted checks that an empty "segments"
// array parses without error and yields zero segments.
func TestParseReaderEmptySegmentsArrayAccepted(t *testing.T) {
	parsed, err := ParseReader(strings.NewReader(`{"segments":[]}`))
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if len(parsed.Segments) != 0 {
		t.Fatalf("segment count = %d, want 0", len(parsed.Segments))
	}
}

// TestParseReaderCategoriesPreservedWhenValid checks that a segment's
// "categories" array is carried through with its values and order intact.
func TestParseReaderCategoriesPreservedWhenValid(t *testing.T) {
	parsed, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"speaker":"A","text":"t","categories":["filler","backchannel"]}]`))
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if len(parsed.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(parsed.Segments))
	}
	if len(parsed.Segments[0].Categories) != 2 {
		t.Fatalf("categories length = %d, want 2", len(parsed.Segments[0].Categories))
	}
	if parsed.Segments[0].Categories[0] != "filler" || parsed.Segments[0].Categories[1] != "backchannel" {
		t.Fatalf("categories = %v", parsed.Segments[0].Categories)
	}
}

// TestParseReaderOriginalInputIndexPreserved checks that each parsed segment's
// InputIndex equals its position in the input array.
func TestParseReaderOriginalInputIndexPreserved(t *testing.T) {
	input := `[ {"start":1,"end":2,"speaker":"A","text":"one"}, {"start":2,"end":3,"speaker":"B","text":"two"}, {"start":3,"end":4,"speaker":"C","text":"three"} ]`

	parsed, err := ParseReader(strings.NewReader(input))
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	for index, segment := range parsed.Segments {
		if segment.InputIndex != index {
			t.Fatalf("segment %d input index = %d, want %d", index, segment.InputIndex, index)
		}
	}
}

// assertContains fails the test unless err is non-nil and its message
// contains fragment. It is marked as a helper so failures are attributed to
// the calling test's line.
func assertContains(t *testing.T, err error, fragment string) {
	t.Helper()
	if err == nil {
		t.Fatalf("expected error containing %q", fragment)
	}
	if !strings.Contains(err.Error(), fragment) {
		t.Fatalf("error = %q, want substring %q", err.Error(), fragment)
	}
}