182 lines
5.6 KiB
Go
182 lines
5.6 KiB
Go
package normalize
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
func TestParseReaderObjectWithSegmentsParses(t *testing.T) {
|
|
input := `{
|
|
"segments": [
|
|
{"start": 1.0, "end": 2.0, "speaker": " Alice ", "text": "hello", "id": 100}
|
|
]
|
|
}`
|
|
|
|
parsed, err := ParseReader(strings.NewReader(input))
|
|
if err != nil {
|
|
t.Fatalf("parse failed: %v", err)
|
|
}
|
|
if parsed.Shape != ShapeObjectWithSegments {
|
|
t.Fatalf("shape = %q, want %q", parsed.Shape, ShapeObjectWithSegments)
|
|
}
|
|
if len(parsed.Segments) != 1 {
|
|
t.Fatalf("segment count = %d, want 1", len(parsed.Segments))
|
|
}
|
|
segment := parsed.Segments[0]
|
|
if segment.Speaker != "Alice" {
|
|
t.Fatalf("speaker = %q, want %q", segment.Speaker, "Alice")
|
|
}
|
|
if segment.OriginalID == nil || *segment.OriginalID != 100 {
|
|
t.Fatalf("original id = %v, want 100", segment.OriginalID)
|
|
}
|
|
}
|
|
|
|
func TestParseReaderBareSegmentArrayParses(t *testing.T) {
|
|
input := `[
|
|
{"start": 1.0, "end": 2.0, "speaker": "Alice", "text": "hello"},
|
|
{"start": 3.0, "end": 4.0, "speaker": "Bob", "text": "world"}
|
|
]`
|
|
|
|
parsed, err := ParseReader(strings.NewReader(input))
|
|
if err != nil {
|
|
t.Fatalf("parse failed: %v", err)
|
|
}
|
|
if parsed.Shape != ShapeBareSegmentsArray {
|
|
t.Fatalf("shape = %q, want %q", parsed.Shape, ShapeBareSegmentsArray)
|
|
}
|
|
if len(parsed.Segments) != 2 {
|
|
t.Fatalf("segment count = %d, want 2", len(parsed.Segments))
|
|
}
|
|
}
|
|
|
|
func TestParseReaderInvalidJSONFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`{"segments":`))
|
|
if err == nil {
|
|
t.Fatal("expected parse error")
|
|
}
|
|
if !strings.Contains(err.Error(), "decode normalize input JSON") {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestParseReaderObjectMissingSegmentsFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`{"items":[]}`))
|
|
if err == nil {
|
|
t.Fatal("expected missing segments error")
|
|
}
|
|
if !strings.Contains(err.Error(), "must contain a \"segments\" field") {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestParseReaderSegmentsNotArrayFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`{"segments": {}}`))
|
|
if err == nil {
|
|
t.Fatal("expected segments not array error")
|
|
}
|
|
if !strings.Contains(err.Error(), "\"segments\" must be an array") {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestParseReaderTopLevelScalarShapesFail(t *testing.T) {
|
|
tests := []string{`"text"`, `42`, `null`}
|
|
for _, input := range tests {
|
|
_, err := ParseReader(strings.NewReader(input))
|
|
if err == nil {
|
|
t.Fatalf("expected top-level shape error for %s", input)
|
|
}
|
|
if !strings.Contains(err.Error(), "top-level object") {
|
|
t.Fatalf("unexpected error for %s: %v", input, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestParseReaderMissingStartFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`[{"end":2,"speaker":"A","text":"t"}]`))
|
|
assertContains(t, err, `missing required field "start"`)
|
|
}
|
|
|
|
func TestParseReaderMissingEndFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`[{"start":1,"speaker":"A","text":"t"}]`))
|
|
assertContains(t, err, `missing required field "end"`)
|
|
}
|
|
|
|
func TestParseReaderMissingSpeakerFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"text":"t"}]`))
|
|
assertContains(t, err, `missing required field "speaker"`)
|
|
}
|
|
|
|
func TestParseReaderEmptySpeakerFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"speaker":" ","text":"t"}]`))
|
|
assertContains(t, err, `speaker must be non-empty`)
|
|
}
|
|
|
|
func TestParseReaderMissingTextFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"speaker":"A"}]`))
|
|
assertContains(t, err, `missing required field "text"`)
|
|
}
|
|
|
|
func TestParseReaderEndBeforeStartFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`[{"start":3,"end":2,"speaker":"A","text":"t"}]`))
|
|
assertContains(t, err, "before start")
|
|
}
|
|
|
|
func TestParseReaderNegativeStartFails(t *testing.T) {
|
|
_, err := ParseReader(strings.NewReader(`[{"start":-1,"end":2,"speaker":"A","text":"t"}]`))
|
|
assertContains(t, err, "start must be >= 0")
|
|
}
|
|
|
|
func TestParseReaderEmptySegmentsArrayAccepted(t *testing.T) {
|
|
parsed, err := ParseReader(strings.NewReader(`{"segments":[]}`))
|
|
if err != nil {
|
|
t.Fatalf("parse failed: %v", err)
|
|
}
|
|
if len(parsed.Segments) != 0 {
|
|
t.Fatalf("segment count = %d, want 0", len(parsed.Segments))
|
|
}
|
|
}
|
|
|
|
func TestParseReaderCategoriesPreservedWhenValid(t *testing.T) {
|
|
parsed, err := ParseReader(strings.NewReader(`[{"start":1,"end":2,"speaker":"A","text":"t","categories":["filler","backchannel"]}]`))
|
|
if err != nil {
|
|
t.Fatalf("parse failed: %v", err)
|
|
}
|
|
if len(parsed.Segments) != 1 {
|
|
t.Fatalf("segment count = %d, want 1", len(parsed.Segments))
|
|
}
|
|
if len(parsed.Segments[0].Categories) != 2 {
|
|
t.Fatalf("categories length = %d, want 2", len(parsed.Segments[0].Categories))
|
|
}
|
|
if parsed.Segments[0].Categories[0] != "filler" || parsed.Segments[0].Categories[1] != "backchannel" {
|
|
t.Fatalf("categories = %v", parsed.Segments[0].Categories)
|
|
}
|
|
}
|
|
|
|
func TestParseReaderOriginalInputIndexPreserved(t *testing.T) {
|
|
input := `[
|
|
{"start":1,"end":2,"speaker":"A","text":"one"},
|
|
{"start":2,"end":3,"speaker":"B","text":"two"},
|
|
{"start":3,"end":4,"speaker":"C","text":"three"}
|
|
]`
|
|
parsed, err := ParseReader(strings.NewReader(input))
|
|
if err != nil {
|
|
t.Fatalf("parse failed: %v", err)
|
|
}
|
|
for index, segment := range parsed.Segments {
|
|
if segment.InputIndex != index {
|
|
t.Fatalf("segment %d input index = %d, want %d", index, segment.InputIndex, index)
|
|
}
|
|
}
|
|
}
|
|
|
|
// assertContains fails the calling test immediately when err is nil or when
// err's message does not include fragment. It is marked as a helper so the
// failure is attributed to the caller's line.
func assertContains(t *testing.T, err error, fragment string) {
	t.Helper()
	switch {
	case err == nil:
		t.Fatalf("expected error containing %q", fragment)
	case !strings.Contains(err.Error(), fragment):
		t.Fatalf("error = %q, want substring %q", err.Error(), fragment)
	}
}
|