package builtin

import (
	"context"
	"encoding/json"
	"fmt"
	"os"

	"gitea.maximumdirect.net/eric/seriatim/internal/config"
	"gitea.maximumdirect.net/eric/seriatim/internal/model"
	"gitea.maximumdirect.net/eric/seriatim/internal/report"
)

// jsonFilesReader reads raw transcripts from the JSON files listed in
// cfg.InputFiles.
type jsonFilesReader struct{}

// Name returns the identifier of this reader, "json-files".
func (jsonFilesReader) Name() string {
	return "json-files"
}

// Read decodes every path in cfg.InputFiles, in order, into a
// model.RawTranscript.
//
// On success it returns the transcripts together with a single informational
// event stating how many input files were decoded. The first file that cannot
// be read or validated aborts the whole call with that file's error. If ctx is
// done before the call or between two files, ctx.Err() is returned and no
// further files are read.
func (jsonFilesReader) Read(ctx context.Context, cfg config.Config) ([]model.RawTranscript, []report.Event, error) {
	// Checked up front so that a cancelled context is reported even when
	// there are no input files to iterate over.
	if err := ctx.Err(); err != nil {
		return nil, nil, err
	}

	raw := make([]model.RawTranscript, 0, len(cfg.InputFiles))
	for _, inputFile := range cfg.InputFiles {
		// Re-check between files so that a cancellation arriving mid-batch
		// stops the remaining disk reads instead of being ignored.
		if err := ctx.Err(); err != nil {
			return nil, nil, err
		}

		transcript, err := readRawTranscript(inputFile)
		if err != nil {
			return nil, nil, err
		}
		raw = append(raw, transcript)
	}

	events := []report.Event{
		report.Info("input", "json-files", fmt.Sprintf("decoded %d input file(s)", len(raw))),
	}
	return raw, events, nil
}

// rawTranscriptFile is the top-level shape of an input file. Segments is kept
// as raw JSON so that an absent or null value can be told apart from a value
// of the wrong type.
type rawTranscriptFile struct {
	Segments json.RawMessage `json:"segments"`
}

// rawSegmentFile is one element of the top-level segments array. Every field
// is kept as raw JSON so that "missing" and "wrong type" produce different
// error messages.
type rawSegmentFile struct {
	Start json.RawMessage `json:"start"`
	End   json.RawMessage `json:"end"`
	Text  json.RawMessage `json:"text"`
}

// toRawSegment validates the segment and converts it to a model.RawSegment.
//
// path and index are used only to build error messages. Checks run in a fixed
// order: presence of start, end and text; then the type of start, end and
// text; then start >= 0 and end >= start. The first failing check is the one
// reported. Type errors carry a fixed message; the underlying decode error is
// not included.
func (s rawSegmentFile) toRawSegment(path string, index int) (model.RawSegment, error) {
	if s.Start == nil || isJSONNull(s.Start) {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d missing numeric start", path, index)
	}
	if s.End == nil || isJSONNull(s.End) {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d missing numeric end", path, index)
	}
	if s.Text == nil || isJSONNull(s.Text) {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d missing string text", path, index)
	}

	var start float64
	if err := json.Unmarshal(s.Start, &start); err != nil {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d start must be numeric", path, index)
	}
	var end float64
	if err := json.Unmarshal(s.End, &end); err != nil {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d end must be numeric", path, index)
	}
	var text string
	if err := json.Unmarshal(s.Text, &text); err != nil {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d text must be a string", path, index)
	}

	if start < 0 {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d has negative start", path, index)
	}
	if end < start {
		return model.RawSegment{}, fmt.Errorf("input file %q segment %d has end before start", path, index)
	}

	return model.RawSegment{
		Start: start,
		End:   end,
		Text:  text,
	}, nil
}

// readRawTranscript reads the file at path and decodes it into a
// model.RawTranscript whose Source is path.
//
// The file must be a JSON object with a non-null top-level "segments" array.
// Each element is validated by rawSegmentFile.toRawSegment; the first invalid
// segment aborts the read. Read and parse failures wrap the underlying error.
func readRawTranscript(path string) (model.RawTranscript, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return model.RawTranscript{}, fmt.Errorf("read input file %q: %w", path, err)
	}

	var parsed rawTranscriptFile
	if err := json.Unmarshal(data, &parsed); err != nil {
		return model.RawTranscript{}, fmt.Errorf("parse input file %q: %w", path, err)
	}
	if parsed.Segments == nil || isJSONNull(parsed.Segments) {
		return model.RawTranscript{}, fmt.Errorf("input file %q must contain top-level segments array", path)
	}

	var rawSegments []rawSegmentFile
	if err := json.Unmarshal(parsed.Segments, &rawSegments); err != nil {
		return model.RawTranscript{}, fmt.Errorf("input file %q top-level segments must be an array: %w", path, err)
	}

	segments := make([]model.RawSegment, 0, len(rawSegments))
	for index, rawSegment := range rawSegments {
		segment, err := rawSegment.toRawSegment(path, index)
		if err != nil {
			return model.RawTranscript{}, err
		}
		segments = append(segments, segment)
	}

	return model.RawTranscript{
		Source:   path,
		Segments: segments,
	}, nil
}

// isJSONNull reports whether value is exactly the JSON literal null.
func isJSONNull(value json.RawMessage) bool {
	return string(value) == "null"
}