Added support for a minimal JSON output schema

This commit is contained in:
2026-04-28 14:39:00 -05:00
parent a3ca6665a9
commit 9cca88280f
16 changed files with 658 additions and 44 deletions

View File

@@ -7,7 +7,6 @@ import (
"gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/report"
"gitea.maximumdirect.net/eric/seriatim/schema"
)
type jsonOutputWriter struct{}
@@ -16,7 +15,7 @@ func (jsonOutputWriter) Name() string {
return "json"
}
func (jsonOutputWriter) Write(ctx context.Context, out schema.Transcript, rpt report.Report, cfg config.Config) ([]report.Event, error) {
func (jsonOutputWriter) Write(ctx context.Context, out any, rpt report.Report, cfg config.Config) ([]report.Event, error) {
if err := ctx.Err(); err != nil {
return nil, err
}

View File

@@ -47,8 +47,17 @@ func (validateOutput) Process(ctx context.Context, in model.MergedTranscript, cf
return model.MergedTranscript{}, nil, err
}
transcript := artifact.FromMerged(cfg, in)
if err := schema.ValidateTranscript(transcript); err != nil {
selected := artifact.SelectedFromMerged(cfg, in)
var err error
switch transcript := selected.(type) {
case schema.MinimalTranscript:
err = schema.ValidateMinimalTranscript(transcript)
case schema.Transcript:
err = schema.ValidateTranscript(transcript)
default:
err = fmt.Errorf("unsupported output artifact type %T", selected)
}
if err != nil {
return model.MergedTranscript{}, nil, fmt.Errorf("validate-output: %w", err)
}

View File

@@ -49,6 +49,56 @@ func TestValidateOutputFailsBeforeAssignIDs(t *testing.T) {
}
}
func TestValidateOutputUsesMinimalSchemaWhenConfigured(t *testing.T) {
merged := model.MergedTranscript{
Segments: []model.Segment{
{
ID: 1,
Source: "input.json",
SourceRef: "word-run:1:1:1",
DerivedFrom: []string{"input.json#0"},
Speaker: "Alice",
Start: 1,
End: 2,
Text: "hello",
Categories: []string{"backchannel"},
OverlapGroupID: 1,
},
},
}
cfg := testConfig()
cfg.OutputSchema = config.OutputSchemaMinimal
got, events, err := validateOutput{}.Process(context.Background(), merged, cfg)
if err != nil {
t.Fatalf("validate output: %v", err)
}
if len(got.Segments) != 1 {
t.Fatalf("segment count = %d, want 1", len(got.Segments))
}
if len(events) != 1 || !strings.Contains(events[0].Message, "validated 1 output segment(s)") {
t.Fatalf("events = %#v", events)
}
}
func TestValidateOutputMinimalFailsBeforeAssignIDs(t *testing.T) {
merged := model.MergedTranscript{
Segments: []model.Segment{
{Source: "input.json", Speaker: "Alice", Start: 1, End: 2, Text: "hello"},
},
}
cfg := testConfig()
cfg.OutputSchema = config.OutputSchemaMinimal
_, _, err := validateOutput{}.Process(context.Background(), merged, cfg)
if err == nil {
t.Fatal("expected validation error")
}
if !strings.Contains(err.Error(), "segment 0 has id 0; want 1") {
t.Fatalf("unexpected error: %v", err)
}
}
func testConfig() config.Config {
return config.Config{
InputReader: config.DefaultInputReader,