Added a new public JSON schema as the default output artifact

This commit is contained in:
2026-04-28 21:32:43 -05:00
parent 80ac7e97dd
commit cc80a123ef
14 changed files with 533 additions and 12 deletions

View File

@@ -114,6 +114,81 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
}
}
// TestMergeWritesDefaultOutputSchema runs merge without an --output-schema
// flag and inspects the written file. The decoded document must report
// config.OutputSchemaDefault in its metadata and hold exactly one segment
// categorized as "backchannel"; the raw bytes must not contain any of the
// field names that the test treats as forbidden in the default schema.
func TestMergeWritesDefaultOutputSchema(t *testing.T) {
	workDir := t.TempDir()
	inputPath := writeJSONFile(t, workDir, "input.json", `{
"segments": [
{"start": 1, "end": 1.6, "text": "yeah"}
]
}`)
	outputPath := filepath.Join(workDir, "merged.json")

	// executeMergeRaw is used on purpose: executeMerge would inject the
	// seriatim schema flag and the default would never be exercised.
	if err := executeMergeRaw("--input-file", inputPath, "--output-file", outputPath); err != nil {
		t.Fatalf("merge failed: %v", err)
	}

	var got schema.DefaultTranscript
	readJSON(t, outputPath, &got)
	if got.Metadata.OutputSchema != config.OutputSchemaDefault {
		t.Fatalf("output_schema = %q, want default", got.Metadata.OutputSchema)
	}
	if n := len(got.Segments); n != 1 {
		t.Fatalf("segment count = %d, want 1", n)
	}
	first := got.Segments[0]
	if len(first.Categories) != 1 || first.Categories[0] != "backchannel" {
		t.Fatalf("categories = %#v, want [backchannel]", first.Categories)
	}

	// Scan the serialized text rather than the decoded struct, so a field
	// that the struct simply does not declare cannot slip through unnoticed.
	raw, err := os.ReadFile(outputPath)
	if err != nil {
		t.Fatalf("read output: %v", err)
	}
	rendered := string(raw)
	for _, key := range []string{"overlap_groups", "source", "derived_from", "words"} {
		if strings.Contains(rendered, key) {
			t.Fatalf("default output contains %q:\n%s", key, raw)
		}
	}
}
// TestMergeWritesSeriatimOutputSchema runs merge with an explicit
// "--output-schema seriatim" and decodes the result as a
// model.FinalTranscript. It expects one segment categorized as "backchannel"
// with a non-empty Source, and no overlap groups for the single-segment input.
func TestMergeWritesSeriatimOutputSchema(t *testing.T) {
	workDir := t.TempDir()
	inputPath := writeJSONFile(t, workDir, "input.json", `{
"segments": [
{"start": 1, "end": 1.6, "text": "yeah"}
]
}`)
	outputPath := filepath.Join(workDir, "merged.json")

	mergeArgs := []string{
		"--input-file", inputPath,
		"--output-file", outputPath,
		"--output-schema", "seriatim",
	}
	if err := executeMergeRaw(mergeArgs...); err != nil {
		t.Fatalf("merge failed: %v", err)
	}

	var got model.FinalTranscript
	readJSON(t, outputPath, &got)
	if n := len(got.Segments); n != 1 {
		t.Fatalf("segment count = %d, want 1", n)
	}

	first := got.Segments[0]
	switch {
	case len(first.Categories) != 1 || first.Categories[0] != "backchannel":
		t.Fatalf("categories = %#v, want [backchannel]", first.Categories)
	case first.Source == "":
		t.Fatal("expected full output to include source")
	}

	if n := len(got.OverlapGroups); n != 0 {
		t.Fatalf("expected no overlap groups, got %d", n)
	}
}
func TestMergeWritesMinimalOutputSchema(t *testing.T) {
dir := t.TempDir()
input := writeJSONFile(t, dir, "input.json", `{
@@ -1916,11 +1991,29 @@ func TestInvalidTimingFails(t *testing.T) {
}
// executeMerge runs the merge command for tests written against the seriatim
// envelope. When args does not already carry an --output-schema flag, it
// appends "--output-schema" with config.OutputSchemaSeriatim before delegating
// to executeMergeRaw; otherwise args is forwarded unchanged.
//
// It returns whatever error executeMergeRaw returns.
func executeMerge(args ...string) error {
	if hasOutputSchemaFlag(args) {
		return executeMergeRaw(args...)
	}

	// Most integration tests were written against the full envelope; keep
	// that behavior unless the caller explicitly asks for another schema.
	//
	// The full slice expression caps capacity at len(args), so append always
	// allocates a fresh array. Without it, a caller that spreads a slice with
	// spare capacity (executeMerge(xs...)) would have its backing array
	// overwritten by the injected flag.
	withSchema := append(args[:len(args):len(args)], "--output-schema", config.OutputSchemaSeriatim)
	return executeMergeRaw(withSchema...)
}
// executeMergeRaw builds a fresh root command, sets its arguments to "merge"
// followed by args exactly as given (no flags are added), executes it, and
// returns the resulting error.
func executeMergeRaw(args ...string) error {
	argv := make([]string, 0, len(args)+1)
	argv = append(argv, "merge")
	argv = append(argv, args...)

	root := NewRootCommand()
	root.SetArgs(argv)
	return root.Execute()
}
// hasOutputSchemaFlag reports whether args already selects an output schema.
//
// Both spellings are recognized: the two-token form ("--output-schema",
// "value") and the single-token form ("--output-schema=value"). Matching only
// the bare flag name would miss the "=" form, and the caller would then add a
// second, conflicting --output-schema flag.
//
// NOTE(review): assumes the CLI's flag parser accepts the "--flag=value"
// spelling (pflag/cobra do) — confirm against NewRootCommand.
func hasOutputSchemaFlag(args []string) bool {
	const flagName = "--output-schema"
	for _, arg := range args {
		// The "=" is part of the prefix so that a longer flag name such as
		// "--output-schema-version" is not mistaken for this flag.
		if arg == flagName || strings.HasPrefix(arg, flagName+"=") {
			return true
		}
	}
	return false
}
func writeJSONFile(t *testing.T, dir string, name string, content string) string {
t.Helper()