Added support for a minimal JSON output schema
This commit is contained in:
@@ -10,6 +10,7 @@ import (
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
||||
"gitea.maximumdirect.net/eric/seriatim/schema"
|
||||
)
|
||||
|
||||
func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
@@ -111,6 +112,64 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeWritesMinimalOutputSchema(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "input.json", `{
|
||||
"segments": [
|
||||
{"start": 1, "end": 2, "text": " Yeah. "},
|
||||
{"start": 8, "end": 9, "text": " next "}
|
||||
]
|
||||
}`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
reportPath := filepath.Join(dir, "report.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--output-schema", "minimal",
|
||||
"--report-file", reportPath,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript schema.MinimalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
if transcript.Metadata.Application != "seriatim" {
|
||||
t.Fatalf("application = %q, want seriatim", transcript.Metadata.Application)
|
||||
}
|
||||
if transcript.Metadata.OutputSchema != "minimal" {
|
||||
t.Fatalf("output_schema = %q, want minimal", transcript.Metadata.OutputSchema)
|
||||
}
|
||||
if got, want := len(transcript.Segments), 2; got != want {
|
||||
t.Fatalf("segment count = %d, want %d", got, want)
|
||||
}
|
||||
for index, segment := range transcript.Segments {
|
||||
if segment.ID != index+1 {
|
||||
t.Fatalf("segment %d id = %d, want %d", index, segment.ID, index+1)
|
||||
}
|
||||
}
|
||||
if transcript.Segments[0].Speaker != "input.json" || transcript.Segments[0].Text != "Yeah." {
|
||||
t.Fatalf("first segment = %#v", transcript.Segments[0])
|
||||
}
|
||||
|
||||
outputBytes, err := os.ReadFile(output)
|
||||
if err != nil {
|
||||
t.Fatalf("read output: %v", err)
|
||||
}
|
||||
for _, forbidden := range []string{"overlap_groups", "categories", "source", "derived_from"} {
|
||||
if strings.Contains(string(outputBytes), forbidden) {
|
||||
t.Fatalf("minimal output contains %q:\n%s", forbidden, outputBytes)
|
||||
}
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
if !hasReportEvent(rpt, "postprocessing", "validate-output", "validated 2 output segment(s)") {
|
||||
t.Fatal("expected validate-output report event")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeTieBreakOrder(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
@@ -182,6 +241,29 @@ func TestMergeValidateOutputBeforeAssignIDsFails(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeValidateMinimalOutputBeforeAssignIDsFails(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "input.json", `{
|
||||
"segments": [
|
||||
{"start": 1, "end": 2, "text": "hello"}
|
||||
]
|
||||
}`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--output-schema", "minimal",
|
||||
"--postprocessing-modules", "validate-output,assign-ids",
|
||||
)
|
||||
if err == nil {
|
||||
t.Fatal("expected validation error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "validate-output: segment 0 has id 0; want 1") {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeDetectsOverlapGroups(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
@@ -963,6 +1045,24 @@ func TestUnknownModulesFailDuringValidation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnknownOutputSchemaFailsDuringValidation(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--output-schema", "compact",
|
||||
)
|
||||
if err == nil {
|
||||
t.Fatal("expected output schema error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "--output-schema must be one of") {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidPreprocessingOrderFails(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
||||
|
||||
Reference in New Issue
Block a user