780 lines
21 KiB
Go
780 lines
21 KiB
Go
package cli
|
|
|
|
import (
|
|
"encoding/json"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
|
)
|
|
|
|
func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
|
dir := t.TempDir()
|
|
inputA := writeJSONFile(t, dir, "a.json", `{
|
|
"segments": [
|
|
{"start": 10, "end": 11, "text": " second a ", "words": [{"word": "ignored"}]},
|
|
{"start": 1, "end": 2, "text": "first a"}
|
|
]
|
|
}`)
|
|
inputB := writeJSONFile(t, dir, "b.json", `{
|
|
"segments": [
|
|
{"start": 5, "end": 6, "text": "first b"}
|
|
]
|
|
}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["a.json"]
|
|
- speaker: Bob
|
|
match: ["b.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
reportPath := filepath.Join(dir, "report.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", inputB,
|
|
"--input-file", inputA,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
"--report-file", reportPath,
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("merge failed: %v", err)
|
|
}
|
|
|
|
var transcript model.FinalTranscript
|
|
readJSON(t, output, &transcript)
|
|
outputBytes, err := os.ReadFile(output)
|
|
if err != nil {
|
|
t.Fatalf("read output bytes: %v", err)
|
|
}
|
|
outputJSON := string(outputBytes)
|
|
if !strings.Contains(outputJSON, `"overlap_groups": []`) {
|
|
t.Fatalf("expected overlap_groups to serialize as an empty array, got:\n%s", outputJSON)
|
|
}
|
|
if transcript.Metadata.Application != "seriatim" {
|
|
t.Fatalf("unexpected application metadata: %q", transcript.Metadata.Application)
|
|
}
|
|
if got, want := transcript.Metadata.InputFiles, []string{inputA, inputB}; !equalStrings(got, want) {
|
|
t.Fatalf("input files not sorted deterministically: got %v want %v", got, want)
|
|
}
|
|
if got, want := len(transcript.Segments), 3; got != want {
|
|
t.Fatalf("expected merged output to contain %d segments, got %d", want, got)
|
|
}
|
|
assertSegment(t, transcript.Segments[0], 1, inputA, 1, "Alice", 1, 2, "first a")
|
|
assertSegment(t, transcript.Segments[1], 2, inputB, 0, "Bob", 5, 6, "first b")
|
|
assertSegment(t, transcript.Segments[2], 3, inputA, 0, "Alice", 10, 11, "second a")
|
|
if strings.Contains(outputJSON, "internal_ref") {
|
|
t.Fatalf("did not expect internal_ref in output:\n%s", outputJSON)
|
|
}
|
|
if strings.Contains(outputJSON, "words") {
|
|
t.Fatalf("did not expect words in output:\n%s", outputJSON)
|
|
}
|
|
if len(transcript.OverlapGroups) != 0 {
|
|
t.Fatalf("expected placeholder output to contain no overlap groups, got %d", len(transcript.OverlapGroups))
|
|
}
|
|
|
|
var rpt report.Report
|
|
readJSON(t, reportPath, &rpt)
|
|
gotModules := make([]string, 0, len(rpt.Events))
|
|
for _, event := range rpt.Events {
|
|
gotModules = append(gotModules, event.Module)
|
|
}
|
|
wantModules := []string{
|
|
"json-files",
|
|
"validate-raw",
|
|
"normalize-speakers",
|
|
"trim-text",
|
|
"placeholder-merger",
|
|
"detect-overlaps",
|
|
"resolve-overlaps",
|
|
"autocorrect",
|
|
"assign-ids",
|
|
"validate-output",
|
|
"json",
|
|
}
|
|
if !equalStrings(gotModules, wantModules) {
|
|
t.Fatalf("report event order mismatch:\ngot %v\nwant %v", gotModules, wantModules)
|
|
}
|
|
}
|
|
|
|
func TestMergeTieBreakOrder(t *testing.T) {
|
|
dir := t.TempDir()
|
|
inputA := writeJSONFile(t, dir, "a.json", `{
|
|
"segments": [
|
|
{"start": 1, "end": 4, "text": "a-late-end"},
|
|
{"start": 1, "end": 2, "text": "a-index-one"}
|
|
]
|
|
}`)
|
|
inputB := writeJSONFile(t, dir, "b.json", `{
|
|
"segments": [
|
|
{"start": 1, "end": 2, "text": "b-same-time"}
|
|
]
|
|
}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["a.json"]
|
|
- speaker: Bob
|
|
match: ["b.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", inputB,
|
|
"--input-file", inputA,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("merge failed: %v", err)
|
|
}
|
|
|
|
var transcript model.FinalTranscript
|
|
readJSON(t, output, &transcript)
|
|
got := []string{
|
|
transcript.Segments[0].Text,
|
|
transcript.Segments[1].Text,
|
|
transcript.Segments[2].Text,
|
|
}
|
|
want := []string{"a-index-one", "b-same-time", "a-late-end"}
|
|
if !equalStrings(got, want) {
|
|
t.Fatalf("tie-break order mismatch: got %v want %v", got, want)
|
|
}
|
|
for index, segment := range transcript.Segments {
|
|
if segment.ID != index+1 {
|
|
t.Fatalf("segment %d has id %d; want %d", index, segment.ID, index+1)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestMergeDetectsOverlapGroups(t *testing.T) {
|
|
dir := t.TempDir()
|
|
inputA := writeJSONFile(t, dir, "a.json", `{
|
|
"segments": [
|
|
{"start": 1, "end": 5, "text": "alice long"},
|
|
{"start": 2, "end": 3, "text": "alice nested"}
|
|
]
|
|
}`)
|
|
inputB := writeJSONFile(t, dir, "b.json", `{
|
|
"segments": [
|
|
{"start": 4, "end": 6, "text": "bob overlap"}
|
|
]
|
|
}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["a.json"]
|
|
- speaker: Bob
|
|
match: ["b.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
reportPath := filepath.Join(dir, "report.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", inputB,
|
|
"--input-file", inputA,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
"--report-file", reportPath,
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("merge failed: %v", err)
|
|
}
|
|
|
|
var transcript model.FinalTranscript
|
|
readJSON(t, output, &transcript)
|
|
if len(transcript.OverlapGroups) != 1 {
|
|
t.Fatalf("overlap group count = %d, want 1", len(transcript.OverlapGroups))
|
|
}
|
|
group := transcript.OverlapGroups[0]
|
|
if group.ID != 1 {
|
|
t.Fatalf("group ID = %d, want 1", group.ID)
|
|
}
|
|
if group.Start != 1 || group.End != 6 {
|
|
t.Fatalf("group bounds = %f-%f, want 1-6", group.Start, group.End)
|
|
}
|
|
wantRefs := []string{inputA + "#0", inputA + "#1", inputB + "#0"}
|
|
if !equalStrings(group.Segments, wantRefs) {
|
|
t.Fatalf("group refs = %v, want %v", group.Segments, wantRefs)
|
|
}
|
|
if !equalStrings(group.Speakers, []string{"Alice", "Bob"}) {
|
|
t.Fatalf("group speakers = %v, want [Alice Bob]", group.Speakers)
|
|
}
|
|
if group.Class != "unknown" || group.Resolution != "unresolved" {
|
|
t.Fatalf("unexpected group class/resolution: %q/%q", group.Class, group.Resolution)
|
|
}
|
|
for index, segment := range transcript.Segments {
|
|
if segment.OverlapGroupID != 1 {
|
|
t.Fatalf("segment %d overlap group ID = %d, want 1", index, segment.OverlapGroupID)
|
|
}
|
|
}
|
|
|
|
var rpt report.Report
|
|
readJSON(t, reportPath, &rpt)
|
|
if !hasReportEvent(rpt, "postprocessing", "detect-overlaps", "detected 1 overlap group(s)") {
|
|
t.Fatal("expected detect-overlaps report event")
|
|
}
|
|
}
|
|
|
|
func TestSpeakerMatchingUsesFirstMatchingRuleCaseInsensitive(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "2026-04-19-Adam_Rakestraw.json", `{
|
|
"segments": [
|
|
{"start": 1, "end": 2, "text": "hello"}
|
|
]
|
|
}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: First Match
|
|
match: ["adam"]
|
|
- speaker: Later Match
|
|
match: ["Adam_Rakestraw"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("merge failed: %v", err)
|
|
}
|
|
|
|
var transcript model.FinalTranscript
|
|
readJSON(t, output, &transcript)
|
|
if got, want := transcript.Segments[0].Speaker, "First Match"; got != want {
|
|
t.Fatalf("speaker = %q, want %q", got, want)
|
|
}
|
|
}
|
|
|
|
func TestUnknownModulesFailDuringValidation(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["input.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
tests := []struct {
|
|
name string
|
|
args []string
|
|
want string
|
|
}{
|
|
{
|
|
name: "input reader",
|
|
args: []string{"--input-reader", "missing-reader"},
|
|
want: `unknown input reader "missing-reader"`,
|
|
},
|
|
{
|
|
name: "preprocessing",
|
|
args: []string{"--preprocessing-modules", "validate-raw,missing-module"},
|
|
want: `unknown preprocessing module "missing-module"`,
|
|
},
|
|
{
|
|
name: "postprocessing",
|
|
args: []string{"--postprocessing-modules", "missing-module"},
|
|
want: `unknown postprocessing module "missing-module"`,
|
|
},
|
|
{
|
|
name: "output",
|
|
args: []string{"--output-modules", "missing-module"},
|
|
want: `unknown output module "missing-module"`,
|
|
},
|
|
}
|
|
|
|
for _, test := range tests {
|
|
t.Run(test.name, func(t *testing.T) {
|
|
args := []string{
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
}
|
|
args = append(args, test.args...)
|
|
|
|
err := executeMerge(args...)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), test.want) {
|
|
t.Fatalf("expected error to contain %q, got %q", test.want, err.Error())
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestInvalidPreprocessingOrderFails(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--output-file", output,
|
|
"--preprocessing-modules", "trim-text,validate-raw",
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), `requires state "canonical"`) {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestMissingInputFileFailsBeforePipelineExecution(t *testing.T) {
|
|
dir := t.TempDir()
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["missing.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", filepath.Join(dir, "missing.json"),
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), "--input-file") {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestDefaultMergeWorksWithoutSpeakersOrAutocorrect(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[{"start":1,"end":2,"text":"Frank"}]}`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
reportPath := filepath.Join(dir, "report.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--output-file", output,
|
|
"--report-file", reportPath,
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("merge failed: %v", err)
|
|
}
|
|
|
|
var transcript model.FinalTranscript
|
|
readJSON(t, output, &transcript)
|
|
if got, want := transcript.Segments[0].Speaker, "input.json"; got != want {
|
|
t.Fatalf("speaker = %q, want %q", got, want)
|
|
}
|
|
if got, want := transcript.Segments[0].Text, "Frank"; got != want {
|
|
t.Fatalf("text = %q, want %q", got, want)
|
|
}
|
|
|
|
var rpt report.Report
|
|
readJSON(t, reportPath, &rpt)
|
|
if !hasReportEvent(rpt, "preprocessing", "normalize-speakers", "using input basenames") {
|
|
t.Fatal("expected normalize-speakers fallback report event")
|
|
}
|
|
if !hasReportEvent(rpt, "postprocessing", "autocorrect", "skipped autocorrect") {
|
|
t.Fatal("expected autocorrect skip report event")
|
|
}
|
|
}
|
|
|
|
func TestPreprocessingAutocorrectIsUnknownModule(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["input.json"]
|
|
`)
|
|
autocorrect := writeYAMLFile(t, dir, "autocorrect.yml", `autocorrect:
|
|
- target: Hrank
|
|
match: ["Frank"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--autocorrect", autocorrect,
|
|
"--output-file", output,
|
|
"--preprocessing-modules", "validate-raw,normalize-speakers,autocorrect",
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), `unknown preprocessing module "autocorrect"`) {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestPostprocessingAutocorrectUpdatesOutputAndReport(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{
|
|
"segments": [
|
|
{"start": 1, "end": 2, "text": "Frank met Mike Pat, not Franklin."},
|
|
{"start": 3, "end": 4, "text": "God-free and FRANK stayed."}
|
|
]
|
|
}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["input.json"]
|
|
`)
|
|
autocorrect := writeYAMLFile(t, dir, "autocorrect.yml", `autocorrect:
|
|
- target: Hrank
|
|
match: ["Frank"]
|
|
- target: Mike Brown
|
|
match: ["Mike Pat"]
|
|
- target: Godfrey
|
|
match: ["God-free"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
reportPath := filepath.Join(dir, "report.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--autocorrect", autocorrect,
|
|
"--output-file", output,
|
|
"--report-file", reportPath,
|
|
"--postprocessing-modules", "detect-overlaps,resolve-overlaps,autocorrect,assign-ids,validate-output",
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("merge failed: %v", err)
|
|
}
|
|
|
|
var transcript model.FinalTranscript
|
|
readJSON(t, output, &transcript)
|
|
if got, want := transcript.Segments[0].Text, "Hrank met Mike Brown, not Franklin."; got != want {
|
|
t.Fatalf("segment 0 text = %q, want %q", got, want)
|
|
}
|
|
if got, want := transcript.Segments[1].Text, "Godfrey and FRANK stayed."; got != want {
|
|
t.Fatalf("segment 1 text = %q, want %q", got, want)
|
|
}
|
|
|
|
var rpt report.Report
|
|
readJSON(t, reportPath, &rpt)
|
|
found := false
|
|
for _, event := range rpt.Events {
|
|
if event.Stage == "postprocessing" && event.Module == "autocorrect" {
|
|
found = true
|
|
if !strings.Contains(event.Message, "applied 3 autocorrect replacement(s)") {
|
|
t.Fatalf("unexpected autocorrect report message: %q", event.Message)
|
|
}
|
|
}
|
|
}
|
|
if !found {
|
|
t.Fatal("expected autocorrect report event")
|
|
}
|
|
}
|
|
|
|
func TestInvalidAutocorrectFileFailsWhenProvided(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[{"start":1,"end":2,"text":"Frank"}]}`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
autocorrect := writeYAMLFile(t, dir, "autocorrect.yml", `autocorrect:
|
|
- target: ""
|
|
match: ["Frank"]
|
|
`)
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--autocorrect", autocorrect,
|
|
"--output-file", output,
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), "must include target") {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOutputJSONIsByteStable(t *testing.T) {
|
|
dir := t.TempDir()
|
|
inputA := writeJSONFile(t, dir, "a.json", `{"segments":[{"start":2,"end":3,"text":"a"}]}`)
|
|
inputB := writeJSONFile(t, dir, "b.json", `{"segments":[{"start":1,"end":2,"text":"b"}]}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["a.json"]
|
|
- speaker: Bob
|
|
match: ["b.json"]
|
|
`)
|
|
outputA := filepath.Join(dir, "merged-a.json")
|
|
outputB := filepath.Join(dir, "merged-b.json")
|
|
|
|
args := []string{
|
|
"--input-file", inputB,
|
|
"--input-file", inputA,
|
|
"--speakers", speakers,
|
|
}
|
|
|
|
err := executeMerge(append(append([]string(nil), args...), "--output-file", outputA)...)
|
|
if err != nil {
|
|
t.Fatalf("first merge failed: %v", err)
|
|
}
|
|
err = executeMerge(append(append([]string(nil), args...), "--output-file", outputB)...)
|
|
if err != nil {
|
|
t.Fatalf("second merge failed: %v", err)
|
|
}
|
|
|
|
first, err := os.ReadFile(outputA)
|
|
if err != nil {
|
|
t.Fatalf("read first output: %v", err)
|
|
}
|
|
second, err := os.ReadFile(outputB)
|
|
if err != nil {
|
|
t.Fatalf("read second output: %v", err)
|
|
}
|
|
if string(first) != string(second) {
|
|
t.Fatalf("expected byte-stable output\nfirst:\n%s\nsecond:\n%s", first, second)
|
|
}
|
|
}
|
|
|
|
func TestMissingSpeakerMappingFails(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["other.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), `speaker map has no match for input basename "input.json"`) {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestMalformedJSONFails(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["input.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), "parse input file") {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestMissingTopLevelSegmentsFails(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", `{}`)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["input.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), "must contain top-level segments array") {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestInvalidSegmentFieldsFailWithSourceAndIndex(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
json string
|
|
want string
|
|
}{
|
|
{
|
|
name: "missing start",
|
|
json: `{"segments":[{"end":1,"text":"x"}]}`,
|
|
want: "segment 0 missing numeric start",
|
|
},
|
|
{
|
|
name: "wrong typed end",
|
|
json: `{"segments":[{"start":0,"end":"1","text":"x"}]}`,
|
|
want: "segment 0 end must be numeric",
|
|
},
|
|
{
|
|
name: "wrong typed text",
|
|
json: `{"segments":[{"start":0,"end":1,"text":7}]}`,
|
|
want: "segment 0 text must be a string",
|
|
},
|
|
{
|
|
name: "null text",
|
|
json: `{"segments":[{"start":0,"end":1,"text":null}]}`,
|
|
want: "segment 0 missing string text",
|
|
},
|
|
}
|
|
|
|
for _, test := range tests {
|
|
t.Run(test.name, func(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", test.json)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["input.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), input) {
|
|
t.Fatalf("expected error to contain source path %q, got %v", input, err)
|
|
}
|
|
if !strings.Contains(err.Error(), test.want) {
|
|
t.Fatalf("expected error to contain %q, got %v", test.want, err)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestInvalidTimingFails(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
json string
|
|
want string
|
|
}{
|
|
{
|
|
name: "negative start",
|
|
json: `{"segments":[{"start":-1,"end":1,"text":"x"}]}`,
|
|
want: "segment 0 has negative start",
|
|
},
|
|
{
|
|
name: "end before start",
|
|
json: `{"segments":[{"start":2,"end":1,"text":"x"}]}`,
|
|
want: "segment 0 has end before start",
|
|
},
|
|
}
|
|
|
|
for _, test := range tests {
|
|
t.Run(test.name, func(t *testing.T) {
|
|
dir := t.TempDir()
|
|
input := writeJSONFile(t, dir, "input.json", test.json)
|
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
|
- speaker: Alice
|
|
match: ["input.json"]
|
|
`)
|
|
output := filepath.Join(dir, "merged.json")
|
|
|
|
err := executeMerge(
|
|
"--input-file", input,
|
|
"--speakers", speakers,
|
|
"--output-file", output,
|
|
)
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if !strings.Contains(err.Error(), test.want) {
|
|
t.Fatalf("expected error to contain %q, got %v", test.want, err)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func executeMerge(args ...string) error {
|
|
cmd := NewRootCommand()
|
|
cmd.SetArgs(append([]string{"merge"}, args...))
|
|
return cmd.Execute()
|
|
}
|
|
|
|
func writeJSONFile(t *testing.T, dir string, name string, content string) string {
|
|
t.Helper()
|
|
|
|
path := filepath.Join(dir, name)
|
|
if err := os.WriteFile(path, []byte(content+"\n"), 0o600); err != nil {
|
|
t.Fatalf("write file: %v", err)
|
|
}
|
|
return path
|
|
}
|
|
|
|
func writeYAMLFile(t *testing.T, dir string, name string, content string) string {
|
|
t.Helper()
|
|
|
|
path := filepath.Join(dir, name)
|
|
if err := os.WriteFile(path, []byte(content), 0o600); err != nil {
|
|
t.Fatalf("write file: %v", err)
|
|
}
|
|
return path
|
|
}
|
|
|
|
func readJSON(t *testing.T, path string, target any) {
|
|
t.Helper()
|
|
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
t.Fatalf("read %s: %v", path, err)
|
|
}
|
|
if err := json.Unmarshal(data, target); err != nil {
|
|
t.Fatalf("unmarshal %s: %v", path, err)
|
|
}
|
|
}
|
|
|
|
// equalStrings reports whether left and right have the same length and hold
// equal strings at every index. A nil slice and an empty slice compare equal.
func equalStrings(left []string, right []string) bool {
	count := len(left)
	if count != len(right) {
		return false
	}
	for i := 0; i < count; i++ {
		if left[i] != right[i] {
			return false
		}
	}
	return true
}
|
|
|
|
func hasReportEvent(rpt report.Report, stage string, module string, messageSubstring string) bool {
|
|
for _, event := range rpt.Events {
|
|
if event.Stage == stage && event.Module == module && strings.Contains(event.Message, messageSubstring) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func assertSegment(t *testing.T, segment model.Segment, id int, source string, sourceIndex int, speaker string, start float64, end float64, text string) {
|
|
t.Helper()
|
|
|
|
if segment.ID != id {
|
|
t.Fatalf("segment ID = %d, want %d", segment.ID, id)
|
|
}
|
|
if segment.Source != source {
|
|
t.Fatalf("segment source = %q, want %q", segment.Source, source)
|
|
}
|
|
if segment.SourceSegmentIndex != sourceIndex {
|
|
t.Fatalf("segment source index = %d, want %d", segment.SourceSegmentIndex, sourceIndex)
|
|
}
|
|
if segment.Speaker != speaker {
|
|
t.Fatalf("segment speaker = %q, want %q", segment.Speaker, speaker)
|
|
}
|
|
if segment.Start != start {
|
|
t.Fatalf("segment start = %f, want %f", segment.Start, start)
|
|
}
|
|
if segment.End != end {
|
|
t.Fatalf("segment end = %f, want %f", segment.End, end)
|
|
}
|
|
if segment.Text != text {
|
|
t.Fatalf("segment text = %q, want %q", segment.Text, text)
|
|
}
|
|
}
|