From 18f1873776db2e3a2632f4986602769544d4dcb2 Mon Sep 17 00:00:00 2001 From: Eric Rakestraw Date: Sun, 26 Apr 2026 13:57:13 -0500 Subject: [PATCH] Implemented an initial transcript merge stage --- go.mod | 5 +- go.sum | 2 + internal/builtin/input.go | 88 ++++++++- internal/builtin/preprocess.go | 61 +++++- internal/builtin/registry.go | 2 +- internal/cli/merge_test.go | 326 ++++++++++++++++++++++++++++++--- internal/model/model.go | 10 +- internal/speaker/map.go | 69 +++++++ 8 files changed, 535 insertions(+), 28 deletions(-) create mode 100644 internal/speaker/map.go diff --git a/go.mod b/go.mod index aff73ab..7cf9fde 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,10 @@ module gitea.maximumdirect.net/eric/seriatim go 1.25 -require github.com/spf13/cobra v1.10.1 +require ( + github.com/spf13/cobra v1.10.1 + gopkg.in/yaml.v3 v3.0.1 +) require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect diff --git a/go.sum b/go.sum index e613680..7af0519 100644 --- a/go.sum +++ b/go.sum @@ -6,5 +6,7 @@ github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/builtin/input.go b/internal/builtin/input.go index 9b191c1..d9bef18 100644 --- a/internal/builtin/input.go +++ b/internal/builtin/input.go @@ -2,7 +2,9 @@ package builtin import ( "context" + "encoding/json" "fmt" + "os" "gitea.maximumdirect.net/eric/seriatim/internal/config" "gitea.maximumdirect.net/eric/seriatim/internal/model" @@ -22,10 +24,92 @@ func (jsonFilesReader) Read(ctx context.Context, cfg config.Config) ([]model.Raw raw := make([]model.RawTranscript, 0, len(cfg.InputFiles)) for _, inputFile := range cfg.InputFiles { - raw = append(raw, model.RawTranscript{Source: inputFile}) + transcript, err := readRawTranscript(inputFile) + if err != nil { + return nil, nil, err + } + raw = append(raw, transcript) } return raw, []report.Event{ - report.Info("input", "json-files", fmt.Sprintf("accepted %d input file(s)", len(raw))), + report.Info("input", "json-files", fmt.Sprintf("decoded %d input file(s)", len(raw))), }, nil } + +type rawTranscriptFile struct { + Segments json.RawMessage `json:"segments"` +} + +type rawSegmentFile struct { + Start json.RawMessage `json:"start"` + End json.RawMessage `json:"end"` + Text json.RawMessage `json:"text"` +} + +func readRawTranscript(path string) (model.RawTranscript, error) { + data, err := os.ReadFile(path) + if err != nil { + return model.RawTranscript{}, fmt.Errorf("read input file %q: %w", path, err) + } + + var parsed rawTranscriptFile + if err := json.Unmarshal(data, &parsed); err != nil { + return model.RawTranscript{}, fmt.Errorf("parse input file %q: %w", path, err) + } + if parsed.Segments == nil || isJSONNull(parsed.Segments) { + return model.RawTranscript{}, fmt.Errorf("input file %q must contain top-level segments array", path) + } + + var rawSegments []rawSegmentFile + if err := json.Unmarshal(parsed.Segments, &rawSegments); err != nil { + return model.RawTranscript{}, fmt.Errorf("input file %q top-level segments must be an array: %w", path, err) + } + + segments := make([]model.RawSegment, 0, len(rawSegments)) + for index, segment := range rawSegments { + if segment.Start == nil || isJSONNull(segment.Start) { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d missing numeric start", path, index) + } + if segment.End == nil || isJSONNull(segment.End) { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d missing numeric end", path, index) + } + if segment.Text == nil || isJSONNull(segment.Text) { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d missing string text", path, index) + } + + var start float64 + if err := json.Unmarshal(segment.Start, &start); err != nil { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d start must be numeric", path, index) + } + var end float64 + if err := json.Unmarshal(segment.End, &end); err != nil { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d end must be numeric", path, index) + } + var text string + if err := json.Unmarshal(segment.Text, &text); err != nil { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d text must be a string", path, index) + } + + if start < 0 { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d has negative start", path, index) + } + if end < start { + return model.RawTranscript{}, fmt.Errorf("input file %q segment %d has end before start", path, index) + } + + segments = append(segments, model.RawSegment{ + Start: start, + End: end, + Text: text, + }) + } + + return model.RawTranscript{ + Source: path, + Segments: segments, + }, nil +} + +func isJSONNull(value json.RawMessage) bool { + return string(value) == "null" +} diff --git a/internal/builtin/preprocess.go b/internal/builtin/preprocess.go index d705585..47770cf 100644 --- a/internal/builtin/preprocess.go +++ b/internal/builtin/preprocess.go @@ -3,11 +3,13 @@ package builtin import ( "context" "fmt" + "strings" "gitea.maximumdirect.net/eric/seriatim/internal/config" "gitea.maximumdirect.net/eric/seriatim/internal/model" "gitea.maximumdirect.net/eric/seriatim/internal/pipeline" "gitea.maximumdirect.net/eric/seriatim/internal/report" + "gitea.maximumdirect.net/eric/seriatim/internal/speaker" ) type noopPreprocessor struct { @@ -42,6 +44,39 @@ func (p noopPreprocessor) Process(ctx context.Context, in pipeline.PreprocessSta }, nil } +type trimText struct{} + +func (trimText) Name() string { + return "trim-text" +} + +func (trimText) Requires() pipeline.ModelState { + return pipeline.StateCanonical +} + +func (trimText) Produces() pipeline.ModelState { + return pipeline.StateCanonical +} + +func (trimText) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) { + if err := ctx.Err(); err != nil { + return pipeline.PreprocessState{}, nil, err + } + if in.State != pipeline.StateCanonical { + return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "trim-text", pipeline.StateCanonical, in.State) + } + + for transcriptIndex := range in.Canonical { + for segmentIndex := range in.Canonical[transcriptIndex].Segments { + in.Canonical[transcriptIndex].Segments[segmentIndex].Text = strings.TrimSpace(in.Canonical[transcriptIndex].Segments[segmentIndex].Text) + } + } + + return in, []report.Event{ + report.Info("preprocessing", "trim-text", "trimmed canonical segment text"), + }, nil +} + type normalizeSpeakers struct{} func (normalizeSpeakers) Name() string { @@ -64,11 +99,33 @@ func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessStat return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "normalize-speakers", pipeline.StateRaw, in.State) } + speakers, err := speaker.LoadMap(cfg.SpeakersFile) + if err != nil { + return pipeline.PreprocessState{}, nil, err + } + canonical := make([]model.CanonicalTranscript, 0, len(in.Raw)) for _, raw := range in.Raw { + canonicalSpeaker, err := speakers.SpeakerForSource(raw.Source) + if err != nil { + return pipeline.PreprocessState{}, nil, err + } + + segments := make([]model.Segment, 0, len(raw.Segments)) + for index, rawSegment := range raw.Segments { + segments = append(segments, model.Segment{ + Source: raw.Source, + SourceSegmentIndex: index, + Speaker: canonicalSpeaker, + Start: rawSegment.Start, + End: rawSegment.End, + Text: rawSegment.Text, + }) + } + canonical = append(canonical, model.CanonicalTranscript{ Source: raw.Source, - Segments: nil, + Segments: segments, }) } @@ -77,6 +134,6 @@ func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessStat Raw: append([]model.RawTranscript(nil), in.Raw...), Canonical: canonical, }, []report.Event{ - report.Info("preprocessing", "normalize-speakers", "created placeholder canonical transcript(s)"), + report.Info("preprocessing", "normalize-speakers", "created canonical transcript(s) from raw input"), }, nil } diff --git a/internal/builtin/registry.go b/internal/builtin/registry.go index 470967f..dcbb428 100644 --- a/internal/builtin/registry.go +++ b/internal/builtin/registry.go @@ -9,7 +9,7 @@ func NewRegistry() *pipeline.Registry { registry.RegisterInputReader(jsonFilesReader{}) registry.RegisterPreprocessor(noopPreprocessor{name: "validate-raw", requires: pipeline.StateRaw, produces: pipeline.StateRaw}) registry.RegisterPreprocessor(normalizeSpeakers{}) - registry.RegisterPreprocessor(noopPreprocessor{name: "trim-text", requires: pipeline.StateCanonical, produces: pipeline.StateCanonical}) + registry.RegisterPreprocessor(trimText{}) registry.RegisterPreprocessor(noopPreprocessor{name: "autocorrect", requires: pipeline.StateCanonical, produces: pipeline.StateCanonical}) registry.RegisterMerger(placeholderMerger{}) registry.RegisterPostprocessor(noopPostprocessor{name: "detect-overlaps"}) diff --git a/internal/cli/merge_test.go b/internal/cli/merge_test.go index 42802d8..1bb9f1f 100644 --- a/internal/cli/merge_test.go +++ b/internal/cli/merge_test.go @@ -11,11 +11,25 @@ import ( "gitea.maximumdirect.net/eric/seriatim/internal/report" ) -func TestMergeWritesPlaceholderOutputAndReport(t *testing.T) { +func TestMergeWritesMergedOutputAndReport(t *testing.T) { dir := t.TempDir() - inputA := writeFile(t, dir, "a.json") - inputB := writeFile(t, dir, "b.json") - speakers := writeFile(t, dir, "speakers.yml") + inputA := writeJSONFile(t, dir, "a.json", `{ + "segments": [ + {"start": 10, "end": 11, "text": " second a ", "words": [{"word": "ignored"}]}, + {"start": 1, "end": 2, "text": "first a"} + ] + }`) + inputB := writeJSONFile(t, dir, "b.json", `{ + "segments": [ + {"start": 5, "end": 6, "text": "first b"} + ] + }`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + a.json: + speaker: Alice + b.json: + speaker: Bob +`) output := filepath.Join(dir, "merged.json") reportPath := filepath.Join(dir, "report.json") @@ -37,9 +51,6 @@ func TestMergeWritesPlaceholderOutputAndReport(t *testing.T) { t.Fatalf("read output bytes: %v", err) } outputJSON := string(outputBytes) - if !strings.Contains(outputJSON, `"segments": []`) { - t.Fatalf("expected segments to serialize as an empty array, got:\n%s", outputJSON) - } if !strings.Contains(outputJSON, `"overlap_groups": []`) { t.Fatalf("expected overlap_groups to serialize as an empty array, got:\n%s", outputJSON) } @@ -49,8 +60,17 @@ func TestMergeWritesPlaceholderOutputAndReport(t *testing.T) { if got, want := transcript.Metadata.InputFiles, []string{inputA, inputB}; !equalStrings(got, want) { t.Fatalf("input files not sorted deterministically: got %v want %v", got, want) } - if len(transcript.Segments) != 0 { - t.Fatalf("expected placeholder output to contain no segments, got %d", len(transcript.Segments)) + if got, want := len(transcript.Segments), 3; got != want { + t.Fatalf("expected merged output to contain %d segments, got %d", want, got) + } + assertSegment(t, transcript.Segments[0], 1, inputA, 1, "Alice", 1, 2, "first a") + assertSegment(t, transcript.Segments[1], 2, inputB, 0, "Bob", 5, 6, "first b") + assertSegment(t, transcript.Segments[2], 3, inputA, 0, "Alice", 10, 11, "second a") + if strings.Contains(outputJSON, "internal_ref") { + t.Fatalf("did not expect internal_ref in output:\n%s", outputJSON) + } + if strings.Contains(outputJSON, "words") { + t.Fatalf("did not expect words in output:\n%s", outputJSON) } if len(transcript.OverlapGroups) != 0 { t.Fatalf("expected placeholder output to contain no overlap groups, got %d", len(transcript.OverlapGroups)) @@ -79,10 +99,62 @@ func TestMergeWritesPlaceholderOutputAndReport(t *testing.T) { } } +func TestMergeTieBreakOrder(t *testing.T) { + dir := t.TempDir() + inputA := writeJSONFile(t, dir, "a.json", `{ + "segments": [ + {"start": 1, "end": 4, "text": "a-late-end"}, + {"start": 1, "end": 2, "text": "a-index-one"} + ] + }`) + inputB := writeJSONFile(t, dir, "b.json", `{ + "segments": [ + {"start": 1, "end": 2, "text": "b-same-time"} + ] + }`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + a.json: + speaker: Alice + b.json: + speaker: Bob +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", inputB, + "--input-file", inputA, + "--speakers", speakers, + "--output-file", output, + ) + if err != nil { + t.Fatalf("merge failed: %v", err) + } + + var transcript model.FinalTranscript + readJSON(t, output, &transcript) + got := []string{ + transcript.Segments[0].Text, + transcript.Segments[1].Text, + transcript.Segments[2].Text, + } + want := []string{"a-index-one", "b-same-time", "a-late-end"} + if !equalStrings(got, want) { + t.Fatalf("tie-break order mismatch: got %v want %v", got, want) + } + for index, segment := range transcript.Segments { + if segment.ID != index+1 { + t.Fatalf("segment %d has id %d; want %d", index, segment.ID, index+1) + } + } +} + func TestUnknownModulesFailDuringValidation(t *testing.T) { dir := t.TempDir() - input := writeFile(t, dir, "input.json") - speakers := writeFile(t, dir, "speakers.yml") + input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + input.json: + speaker: Alice +`) output := filepath.Join(dir, "merged.json") tests := []struct { @@ -134,7 +206,7 @@ func TestUnknownModulesFailDuringValidation(t *testing.T) { func TestInvalidPreprocessingOrderFails(t *testing.T) { dir := t.TempDir() - input := writeFile(t, dir, "input.json") + input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`) output := filepath.Join(dir, "merged.json") err := executeMerge( @@ -152,7 +224,10 @@ func TestInvalidPreprocessingOrderFails(t *testing.T) { func TestMissingInputFileFailsBeforePipelineExecution(t *testing.T) { dir := t.TempDir() - speakers := writeFile(t, dir, "speakers.yml") + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + missing.json: + speaker: Alice +`) output := filepath.Join(dir, "merged.json") err := executeMerge( @@ -170,7 +245,7 @@ func TestMissingInputFileFailsBeforePipelineExecution(t *testing.T) { func TestNormalizeSpeakersRequiresSpeakersFile(t *testing.T) { dir := t.TempDir() - input := writeFile(t, dir, "input.json") + input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`) output := filepath.Join(dir, "merged.json") err := executeMerge( @@ -187,8 +262,11 @@ func TestNormalizeSpeakersRequiresSpeakersFile(t *testing.T) { func TestAutocorrectRequiresAutocorrectFile(t *testing.T) { dir := t.TempDir() - input := writeFile(t, dir, "input.json") - speakers := writeFile(t, dir, "speakers.yml") + input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + input.json: + speaker: Alice +`) output := filepath.Join(dir, "merged.json") err := executeMerge( @@ -207,9 +285,14 @@ func TestAutocorrectRequiresAutocorrectFile(t *testing.T) { func TestOutputJSONIsByteStable(t *testing.T) { dir := t.TempDir() - inputA := writeFile(t, dir, "a.json") - inputB := writeFile(t, dir, "b.json") - speakers := writeFile(t, dir, "speakers.yml") + inputA := writeJSONFile(t, dir, "a.json", `{"segments":[{"start":2,"end":3,"text":"a"}]}`) + inputB := writeJSONFile(t, dir, "b.json", `{"segments":[{"start":1,"end":2,"text":"b"}]}`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + a.json: + speaker: Alice + b.json: + speaker: Bob +`) outputA := filepath.Join(dir, "merged-a.json") outputB := filepath.Join(dir, "merged-b.json") @@ -241,17 +324,192 @@ func TestOutputJSONIsByteStable(t *testing.T) { } } +func TestMissingSpeakerMappingFails(t *testing.T) { + dir := t.TempDir() + input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + other.json: + speaker: Alice +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", input, + "--speakers", speakers, + "--output-file", output, + ) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), `speaker map has no entry for input basename "input.json"`) { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestMalformedJSONFails(t *testing.T) { + dir := t.TempDir() + input := writeJSONFile(t, dir, "input.json", `{"segments":[`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + input.json: + speaker: Alice +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", input, + "--speakers", speakers, + "--output-file", output, + ) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "parse input file") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestMissingTopLevelSegmentsFails(t *testing.T) { + dir := t.TempDir() + input := writeJSONFile(t, dir, "input.json", `{}`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + input.json: + speaker: Alice +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", input, + "--speakers", speakers, + "--output-file", output, + ) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "must contain top-level segments array") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestInvalidSegmentFieldsFailWithSourceAndIndex(t *testing.T) { + tests := []struct { + name string + json string + want string + }{ + { + name: "missing start", + json: `{"segments":[{"end":1,"text":"x"}]}`, + want: "segment 0 missing numeric start", + }, + { + name: "wrong typed end", + json: `{"segments":[{"start":0,"end":"1","text":"x"}]}`, + want: "segment 0 end must be numeric", + }, + { + name: "wrong typed text", + json: `{"segments":[{"start":0,"end":1,"text":7}]}`, + want: "segment 0 text must be a string", + }, + { + name: "null text", + json: `{"segments":[{"start":0,"end":1,"text":null}]}`, + want: "segment 0 missing string text", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + dir := t.TempDir() + input := writeJSONFile(t, dir, "input.json", test.json) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + input.json: + speaker: Alice +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", input, + "--speakers", speakers, + "--output-file", output, + ) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), input) { + t.Fatalf("expected error to contain source path %q, got %v", input, err) + } + if !strings.Contains(err.Error(), test.want) { + t.Fatalf("expected error to contain %q, got %v", test.want, err) + } + }) + } +} + +func TestInvalidTimingFails(t *testing.T) { + tests := []struct { + name string + json string + want string + }{ + { + name: "negative start", + json: `{"segments":[{"start":-1,"end":1,"text":"x"}]}`, + want: "segment 0 has negative start", + }, + { + name: "end before start", + json: `{"segments":[{"start":2,"end":1,"text":"x"}]}`, + want: "segment 0 has end before start", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + dir := t.TempDir() + input := writeJSONFile(t, dir, "input.json", test.json) + speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs: + input.json: + speaker: Alice +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", input, + "--speakers", speakers, + "--output-file", output, + ) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), test.want) { + t.Fatalf("expected error to contain %q, got %v", test.want, err) + } + }) + } +} + func executeMerge(args ...string) error { cmd := NewRootCommand() cmd.SetArgs(append([]string{"merge"}, args...)) return cmd.Execute() } -func writeFile(t *testing.T, dir string, name string) string { +func writeJSONFile(t *testing.T, dir string, name string, content string) string { t.Helper() path := filepath.Join(dir, name) - if err := os.WriteFile(path, []byte("{}\n"), 0o600); err != nil { + if err := os.WriteFile(path, []byte(content+"\n"), 0o600); err != nil { + t.Fatalf("write file: %v", err) + } + return path +} + +func writeYAMLFile(t *testing.T, dir string, name string, content string) string { + t.Helper() + + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(content), 0o600); err != nil { t.Fatalf("write file: %v", err) } return path @@ -280,3 +538,29 @@ func equalStrings(left []string, right []string) bool { } return true } + +func assertSegment(t *testing.T, segment model.Segment, id int, source string, sourceIndex int, speaker string, start float64, end float64, text string) { + t.Helper() + + if segment.ID != id { + t.Fatalf("segment ID = %d, want %d", segment.ID, id) + } + if segment.Source != source { + t.Fatalf("segment source = %q, want %q", segment.Source, source) + } + if segment.SourceSegmentIndex != sourceIndex { + t.Fatalf("segment source index = %d, want %d", segment.SourceSegmentIndex, sourceIndex) + } + if segment.Speaker != speaker { + t.Fatalf("segment speaker = %q, want %q", segment.Speaker, speaker) + } + if segment.Start != start { + t.Fatalf("segment start = %f, want %f", segment.Start, start) + } + if segment.End != end { + t.Fatalf("segment end = %f, want %f", segment.End, end) + } + if segment.Text != text { + t.Fatalf("segment text = %q, want %q", segment.Text, text) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 0e1e648..2360397 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -2,7 +2,15 @@ package model // RawTranscript is a loaded input document before canonical normalization. type RawTranscript struct { - Source string `json:"source"` + Source string `json:"source"` + Segments []RawSegment `json:"segments"` +} + +// RawSegment is the supported WhisperX segment subset. +type RawSegment struct { + Start float64 `json:"start"` + End float64 `json:"end"` + Text string `json:"text"` } // CanonicalTranscript is a per-speaker transcript in seriatim's internal model. diff --git a/internal/speaker/map.go b/internal/speaker/map.go new file mode 100644 index 0000000..0ab3ffa --- /dev/null +++ b/internal/speaker/map.go @@ -0,0 +1,69 @@ +package speaker + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" +) + +// Map resolves input file basenames to canonical speaker names. +type Map struct { + inputs map[string]Input +} + +// Input describes one input entry in speakers.yml. +type Input struct { + Speaker string `yaml:"speaker"` +} + +type fileSchema struct { + Inputs map[string]Input `yaml:"inputs"` +} + +// LoadMap parses a speakers.yml file. +func LoadMap(path string) (Map, error) { + data, err := os.ReadFile(path) + if err != nil { + return Map{}, err + } + + var parsed fileSchema + if err := yaml.Unmarshal(data, &parsed); err != nil { + return Map{}, fmt.Errorf("parse speaker map %q: %w", path, err) + } + if len(parsed.Inputs) == 0 { + return Map{}, fmt.Errorf("speaker map %q must contain at least one inputs entry", path) + } + + inputs := make(map[string]Input, len(parsed.Inputs)) + for key, input := range parsed.Inputs { + basename := filepath.Base(strings.TrimSpace(key)) + if basename == "." || basename == "" { + return Map{}, fmt.Errorf("speaker map %q contains an empty input key", path) + } + if _, exists := inputs[basename]; exists { + return Map{}, fmt.Errorf("speaker map %q contains duplicate basename mapping for %q", path, basename) + } + + input.Speaker = strings.TrimSpace(input.Speaker) + if input.Speaker == "" { + return Map{}, fmt.Errorf("speaker map entry %q must include speaker", basename) + } + inputs[basename] = input + } + + return Map{inputs: inputs}, nil +} + +// SpeakerForSource returns the canonical speaker for a transcript source path. +func (m Map) SpeakerForSource(source string) (string, error) { + basename := filepath.Base(source) + input, ok := m.inputs[basename] + if !ok { + return "", fmt.Errorf("speaker map has no entry for input basename %q", basename) + } + return input.Speaker, nil +}