From 28c2eea340d9d1ca1e04a7fe21c3d69b064317f4 Mon Sep 17 00:00:00 2001 From: Eric Rakestraw Date: Mon, 27 Apr 2026 21:48:04 -0500 Subject: [PATCH] Cleaned up documentation and development artifacts in advance of release --- README.md | 47 +++++--- architecture.md | 54 +++++----- internal/artifact/transcript.go | 8 +- internal/artifact/transcript_test.go | 22 ++++ internal/buildinfo/buildinfo.go | 7 ++ internal/builtin/merge.go | 11 +- internal/builtin/postprocess.go | 18 ---- internal/builtin/preprocess.go | 57 +++++++--- internal/builtin/preprocess_test.go | 153 +++++++++++++++++++++++++++ internal/builtin/registry.go | 6 +- internal/cli/merge_test.go | 7 +- internal/cli/root.go | 3 + internal/cli/root_test.go | 10 ++ internal/pipeline/runner.go | 4 +- internal/pipeline/runner_test.go | 21 ++++ 15 files changed, 336 insertions(+), 92 deletions(-) create mode 100644 internal/artifact/transcript_test.go create mode 100644 internal/buildinfo/buildinfo.go create mode 100644 internal/builtin/preprocess_test.go create mode 100644 internal/cli/root_test.go create mode 100644 internal/pipeline/runner_test.go diff --git a/README.md b/README.md index bfbd759..f5ce48c 100644 --- a/README.md +++ b/README.md @@ -31,21 +31,34 @@ go run ./cmd/seriatim merge \ seriatim merge [flags] ``` -Required flags for the default pipeline: +Global flags: -- `--input-file`: input transcript JSON file. Repeat once per speaker/input file. -- `--output-file`: merged transcript JSON output path. +| Flag | Description | +| --- | --- | +| `--help` | Show command help. | +| `--version` | Show application version. Local builds default to `dev`; release builds inject the release version. | -Optional flags: +`merge` flags: -- `--report-file`: write a JSON report with pipeline events. -- `--speakers`: speaker map YAML file. When omitted, input file basenames are used as speaker labels. -- `--autocorrect`: autocorrect rules file. When omitted, the default `autocorrect` module no-ops. 
-- `--input-reader`: input reader module. Default: `json-files`. -- `--output-modules`: comma-separated output modules. Default: `json`. -- `--preprocessing-modules`: comma-separated preprocessing modules. Default: `validate-raw,normalize-speakers,trim-text`. -- `--postprocessing-modules`: comma-separated postprocessing modules. Default: `detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output`. -- `--coalesce-gap`: maximum same-speaker gap in seconds for `coalesce`. Default: `3.0`. +| Flag | Required | Default | Description | +| --- | --- | --- | --- | +| `--input-file` | Yes | none | Input transcript JSON file. Repeat once per speaker/input file. | +| `--output-file` | Yes | none | Merged transcript JSON output path. | +| `--report-file` | No | none | Optional report JSON output path. | +| `--speakers` | No | none | Speaker map YAML file. When omitted, input file basenames are used as speaker labels. | +| `--autocorrect` | No | none | Autocorrect rules YAML file. When omitted, the default `autocorrect` module leaves text unchanged. | +| `--input-reader` | No | `json-files` | Input reader module. | +| `--output-modules` | No | `json` | Comma-separated output modules. | +| `--preprocessing-modules` | No | `validate-raw,normalize-speakers,trim-text` | Comma-separated preprocessing modules, evaluated in order. | +| `--postprocessing-modules` | No | `detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output` | Comma-separated postprocessing modules, evaluated in order. | +| `--coalesce-gap` | No | `3.0` | Maximum same-speaker gap in seconds for `coalesce`. Must be a non-negative float. | + +Environment variables: + +| Environment Variable | Default | Description | +| --- | --- | --- | +| `SERIATIM_OVERLAP_WORD_RUN_GAP` | `0.75` | Maximum gap in seconds between adjacent timed words when `resolve-overlaps` builds word-run replacement segments. 
Must be a positive float. | +| `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW` | `0.4` | Near-start window in seconds for ordering replacement word runs shortest-first. Must be a positive float. | ## Input JSON Format @@ -312,4 +325,12 @@ Matching behavior: - Only JSON input is supported. - Overlap resolution depends on WhisperX word timing; groups without usable word timing remain unresolved. -- Coalescing and alternate output formats are not implemented yet. +- Alternate output formats are not implemented yet. + +## Release Builds + +Local builds record version metadata as `dev`. Release builds should inject the release version with `ldflags`: + +```sh +go build -ldflags "-X gitea.maximumdirect.net/eric/seriatim/internal/buildinfo.Version=v1.0.0" ./cmd/seriatim +``` diff --git a/architecture.md b/architecture.md index 4964fde..d23d6a9 100644 --- a/architecture.md +++ b/architecture.md @@ -4,7 +4,7 @@ The initial use case is merging independently transcribed speaker audio tracks from the same recorded session, such as a weekly tabletop RPG session. The architecture should also support meetings, podcasts, interviews, and other multi-speaker events. -`seriatim` will be implemented in Go. +`seriatim` is implemented in Go. ## Goals @@ -21,19 +21,19 @@ The initial use case is merging independently transcribed speaker audio tracks f 9. Emit one or more output artifacts through output writers. 10. Produce report data for validation findings, corrections, and transformations. -## Non-goals for the MVP +## Non-goals -The MVP should not attempt to: +The 1.0 release does not attempt to: - Perform transcription. - Perform audio diarization. - Use an LLM. - Summarize transcript content. - Infer speaker identity from audio or text. -- Fully resolve crosstalk. +- Fully resolve every crosstalk case. - Load arbitrary third-party code as dynamic plugins. -The MVP should support runtime composition of built-in modules by canonical module name. 
Arbitrary external plugin loading can be considered later. +The application supports runtime composition of built-in modules by canonical module name. Arbitrary external plugin loading can be considered later. ## Core Assumption @@ -78,7 +78,7 @@ The configuration stage produces an application config value that is passed thro The input stage converts external inputs into raw transcript documents with source metadata. -The MVP input method is one or more JSON files passed with repeated `--input-file` flags: +The current input method is one or more JSON files passed with repeated `--input-file` flags: ```text seriatim merge --input-file eric.json --input-file mike.json --output-file merged.json @@ -107,7 +107,7 @@ Preprocessing starts with raw transcript documents from input readers and must e Preprocessing modules are selected at runtime with a comma-separated list of canonical module names: ```text ---preprocessing-modules validate-raw,normalize-speakers,trim-text,autocorrect +--preprocessing-modules validate-raw,normalize-speakers,trim-text ``` Modules run in the exact order provided. Unknown module names are configuration errors. @@ -120,7 +120,6 @@ Potential preprocessing modules include: - Speaker name normalization based on input filename. - Timing validation and deterministic correction. - Text trimming. -- Word replacement from `autocorrect.yml`. Preprocessing should not depend on global chronological ordering across speakers. Modules that need the globally merged transcript belong in postprocessing. @@ -147,7 +146,7 @@ The postprocessing stage applies zero or more modules to the merged transcript. 
Postprocessing modules are selected at runtime with a comma-separated list of canonical module names: ```text ---postprocessing-modules detect-overlaps,resolve-overlaps,coalesce,assign-ids,validate-output +--postprocessing-modules detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output ``` Modules run in the exact order provided. Unknown module names are configuration errors. @@ -168,7 +167,7 @@ Any module that can reorder, split, merge, drop, or create segments must run bef The output stage emits one or more artifacts from the final transcript and report model. -The MVP output format is JSON, specified with: +The current output format is JSON, specified with: ```text --output-file merged.json @@ -202,7 +201,7 @@ This classification should guide Go interfaces and package boundaries. It should ## Runtime Module Composition -The MVP should support runtime composition of built-in modules. +The application supports runtime composition of built-in modules. Module names are canonical strings registered at startup. CLI flags refer to those names. The configuration stage resolves names into module instances before the pipeline runs. @@ -214,9 +213,9 @@ seriatim merge \ --input-file mike.json \ --speakers speakers.yml \ --autocorrect autocorrect.yml \ - --preprocessing-modules validate-raw,normalize-speakers,trim-text,autocorrect \ - --postprocessing-modules detect-overlaps,resolve-overlaps,coalesce,assign-ids,validate-output \ - --output-module json \ + --preprocessing-modules validate-raw,normalize-speakers,trim-text \ + --postprocessing-modules detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output \ + --output-modules json \ --output-file merged.json \ --report-file report.json ``` @@ -358,7 +357,7 @@ Initial classifications may include: - `backchannel` - `crosstalk` -The MVP `resolve-overlaps` module may be a stub that marks groups as unresolved. 
This preserves the architecture for future word-level crosstalk serialization without complicating the initial implementation. +The `resolve-overlaps` module uses preserved word-level timing to replace detected overlap-group segments with smaller word-run segments when usable timing is available. Groups without usable word timing remain unresolved for later passes or human review. Overlap resolution should be non-destructive. Original segment text, timing, and source metadata must remain recoverable. @@ -403,31 +402,28 @@ To support this: - Record application version in output metadata. - Record enabled module names and module order in output metadata or report data. -## Suggested Go Package Layout +## Go Package Layout ```text cmd/seriatim/ CLI entrypoint internal/config/ CLI/env/config loading and validation internal/pipeline/ Pipeline orchestration and module registry -internal/input/ Input readers -internal/raw/ Raw transcript structs -internal/schema/ Schema loading and validation helpers +internal/builtin/ Built-in pipeline modules +internal/artifact/ Conversion from internal model to public output schema +internal/buildinfo/ Build-time version metadata internal/speaker/ Speaker map parsing and lookup internal/model/ Canonical and merged transcript models -internal/preprocess/ Preprocessing modules -internal/merge/ Deterministic merge logic -internal/postprocess/ Postprocessing modules internal/overlap/ Overlap detection and refinement helpers internal/autocorrect/ Word replacement rules internal/report/ Report model and event accumulation -internal/output/ Output writers +schema/ Public output contract and JSON Schema validation ``` Package boundaries should follow data ownership. Shared models belong in `internal/model`; stage-specific behavior belongs in the relevant stage package. -## MVP Defaults +## Default Modules -The MVP should define documented defaults equivalent to explicit module lists. 
+The default pipeline is equivalent to explicit module lists. Recommended default preprocessing modules: @@ -438,7 +434,11 @@ validate-raw,normalize-speakers,trim-text Recommended default postprocessing modules: ```text -detect-overlaps,resolve-overlaps,assign-ids,validate-output +detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output ``` -Optional modules such as `autocorrect` and `coalesce` should be opt-in until their behavior is thoroughly specified and tested. +The default output module is: + +```text +json +``` diff --git a/internal/artifact/transcript.go b/internal/artifact/transcript.go index 1e8d322..ac5750d 100644 --- a/internal/artifact/transcript.go +++ b/internal/artifact/transcript.go @@ -1,15 +1,13 @@ package artifact import ( + "gitea.maximumdirect.net/eric/seriatim/internal/buildinfo" "gitea.maximumdirect.net/eric/seriatim/internal/config" "gitea.maximumdirect.net/eric/seriatim/internal/model" "gitea.maximumdirect.net/eric/seriatim/schema" ) -const ( - ApplicationName = "seriatim" - Version = "dev" -) +const ApplicationName = "seriatim" // FromMerged converts the internal merged transcript model into the public // serialized output contract. 
@@ -47,7 +45,7 @@ func FromMerged(cfg config.Config, merged model.MergedTranscript) schema.Transcr return schema.Transcript{ Metadata: schema.Metadata{ Application: ApplicationName, - Version: Version, + Version: buildinfo.Version, InputReader: cfg.InputReader, InputFiles: append([]string(nil), cfg.InputFiles...), PreprocessingModules: append([]string(nil), cfg.PreprocessingModules...), diff --git a/internal/artifact/transcript_test.go b/internal/artifact/transcript_test.go new file mode 100644 index 0000000..864fec9 --- /dev/null +++ b/internal/artifact/transcript_test.go @@ -0,0 +1,22 @@ +package artifact + +import ( + "testing" + + "gitea.maximumdirect.net/eric/seriatim/internal/buildinfo" + "gitea.maximumdirect.net/eric/seriatim/internal/config" + "gitea.maximumdirect.net/eric/seriatim/internal/model" +) + +func TestFromMergedUsesBuildVersion(t *testing.T) { + original := buildinfo.Version + t.Cleanup(func() { + buildinfo.Version = original + }) + buildinfo.Version = "v1.0.0-test" + + transcript := FromMerged(config.Config{}, model.MergedTranscript{}) + if transcript.Metadata.Version != "v1.0.0-test" { + t.Fatalf("version = %q, want v1.0.0-test", transcript.Metadata.Version) + } +} diff --git a/internal/buildinfo/buildinfo.go b/internal/buildinfo/buildinfo.go new file mode 100644 index 0000000..b5639db --- /dev/null +++ b/internal/buildinfo/buildinfo.go @@ -0,0 +1,7 @@ +package buildinfo + +// Version is the application version recorded in output artifacts and CLI +// metadata. 
Release builds can override it with: +// +// go build -ldflags "-X gitea.maximumdirect.net/eric/seriatim/internal/buildinfo.Version=v1.0.0" +var Version = "dev" diff --git a/internal/builtin/merge.go b/internal/builtin/merge.go index c3c2dcf..d2712ff 100644 --- a/internal/builtin/merge.go +++ b/internal/builtin/merge.go @@ -2,6 +2,7 @@ package builtin import ( "context" + "fmt" "sort" "gitea.maximumdirect.net/eric/seriatim/internal/config" @@ -9,13 +10,13 @@ import ( "gitea.maximumdirect.net/eric/seriatim/internal/report" ) -type placeholderMerger struct{} +type chronologicalMerger struct{} -func (placeholderMerger) Name() string { - return "placeholder-merger" +func (chronologicalMerger) Name() string { + return "chronological-merge" } -func (placeholderMerger) Merge(ctx context.Context, in []model.CanonicalTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) { +func (chronologicalMerger) Merge(ctx context.Context, in []model.CanonicalTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) { if err := ctx.Err(); err != nil { return model.MergedTranscript{}, nil, err } @@ -33,6 +34,6 @@ func (placeholderMerger) Merge(ctx context.Context, in []model.CanonicalTranscri Segments: segments, OverlapGroups: nil, }, []report.Event{ - report.Info("merge", "placeholder-merger", "merged placeholder canonical transcript(s)"), + report.Info("merge", "chronological-merge", fmt.Sprintf("merged %d canonical transcript(s) into %d segment(s)", len(in), len(segments))), }, nil } diff --git a/internal/builtin/postprocess.go b/internal/builtin/postprocess.go index 0a476df..ff17a31 100644 --- a/internal/builtin/postprocess.go +++ b/internal/builtin/postprocess.go @@ -16,24 +16,6 @@ import ( "gitea.maximumdirect.net/eric/seriatim/schema" ) -type noopPostprocessor struct { - name string -} - -func (p noopPostprocessor) Name() string { - return p.name -} - -func (p noopPostprocessor) Process(ctx context.Context, in model.MergedTranscript, 
cfg config.Config) (model.MergedTranscript, []report.Event, error) { - if err := ctx.Err(); err != nil { - return model.MergedTranscript{}, nil, err - } - - return in, []report.Event{ - report.Info("postprocessing", p.name, "completed no-op postprocessing module"), - }, nil -} - type assignIDs struct{} func (assignIDs) Name() string { diff --git a/internal/builtin/preprocess.go b/internal/builtin/preprocess.go index e54898c..0d8db56 100644 --- a/internal/builtin/preprocess.go +++ b/internal/builtin/preprocess.go @@ -13,35 +13,58 @@ import ( "gitea.maximumdirect.net/eric/seriatim/internal/speaker" ) -type noopPreprocessor struct { - name string - requires pipeline.ModelState - produces pipeline.ModelState +type validateRaw struct{} + +func (validateRaw) Name() string { + return "validate-raw" } -func (p noopPreprocessor) Name() string { - return p.name +func (validateRaw) Requires() pipeline.ModelState { + return pipeline.StateRaw } -func (p noopPreprocessor) Requires() pipeline.ModelState { - return p.requires +func (validateRaw) Produces() pipeline.ModelState { + return pipeline.StateRaw } -func (p noopPreprocessor) Produces() pipeline.ModelState { - return p.produces -} - -func (p noopPreprocessor) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) { +func (validateRaw) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) { if err := ctx.Err(); err != nil { return pipeline.PreprocessState{}, nil, err } - if in.State != p.requires { - return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", p.name, p.requires, in.State) + if in.State != pipeline.StateRaw { + return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "validate-raw", pipeline.StateRaw, in.State) + } + if len(in.Raw) == 0 { + return 
pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: no raw transcript(s) to validate") + } + + for transcriptIndex, transcript := range in.Raw { + if strings.TrimSpace(transcript.Source) == "" { + return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %d has empty source", transcriptIndex) + } + for segmentIndex, segment := range transcript.Segments { + if segment.Start < 0 { + return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d has negative start", transcript.Source, segmentIndex) + } + if segment.End < segment.Start { + return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d has end before start", transcript.Source, segmentIndex) + } + for wordIndex, word := range segment.Words { + if !word.Timed { + continue + } + if word.Start < 0 { + return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has negative start", transcript.Source, segmentIndex, wordIndex) + } + if word.End < word.Start { + return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has end before start", transcript.Source, segmentIndex, wordIndex) + } + } + } } - in.State = p.produces return in, []report.Event{ - report.Info("preprocessing", p.name, "completed no-op preprocessing module"), + report.Info("preprocessing", "validate-raw", fmt.Sprintf("validated %d raw transcript(s)", len(in.Raw))), }, nil } diff --git a/internal/builtin/preprocess_test.go b/internal/builtin/preprocess_test.go new file mode 100644 index 0000000..aad2135 --- /dev/null +++ b/internal/builtin/preprocess_test.go @@ -0,0 +1,153 @@ +package builtin + +import ( + "context" + "strings" + "testing" + + "gitea.maximumdirect.net/eric/seriatim/internal/config" + "gitea.maximumdirect.net/eric/seriatim/internal/model" + "gitea.maximumdirect.net/eric/seriatim/internal/pipeline" +) + +func 
TestValidateRawAcceptsValidRawTranscripts(t *testing.T) { + state := pipeline.PreprocessState{ + State: pipeline.StateRaw, + Raw: []model.RawTranscript{ + { + Source: "input.json", + Segments: []model.RawSegment{ + { + Start: 1, + End: 2, + Text: "hello", + Words: []model.Word{ + {Text: "hello", Start: 1, End: 1.5, Timed: true}, + {Text: "untimed"}, + }, + }, + }, + }, + }, + } + + got, events, err := validateRaw{}.Process(context.Background(), state, config.Config{}) + if err != nil { + t.Fatalf("validate raw: %v", err) + } + if got.State != pipeline.StateRaw || len(got.Raw) != 1 { + t.Fatalf("unexpected state: %#v", got) + } + if len(events) != 1 || !strings.Contains(events[0].Message, "validated 1 raw transcript(s)") { + t.Fatalf("events = %#v", events) + } +} + +func TestValidateRawRejectsInvalidState(t *testing.T) { + state := pipeline.PreprocessState{State: pipeline.StateCanonical} + + _, _, err := validateRaw{}.Process(context.Background(), state, config.Config{}) + assertPreprocessError(t, err, `requires state "raw"`) +} + +func TestValidateRawRejectsNoRawTranscripts(t *testing.T) { + state := pipeline.PreprocessState{State: pipeline.StateRaw} + + _, _, err := validateRaw{}.Process(context.Background(), state, config.Config{}) + assertPreprocessError(t, err, "no raw transcript(s)") +} + +func TestValidateRawRejectsEmptySource(t *testing.T) { + state := validRawState() + state.Raw[0].Source = " " + + _, _, err := validateRaw{}.Process(context.Background(), state, config.Config{}) + assertPreprocessError(t, err, "raw transcript 0 has empty source") +} + +func TestValidateRawRejectsInvalidSegmentTiming(t *testing.T) { + tests := []struct { + name string + mutate func(*model.RawSegment) + want string + }{ + { + name: "negative start", + mutate: func(segment *model.RawSegment) { + segment.Start = -1 + }, + want: "segment 0 has negative start", + }, + { + name: "end before start", + mutate: func(segment *model.RawSegment) { + segment.Start = 2 + segment.End = 1 + 
}, + want: "segment 0 has end before start", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + state := validRawState() + test.mutate(&state.Raw[0].Segments[0]) + + _, _, err := validateRaw{}.Process(context.Background(), state, config.Config{}) + assertPreprocessError(t, err, test.want) + }) + } +} + +func TestValidateRawRejectsInvalidTimedWordTiming(t *testing.T) { + tests := []struct { + name string + word model.Word + want string + }{ + { + name: "negative start", + word: model.Word{Text: "bad", Start: -1, End: 1, Timed: true}, + want: "word 0 has negative start", + }, + { + name: "end before start", + word: model.Word{Text: "bad", Start: 2, End: 1, Timed: true}, + want: "word 0 has end before start", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + state := validRawState() + state.Raw[0].Segments[0].Words = []model.Word{test.word} + + _, _, err := validateRaw{}.Process(context.Background(), state, config.Config{}) + assertPreprocessError(t, err, test.want) + }) + } +} + +func validRawState() pipeline.PreprocessState { + return pipeline.PreprocessState{ + State: pipeline.StateRaw, + Raw: []model.RawTranscript{ + { + Source: "input.json", + Segments: []model.RawSegment{ + {Start: 1, End: 2, Text: ""}, + }, + }, + }, + } +} + +func assertPreprocessError(t *testing.T, err error, want string) { + t.Helper() + if err == nil { + t.Fatalf("expected error containing %q", want) + } + if !strings.Contains(err.Error(), want) { + t.Fatalf("expected error containing %q, got %v", want, err) + } +} diff --git a/internal/builtin/registry.go b/internal/builtin/registry.go index 25815cb..29a73fa 100644 --- a/internal/builtin/registry.go +++ b/internal/builtin/registry.go @@ -2,15 +2,15 @@ package builtin import "gitea.maximumdirect.net/eric/seriatim/internal/pipeline" -// NewRegistry registers the MVP built-in modules. +// NewRegistry registers the built-in modules. 
func NewRegistry() *pipeline.Registry { registry := pipeline.NewRegistry() registry.RegisterInputReader(jsonFilesReader{}) - registry.RegisterPreprocessor(noopPreprocessor{name: "validate-raw", requires: pipeline.StateRaw, produces: pipeline.StateRaw}) + registry.RegisterPreprocessor(validateRaw{}) registry.RegisterPreprocessor(normalizeSpeakers{}) registry.RegisterPreprocessor(trimText{}) - registry.RegisterMerger(placeholderMerger{}) + registry.RegisterMerger(chronologicalMerger{}) registry.RegisterPostprocessor(detectOverlaps{}) registry.RegisterPostprocessor(resolveOverlaps{}) registry.RegisterPostprocessor(backchannelPostprocessor{}) diff --git a/internal/cli/merge_test.go b/internal/cli/merge_test.go index 1d8d2ad..2ed86cd 100644 --- a/internal/cli/merge_test.go +++ b/internal/cli/merge_test.go @@ -73,7 +73,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) { t.Fatalf("did not expect words in output:\n%s", outputJSON) } if len(transcript.OverlapGroups) != 0 { - t.Fatalf("expected placeholder output to contain no overlap groups, got %d", len(transcript.OverlapGroups)) + t.Fatalf("expected output to contain no overlap groups, got %d", len(transcript.OverlapGroups)) } var rpt report.Report @@ -87,7 +87,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) { "validate-raw", "normalize-speakers", "trim-text", - "placeholder-merger", + "chronological-merge", "detect-overlaps", "resolve-overlaps", "backchannel", @@ -105,6 +105,9 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) { if !hasReportEvent(rpt, "postprocessing", "validate-output", "validated 3 output segment(s)") { t.Fatal("expected validate-output report event") } + if !hasReportEvent(rpt, "merge", "chronological-merge", "merged 2 canonical transcript(s) into 3 segment(s)") { + t.Fatal("expected chronological-merge report event") + } } func TestMergeTieBreakOrder(t *testing.T) { diff --git a/internal/cli/root.go b/internal/cli/root.go index 684ee7b..811d6ca 100644 --- 
a/internal/cli/root.go +++ b/internal/cli/root.go @@ -2,6 +2,8 @@ package cli import ( "github.com/spf13/cobra" + + "gitea.maximumdirect.net/eric/seriatim/internal/buildinfo" ) // NewRootCommand builds the seriatim command tree. @@ -9,6 +11,7 @@ func NewRootCommand() *cobra.Command { cmd := &cobra.Command{ Use: "seriatim", Short: "Merge per-speaker transcripts into a chronological transcript", + Version: buildinfo.Version, SilenceErrors: true, SilenceUsage: true, } diff --git a/internal/cli/root_test.go b/internal/cli/root_test.go new file mode 100644 index 0000000..62d0f60 --- /dev/null +++ b/internal/cli/root_test.go @@ -0,0 +1,10 @@ +package cli + +import "testing" + +func TestRootCommandDefaultsVersionToDev(t *testing.T) { + cmd := NewRootCommand() + if cmd.Version != "dev" { + t.Fatalf("version = %q, want dev", cmd.Version) + } +} diff --git a/internal/pipeline/runner.go b/internal/pipeline/runner.go index cba1d3d..6c7e1df 100644 --- a/internal/pipeline/runner.go +++ b/internal/pipeline/runner.go @@ -5,6 +5,7 @@ import ( "fmt" "gitea.maximumdirect.net/eric/seriatim/internal/artifact" + "gitea.maximumdirect.net/eric/seriatim/internal/buildinfo" "gitea.maximumdirect.net/eric/seriatim/internal/config" "gitea.maximumdirect.net/eric/seriatim/internal/model" "gitea.maximumdirect.net/eric/seriatim/internal/report" @@ -13,7 +14,6 @@ import ( const ( applicationName = artifact.ApplicationName - version = artifact.Version ) // Run validates module composition, executes the pipeline, and emits outputs. 
@@ -149,7 +149,7 @@ func finalizeReport(cfg config.Config, events []report.Event) report.Report { return report.Report{ Metadata: report.Metadata{ Application: applicationName, - Version: version, + Version: buildinfo.Version, InputReader: cfg.InputReader, InputFiles: append([]string(nil), cfg.InputFiles...), PreprocessingModules: append([]string(nil), cfg.PreprocessingModules...), diff --git a/internal/pipeline/runner_test.go b/internal/pipeline/runner_test.go new file mode 100644 index 0000000..a02b25b --- /dev/null +++ b/internal/pipeline/runner_test.go @@ -0,0 +1,21 @@ +package pipeline + +import ( + "testing" + + "gitea.maximumdirect.net/eric/seriatim/internal/buildinfo" + "gitea.maximumdirect.net/eric/seriatim/internal/config" +) + +func TestFinalizeReportUsesBuildVersion(t *testing.T) { + original := buildinfo.Version + t.Cleanup(func() { + buildinfo.Version = original + }) + buildinfo.Version = "v1.0.0-test" + + rpt := finalizeReport(config.Config{}, nil) + if rpt.Metadata.Version != "v1.0.0-test" { + t.Fatalf("version = %q, want v1.0.0-test", rpt.Metadata.Version) + } +}