Cleaned up documentation and development artifacts in advance of release
This commit is contained in:
@@ -1,15 +1,13 @@
|
||||
package artifact
|
||||
|
||||
import (
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/buildinfo"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/schema"
|
||||
)
|
||||
|
||||
const (
|
||||
ApplicationName = "seriatim"
|
||||
Version = "dev"
|
||||
)
|
||||
const ApplicationName = "seriatim"
|
||||
|
||||
// FromMerged converts the internal merged transcript model into the public
|
||||
// serialized output contract.
|
||||
@@ -47,7 +45,7 @@ func FromMerged(cfg config.Config, merged model.MergedTranscript) schema.Transcr
|
||||
return schema.Transcript{
|
||||
Metadata: schema.Metadata{
|
||||
Application: ApplicationName,
|
||||
Version: Version,
|
||||
Version: buildinfo.Version,
|
||||
InputReader: cfg.InputReader,
|
||||
InputFiles: append([]string(nil), cfg.InputFiles...),
|
||||
PreprocessingModules: append([]string(nil), cfg.PreprocessingModules...),
|
||||
|
||||
22
internal/artifact/transcript_test.go
Normal file
22
internal/artifact/transcript_test.go
Normal file
@@ -0,0 +1,22 @@
|
||||
package artifact
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/buildinfo"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
|
||||
// TestFromMergedUsesBuildVersion checks that FromMerged copies the current
// value of buildinfo.Version into the transcript metadata. It temporarily
// overrides the package-level version and expects that exact value back.
func TestFromMergedUsesBuildVersion(t *testing.T) {
|
||||
// Remember the real version and restore it when the test finishes so the
// override below does not leak into other tests in this package.
original := buildinfo.Version
|
||||
t.Cleanup(func() {
|
||||
buildinfo.Version = original
|
||||
})
|
||||
buildinfo.Version = "v1.0.0-test"
|
||||
|
||||
// Empty config and merged transcript are sufficient: only the metadata
// version field is inspected.
transcript := FromMerged(config.Config{}, model.MergedTranscript{})
|
||||
if transcript.Metadata.Version != "v1.0.0-test" {
|
||||
t.Fatalf("version = %q, want v1.0.0-test", transcript.Metadata.Version)
|
||||
}
|
||||
}
|
||||
7
internal/buildinfo/buildinfo.go
Normal file
7
internal/buildinfo/buildinfo.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package buildinfo
|
||||
|
||||
// Version is the application version recorded in output artifacts and CLI
|
||||
// metadata. Release builds can override it with:
|
||||
//
|
||||
// go build -ldflags "-X gitea.maximumdirect.net/eric/seriatim/internal/buildinfo.Version=v1.0.0"
|
||||
//
// It must stay a package-level string variable (not a constant): the linker's
// -X flag can only set string variables. Without an override it is "dev".
var Version = "dev"
|
||||
@@ -2,6 +2,7 @@ package builtin
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
@@ -9,13 +10,13 @@ import (
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
||||
)
|
||||
|
||||
type placeholderMerger struct{}
|
||||
type chronologicalMerger struct{}
|
||||
|
||||
func (placeholderMerger) Name() string {
|
||||
return "placeholder-merger"
|
||||
func (chronologicalMerger) Name() string {
|
||||
return "chronological-merge"
|
||||
}
|
||||
|
||||
func (placeholderMerger) Merge(ctx context.Context, in []model.CanonicalTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
||||
func (chronologicalMerger) Merge(ctx context.Context, in []model.CanonicalTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
@@ -33,6 +34,6 @@ func (placeholderMerger) Merge(ctx context.Context, in []model.CanonicalTranscri
|
||||
Segments: segments,
|
||||
OverlapGroups: nil,
|
||||
}, []report.Event{
|
||||
report.Info("merge", "placeholder-merger", "merged placeholder canonical transcript(s)"),
|
||||
report.Info("merge", "chronological-merge", fmt.Sprintf("merged %d canonical transcript(s) into %d segment(s)", len(in), len(segments))),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -16,24 +16,6 @@ import (
|
||||
"gitea.maximumdirect.net/eric/seriatim/schema"
|
||||
)
|
||||
|
||||
type noopPostprocessor struct {
|
||||
name string
|
||||
}
|
||||
|
||||
func (p noopPostprocessor) Name() string {
|
||||
return p.name
|
||||
}
|
||||
|
||||
func (p noopPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
return in, []report.Event{
|
||||
report.Info("postprocessing", p.name, "completed no-op postprocessing module"),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type assignIDs struct{}
|
||||
|
||||
func (assignIDs) Name() string {
|
||||
|
||||
@@ -13,35 +13,58 @@ import (
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/speaker"
|
||||
)
|
||||
|
||||
type noopPreprocessor struct {
|
||||
name string
|
||||
requires pipeline.ModelState
|
||||
produces pipeline.ModelState
|
||||
type validateRaw struct{}
|
||||
|
||||
func (validateRaw) Name() string {
|
||||
return "validate-raw"
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Name() string {
|
||||
return p.name
|
||||
func (validateRaw) Requires() pipeline.ModelState {
|
||||
return pipeline.StateRaw
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Requires() pipeline.ModelState {
|
||||
return p.requires
|
||||
func (validateRaw) Produces() pipeline.ModelState {
|
||||
return pipeline.StateRaw
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Produces() pipeline.ModelState {
|
||||
return p.produces
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
|
||||
func (validateRaw) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return pipeline.PreprocessState{}, nil, err
|
||||
}
|
||||
if in.State != p.requires {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", p.name, p.requires, in.State)
|
||||
if in.State != pipeline.StateRaw {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "validate-raw", pipeline.StateRaw, in.State)
|
||||
}
|
||||
if len(in.Raw) == 0 {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: no raw transcript(s) to validate")
|
||||
}
|
||||
|
||||
for transcriptIndex, transcript := range in.Raw {
|
||||
if strings.TrimSpace(transcript.Source) == "" {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %d has empty source", transcriptIndex)
|
||||
}
|
||||
for segmentIndex, segment := range transcript.Segments {
|
||||
if segment.Start < 0 {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d has negative start", transcript.Source, segmentIndex)
|
||||
}
|
||||
if segment.End < segment.Start {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d has end before start", transcript.Source, segmentIndex)
|
||||
}
|
||||
for wordIndex, word := range segment.Words {
|
||||
if !word.Timed {
|
||||
continue
|
||||
}
|
||||
if word.Start < 0 {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has negative start", transcript.Source, segmentIndex, wordIndex)
|
||||
}
|
||||
if word.End < word.Start {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has end before start", transcript.Source, segmentIndex, wordIndex)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
in.State = p.produces
|
||||
return in, []report.Event{
|
||||
report.Info("preprocessing", p.name, "completed no-op preprocessing module"),
|
||||
report.Info("preprocessing", "validate-raw", fmt.Sprintf("validated %d raw transcript(s)", len(in.Raw))),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
153
internal/builtin/preprocess_test.go
Normal file
153
internal/builtin/preprocess_test.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package builtin
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/pipeline"
|
||||
)
|
||||
|
||||
// TestValidateRawAcceptsValidRawTranscripts runs validateRaw over one
// well-formed raw transcript and expects no error, an unchanged raw state with
// the single transcript still present, and exactly one report event whose
// message contains "validated 1 raw transcript(s)".
func TestValidateRawAcceptsValidRawTranscripts(t *testing.T) {
|
||||
state := pipeline.PreprocessState{
|
||||
State: pipeline.StateRaw,
|
||||
Raw: []model.RawTranscript{
|
||||
{
|
||||
Source: "input.json",
|
||||
Segments: []model.RawSegment{
|
||||
{
|
||||
Start: 1,
|
||||
End: 2,
|
||||
Text: "hello",
|
||||
Words: []model.Word{
|
||||
{Text: "hello", Start: 1, End: 1.5, Timed: true},
|
||||
// Timed is left false here, so this word carries no timing to check.
{Text: "untimed"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
got, events, err := validateRaw{}.Process(context.Background(), state, config.Config{})
|
||||
if err != nil {
|
||||
t.Fatalf("validate raw: %v", err)
|
||||
}
|
||||
// Validation must not advance the model state or drop transcripts.
if got.State != pipeline.StateRaw || len(got.Raw) != 1 {
|
||||
t.Fatalf("unexpected state: %#v", got)
|
||||
}
|
||||
if len(events) != 1 || !strings.Contains(events[0].Message, "validated 1 raw transcript(s)") {
|
||||
t.Fatalf("events = %#v", events)
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateRawRejectsInvalidState feeds validateRaw a state marked
// pipeline.StateCanonical and expects an error that names the required state.
func TestValidateRawRejectsInvalidState(t *testing.T) {
|
||||
state := pipeline.PreprocessState{State: pipeline.StateCanonical}
|
||||
|
||||
_, _, err := validateRaw{}.Process(context.Background(), state, config.Config{})
|
||||
assertPreprocessError(t, err, `requires state "raw"`)
|
||||
}
|
||||
|
||||
// TestValidateRawRejectsNoRawTranscripts passes a raw-state value with no
// transcripts at all and expects validateRaw to fail with a
// "no raw transcript(s)" error.
func TestValidateRawRejectsNoRawTranscripts(t *testing.T) {
|
||||
state := pipeline.PreprocessState{State: pipeline.StateRaw}
|
||||
|
||||
_, _, err := validateRaw{}.Process(context.Background(), state, config.Config{})
|
||||
assertPreprocessError(t, err, "no raw transcript(s)")
|
||||
}
|
||||
|
||||
// TestValidateRawRejectsEmptySource replaces the source of an otherwise valid
// transcript with whitespace only and expects validateRaw to report it as an
// empty source for transcript index 0.
func TestValidateRawRejectsEmptySource(t *testing.T) {
|
||||
state := validRawState()
|
||||
// Whitespace-only, not the empty string: the source must still be rejected.
state.Raw[0].Source = " "
|
||||
|
||||
_, _, err := validateRaw{}.Process(context.Background(), state, config.Config{})
|
||||
assertPreprocessError(t, err, "raw transcript 0 has empty source")
|
||||
}
|
||||
|
||||
// TestValidateRawRejectsInvalidSegmentTiming is a table-driven test. Each case
// mutates the first segment of a valid state into an invalid timing and
// expects validateRaw to fail with the matching error fragment.
func TestValidateRawRejectsInvalidSegmentTiming(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
// mutate edits the segment in place to introduce the invalid timing.
mutate func(*model.RawSegment)
|
||||
// want is the substring the returned error must contain.
want string
|
||||
}{
|
||||
{
|
||||
name: "negative start",
|
||||
mutate: func(segment *model.RawSegment) {
|
||||
segment.Start = -1
|
||||
},
|
||||
want: "segment 0 has negative start",
|
||||
},
|
||||
{
|
||||
name: "end before start",
|
||||
mutate: func(segment *model.RawSegment) {
|
||||
segment.Start = 2
|
||||
segment.End = 1
|
||||
},
|
||||
want: "segment 0 has end before start",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
// Build a fresh state per case so mutations do not carry over.
state := validRawState()
|
||||
test.mutate(&state.Raw[0].Segments[0])
|
||||
|
||||
_, _, err := validateRaw{}.Process(context.Background(), state, config.Config{})
|
||||
assertPreprocessError(t, err, test.want)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateRawRejectsInvalidTimedWordTiming is a table-driven test. Each
// case installs a single timed word with invalid timing in the first segment
// and expects validateRaw to fail with the matching error fragment.
func TestValidateRawRejectsInvalidTimedWordTiming(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
// word is the only word placed in the segment; Timed is set in every case.
word model.Word
|
||||
// want is the substring the returned error must contain.
want string
|
||||
}{
|
||||
{
|
||||
name: "negative start",
|
||||
word: model.Word{Text: "bad", Start: -1, End: 1, Timed: true},
|
||||
want: "word 0 has negative start",
|
||||
},
|
||||
{
|
||||
name: "end before start",
|
||||
word: model.Word{Text: "bad", Start: 2, End: 1, Timed: true},
|
||||
want: "word 0 has end before start",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
// Build a fresh state per case; the segment itself keeps valid timing.
state := validRawState()
|
||||
state.Raw[0].Segments[0].Words = []model.Word{test.word}
|
||||
|
||||
_, _, err := validateRaw{}.Process(context.Background(), state, config.Config{})
|
||||
assertPreprocessError(t, err, test.want)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// validRawState returns a minimal raw-state fixture: one transcript with
// source "input.json" containing a single segment from 1 to 2 with empty text
// and no words. Every call builds a new value, so callers may mutate it.
func validRawState() pipeline.PreprocessState {
|
||||
return pipeline.PreprocessState{
|
||||
State: pipeline.StateRaw,
|
||||
Raw: []model.RawTranscript{
|
||||
{
|
||||
Source: "input.json",
|
||||
Segments: []model.RawSegment{
|
||||
{Start: 1, End: 2, Text: ""},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// assertPreprocessError fails the test immediately unless err is non-nil and
// its message contains the substring want.
func assertPreprocessError(t *testing.T, err error, want string) {
|
||||
// Mark as a helper so failures are attributed to the calling test line.
t.Helper()
|
||||
if err == nil {
|
||||
t.Fatalf("expected error containing %q", want)
|
||||
}
|
||||
if !strings.Contains(err.Error(), want) {
|
||||
t.Fatalf("expected error containing %q, got %v", want, err)
|
||||
}
|
||||
}
|
||||
@@ -2,15 +2,15 @@ package builtin
|
||||
|
||||
import "gitea.maximumdirect.net/eric/seriatim/internal/pipeline"
|
||||
|
||||
// NewRegistry registers the MVP built-in modules.
|
||||
// NewRegistry registers the built-in modules.
|
||||
func NewRegistry() *pipeline.Registry {
|
||||
registry := pipeline.NewRegistry()
|
||||
|
||||
registry.RegisterInputReader(jsonFilesReader{})
|
||||
registry.RegisterPreprocessor(noopPreprocessor{name: "validate-raw", requires: pipeline.StateRaw, produces: pipeline.StateRaw})
|
||||
registry.RegisterPreprocessor(validateRaw{})
|
||||
registry.RegisterPreprocessor(normalizeSpeakers{})
|
||||
registry.RegisterPreprocessor(trimText{})
|
||||
registry.RegisterMerger(placeholderMerger{})
|
||||
registry.RegisterMerger(chronologicalMerger{})
|
||||
registry.RegisterPostprocessor(detectOverlaps{})
|
||||
registry.RegisterPostprocessor(resolveOverlaps{})
|
||||
registry.RegisterPostprocessor(backchannelPostprocessor{})
|
||||
|
||||
@@ -73,7 +73,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
t.Fatalf("did not expect words in output:\n%s", outputJSON)
|
||||
}
|
||||
if len(transcript.OverlapGroups) != 0 {
|
||||
t.Fatalf("expected placeholder output to contain no overlap groups, got %d", len(transcript.OverlapGroups))
|
||||
t.Fatalf("expected output to contain no overlap groups, got %d", len(transcript.OverlapGroups))
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
@@ -87,7 +87,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
"validate-raw",
|
||||
"normalize-speakers",
|
||||
"trim-text",
|
||||
"placeholder-merger",
|
||||
"chronological-merge",
|
||||
"detect-overlaps",
|
||||
"resolve-overlaps",
|
||||
"backchannel",
|
||||
@@ -105,6 +105,9 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
if !hasReportEvent(rpt, "postprocessing", "validate-output", "validated 3 output segment(s)") {
|
||||
t.Fatal("expected validate-output report event")
|
||||
}
|
||||
if !hasReportEvent(rpt, "merge", "chronological-merge", "merged 2 canonical transcript(s) into 3 segment(s)") {
|
||||
t.Fatal("expected chronological-merge report event")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeTieBreakOrder(t *testing.T) {
|
||||
|
||||
@@ -2,6 +2,8 @@ package cli
|
||||
|
||||
import (
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/buildinfo"
|
||||
)
|
||||
|
||||
// NewRootCommand builds the seriatim command tree.
|
||||
@@ -9,6 +11,7 @@ func NewRootCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "seriatim",
|
||||
Short: "Merge per-speaker transcripts into a chronological transcript",
|
||||
Version: buildinfo.Version,
|
||||
SilenceErrors: true,
|
||||
SilenceUsage: true,
|
||||
}
|
||||
|
||||
10
internal/cli/root_test.go
Normal file
10
internal/cli/root_test.go
Normal file
@@ -0,0 +1,10 @@
|
||||
package cli
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestRootCommandDefaultsVersionToDev checks that a freshly built root command
// reports the version "dev".
//
// NOTE(review): the root command takes its version from buildinfo.Version, so
// this expectation presumably only holds when that variable has not been
// overridden at link time (for example with -ldflags "-X ...") — confirm the
// test binary is never built with such an override.
func TestRootCommandDefaultsVersionToDev(t *testing.T) {
|
||||
cmd := NewRootCommand()
|
||||
if cmd.Version != "dev" {
|
||||
t.Fatalf("version = %q, want dev", cmd.Version)
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/artifact"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/buildinfo"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
||||
@@ -13,7 +14,6 @@ import (
|
||||
|
||||
const (
|
||||
applicationName = artifact.ApplicationName
|
||||
version = artifact.Version
|
||||
)
|
||||
|
||||
// Run validates module composition, executes the pipeline, and emits outputs.
|
||||
@@ -149,7 +149,7 @@ func finalizeReport(cfg config.Config, events []report.Event) report.Report {
|
||||
return report.Report{
|
||||
Metadata: report.Metadata{
|
||||
Application: applicationName,
|
||||
Version: version,
|
||||
Version: buildinfo.Version,
|
||||
InputReader: cfg.InputReader,
|
||||
InputFiles: append([]string(nil), cfg.InputFiles...),
|
||||
PreprocessingModules: append([]string(nil), cfg.PreprocessingModules...),
|
||||
|
||||
21
internal/pipeline/runner_test.go
Normal file
21
internal/pipeline/runner_test.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/buildinfo"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
)
|
||||
|
||||
// TestFinalizeReportUsesBuildVersion checks that finalizeReport copies the
// current value of buildinfo.Version into the report metadata. It temporarily
// overrides the package-level version and expects that exact value back.
func TestFinalizeReportUsesBuildVersion(t *testing.T) {
|
||||
// Remember the real version and restore it when the test finishes so the
// override below does not leak into other tests in this package.
original := buildinfo.Version
|
||||
t.Cleanup(func() {
|
||||
buildinfo.Version = original
|
||||
})
|
||||
buildinfo.Version = "v1.0.0-test"
|
||||
|
||||
// Empty config and no events are sufficient: only the metadata version
// field is inspected.
rpt := finalizeReport(config.Config{}, nil)
|
||||
if rpt.Metadata.Version != "v1.0.0-test" {
|
||||
t.Fatalf("version = %q, want v1.0.0-test", rpt.Metadata.Version)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user