Simplify the CLI interface and update documentation accordingly
This commit is contained in:
@@ -58,6 +58,11 @@ func (autocorrectPostprocessor) Process(ctx context.Context, in model.MergedTran
|
||||
if err := ctx.Err(); err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
if cfg.AutocorrectFile == "" {
|
||||
return in, []report.Event{
|
||||
report.Info("postprocessing", "autocorrect", "skipped autocorrect because no autocorrect file was supplied"),
|
||||
}, nil
|
||||
}
|
||||
|
||||
rules, err := autocorrect.Load(cfg.AutocorrectFile)
|
||||
if err != nil {
|
||||
|
||||
@@ -3,6 +3,7 @@ package builtin
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
@@ -99,16 +100,25 @@ func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessStat
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "normalize-speakers", pipeline.StateRaw, in.State)
|
||||
}
|
||||
|
||||
speakers, err := speaker.LoadMap(cfg.SpeakersFile)
|
||||
if err != nil {
|
||||
return pipeline.PreprocessState{}, nil, err
|
||||
var speakers speaker.Map
|
||||
useSpeakerMap := cfg.SpeakersFile != ""
|
||||
if useSpeakerMap {
|
||||
var err error
|
||||
speakers, err = speaker.LoadMap(cfg.SpeakersFile)
|
||||
if err != nil {
|
||||
return pipeline.PreprocessState{}, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
canonical := make([]model.CanonicalTranscript, 0, len(in.Raw))
|
||||
for _, raw := range in.Raw {
|
||||
canonicalSpeaker, err := speakers.SpeakerForSource(raw.Source)
|
||||
if err != nil {
|
||||
return pipeline.PreprocessState{}, nil, err
|
||||
canonicalSpeaker := filepath.Base(raw.Source)
|
||||
if useSpeakerMap {
|
||||
var err error
|
||||
canonicalSpeaker, err = speakers.SpeakerForSource(raw.Source)
|
||||
if err != nil {
|
||||
return pipeline.PreprocessState{}, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
segments := make([]model.Segment, 0, len(raw.Segments))
|
||||
@@ -129,11 +139,16 @@ func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessStat
|
||||
})
|
||||
}
|
||||
|
||||
message := "created canonical transcript(s) from raw input"
|
||||
if !useSpeakerMap {
|
||||
message = "created canonical transcript(s) using input basenames as speaker labels"
|
||||
}
|
||||
|
||||
return pipeline.PreprocessState{
|
||||
State: pipeline.StateCanonical,
|
||||
Raw: append([]model.RawTranscript(nil), in.Raw...),
|
||||
Canonical: canonical,
|
||||
}, []report.Event{
|
||||
report.Info("preprocessing", "normalize-speakers", "created canonical transcript(s) from raw input"),
|
||||
report.Info("preprocessing", "normalize-speakers", message),
|
||||
}, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user