Implemented an autocorrect module at the postprocessing stage
This commit is contained in:
@@ -2,7 +2,9 @@ package builtin
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
||||
@@ -45,3 +47,31 @@ func (assignIDs) Process(ctx context.Context, in model.MergedTranscript, cfg con
|
||||
report.Info("postprocessing", "assign-ids", "assigned final segment IDs"),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// autocorrectPostprocessor is the postprocessing stage that runs autocorrect
// rules over merged transcript segments. It is an empty struct and holds no
// state of its own.
type autocorrectPostprocessor struct{}

// Name reports the stage name, "autocorrect".
func (autocorrectPostprocessor) Name() string {
	const stageName = "autocorrect"
	return stageName
}
|
||||
|
||||
func (autocorrectPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
rules, err := autocorrect.Load(cfg.AutocorrectFile)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
replacements := 0
|
||||
for index := range in.Segments {
|
||||
var count int
|
||||
in.Segments[index].Text, count = rules.Apply(in.Segments[index].Text)
|
||||
replacements += count
|
||||
}
|
||||
|
||||
return in, []report.Event{
|
||||
report.Info("postprocessing", "autocorrect", fmt.Sprintf("applied %d autocorrect replacement(s)", replacements)),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -10,13 +10,12 @@ func NewRegistry() *pipeline.Registry {
|
||||
registry.RegisterPreprocessor(noopPreprocessor{name: "validate-raw", requires: pipeline.StateRaw, produces: pipeline.StateRaw})
|
||||
registry.RegisterPreprocessor(normalizeSpeakers{})
|
||||
registry.RegisterPreprocessor(trimText{})
|
||||
registry.RegisterPreprocessor(noopPreprocessor{name: "autocorrect", requires: pipeline.StateCanonical, produces: pipeline.StateCanonical})
|
||||
registry.RegisterMerger(placeholderMerger{})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "detect-overlaps"})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "resolve-overlaps"})
|
||||
registry.RegisterPostprocessor(assignIDs{})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "autocorrect"})
|
||||
registry.RegisterPostprocessor(autocorrectPostprocessor{})
|
||||
registry.RegisterOutputWriter(jsonOutputWriter{})
|
||||
|
||||
return registry
|
||||
|
||||
Reference in New Issue
Block a user