Files
seriatim/internal/builtin/postprocess.go

132 lines
3.5 KiB
Go

package builtin
import (
"context"
"fmt"
"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
"gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/model"
"gitea.maximumdirect.net/eric/seriatim/internal/overlap"
"gitea.maximumdirect.net/eric/seriatim/internal/report"
)
// noopPostprocessor is a pass-through postprocessing module. It carries only
// the name under which it reports itself.
type noopPostprocessor struct {
	// name is the module name returned by Name and attached to report events.
	name string
}

// Name returns the module name stored on the postprocessor.
func (p noopPostprocessor) Name() string { return p.name }
// Process hands in back unmodified together with one report.Info event that
// records the module ran. When ctx has already ended, it returns ctx.Err()
// with a zero-value transcript and no events. cfg is not consulted.
func (p noopPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return model.MergedTranscript{}, nil, ctxErr
	}

	completed := report.Info("postprocessing", p.name, "completed no-op postprocessing module")
	return in, []report.Event{completed}, nil
}
// assignIDs is the postprocessing module that numbers transcript segments.
// It is stateless.
type assignIDs struct{}

// Name returns the fixed module name "assign-ids".
func (assignIDs) Name() string {
	const moduleName = "assign-ids"
	return moduleName
}
// Process gives every segment of in an ID equal to its one-based position in
// in.Segments and returns the transcript with one report.Info event. When ctx
// has already ended, it returns ctx.Err() with a zero-value transcript and no
// events. cfg is not consulted.
//
// NOTE(review): IDs are written through in.Segments directly; if Segments is
// a slice, the caller's backing array observes the new IDs too.
func (assignIDs) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return model.MergedTranscript{}, nil, ctxErr
	}

	for position := range in.Segments {
		// IDs are one-based, so the first segment receives ID 1.
		in.Segments[position].ID = position + 1
	}

	assigned := report.Info("postprocessing", "assign-ids", "assigned final segment IDs")
	return in, []report.Event{assigned}, nil
}
// detectOverlaps is the postprocessing module that runs overlap detection on
// a merged transcript. It is stateless.
type detectOverlaps struct{}

// Name returns the fixed module name "detect-overlaps".
func (detectOverlaps) Name() string {
	const moduleName = "detect-overlaps"
	return moduleName
}
// Process passes in through overlap.Detect and returns the result with one
// report.Info event stating how many overlap groups the result holds. When
// ctx has already ended, it returns ctx.Err() with a zero-value transcript and
// no events. cfg is not consulted.
func (detectOverlaps) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return model.MergedTranscript{}, nil, ctxErr
	}

	in = overlap.Detect(in)
	groupCount := len(in.OverlapGroups)
	message := fmt.Sprintf("detected %d overlap group(s)", groupCount)

	return in, []report.Event{report.Info("postprocessing", "detect-overlaps", message)}, nil
}
// resolveOverlaps is the postprocessing module that runs overlap resolution
// on a merged transcript. It is stateless.
type resolveOverlaps struct{}

// Name returns the fixed module name "resolve-overlaps".
func (resolveOverlaps) Name() string {
	const moduleName = "resolve-overlaps"
	return moduleName
}
// Process calls overlap.Resolve with in and cfg.OverlapWordRunGap and returns
// the resolved transcript with one report.Info event summarising the counts
// reported by the resolver (groups processed, groups changed, originals
// removed, replacements created).
//
// When ctx has already ended, it returns ctx.Err(); when overlap.Resolve
// fails, its error is returned as-is. In both cases the transcript is the
// zero value and no events are returned.
func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return model.MergedTranscript{}, nil, ctxErr
	}

	out, stats, resolveErr := overlap.Resolve(in, cfg.OverlapWordRunGap)
	if resolveErr != nil {
		return model.MergedTranscript{}, nil, resolveErr
	}

	message := fmt.Sprintf(
		"processed %d overlap group(s); changed %d; removed %d original segment(s); created %d replacement segment(s)",
		stats.GroupsProcessed,
		stats.GroupsChanged,
		stats.OriginalsRemoved,
		stats.ReplacementsCreated,
	)
	return out, []report.Event{report.Info("postprocessing", "resolve-overlaps", message)}, nil
}
// autocorrectPostprocessor is the postprocessing module that applies
// autocorrect rules to segment text. It is stateless; the rules file path
// comes from the config passed to Process.
type autocorrectPostprocessor struct{}

// Name returns the fixed module name "autocorrect".
func (autocorrectPostprocessor) Name() string {
	const moduleName = "autocorrect"
	return moduleName
}
// Process rewrites the text of every segment of in using the rules loaded
// from cfg.AutocorrectFile and returns the transcript with one report.Info
// event giving the total replacement count reported by rules.Apply.
//
// When cfg.AutocorrectFile is empty, nothing is loaded and in is returned
// unchanged with an event noting the skip. When ctx has already ended, it
// returns ctx.Err(); when autocorrect.Load fails, its error is returned
// as-is. In both error cases the transcript is the zero value and no events
// are returned.
//
// NOTE(review): text is written through in.Segments directly; if Segments is
// a slice, the caller's backing array observes the corrected text too.
func (autocorrectPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return model.MergedTranscript{}, nil, ctxErr
	}

	rulesPath := cfg.AutocorrectFile
	if rulesPath == "" {
		skipped := report.Info("postprocessing", "autocorrect", "skipped autocorrect because no autocorrect file was supplied")
		return in, []report.Event{skipped}, nil
	}

	rules, loadErr := autocorrect.Load(rulesPath)
	if loadErr != nil {
		return model.MergedTranscript{}, nil, loadErr
	}

	total := 0
	for i := range in.Segments {
		corrected, n := rules.Apply(in.Segments[i].Text)
		in.Segments[i].Text = corrected
		total += n
	}

	message := fmt.Sprintf("applied %d autocorrect replacement(s)", total)
	return in, []report.Event{report.Info("postprocessing", "autocorrect", message)}, nil
}