226 lines
6.7 KiB
Go
226 lines
6.7 KiB
Go
package builtin
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/artifact"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/backchannel"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/coalesce"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/danglers"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/filler"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/overlap"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
|
"gitea.maximumdirect.net/eric/seriatim/schema"
|
|
)
|
|
|
|
// assignIDs is the stateless "assign-ids" postprocessing step.
type assignIDs struct{}

// Name returns the identifier of this step, "assign-ids".
func (assignIDs) Name() string {
	const name = "assign-ids"
	return name
}
|
|
|
|
func (assignIDs) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
for index := range in.Segments {
|
|
in.Segments[index].ID = index + 1
|
|
}
|
|
|
|
return in, []report.Event{
|
|
report.Info("postprocessing", "assign-ids", "assigned final segment IDs"),
|
|
}, nil
|
|
}
|
|
|
|
// validateOutput is the stateless "validate-output" postprocessing step.
type validateOutput struct{}

// Name returns the identifier of this step, "validate-output".
func (validateOutput) Name() string {
	const name = "validate-output"
	return name
}
|
|
|
|
func (validateOutput) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
selected := artifact.SelectedFromMerged(cfg, in)
|
|
var err error
|
|
switch transcript := selected.(type) {
|
|
case schema.MinimalTranscript:
|
|
err = schema.ValidateMinimalTranscript(transcript)
|
|
case schema.Transcript:
|
|
err = schema.ValidateTranscript(transcript)
|
|
default:
|
|
err = fmt.Errorf("unsupported output artifact type %T", selected)
|
|
}
|
|
if err != nil {
|
|
return model.MergedTranscript{}, nil, fmt.Errorf("validate-output: %w", err)
|
|
}
|
|
|
|
return in, []report.Event{
|
|
report.Info("postprocessing", "validate-output", fmt.Sprintf("validated %d output segment(s)", len(in.Segments))),
|
|
}, nil
|
|
}
|
|
|
|
// detectOverlaps is the stateless "detect-overlaps" postprocessing step.
type detectOverlaps struct{}

// Name returns the identifier of this step, "detect-overlaps".
func (detectOverlaps) Name() string {
	const name = "detect-overlaps"
	return name
}
|
|
|
|
func (detectOverlaps) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
in = overlap.Detect(in)
|
|
return in, []report.Event{
|
|
report.Info("postprocessing", "detect-overlaps", fmt.Sprintf("detected %d overlap group(s)", len(in.OverlapGroups))),
|
|
}, nil
|
|
}
|
|
|
|
// resolveOverlaps is the stateless "resolve-overlaps" postprocessing step.
type resolveOverlaps struct{}

// Name returns the identifier of this step, "resolve-overlaps".
func (resolveOverlaps) Name() string {
	const name = "resolve-overlaps"
	return name
}
|
|
|
|
func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap, cfg.WordRunReorderWindow, cfg.CoalesceGap)
|
|
if err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
return resolved, []report.Event{
|
|
report.Info(
|
|
"postprocessing",
|
|
"resolve-overlaps",
|
|
fmt.Sprintf(
|
|
"processed %d overlap group(s); changed %d; removed %d original segment(s); created %d replacement segment(s)",
|
|
summary.GroupsProcessed,
|
|
summary.GroupsChanged,
|
|
summary.OriginalsRemoved,
|
|
summary.ReplacementsCreated,
|
|
),
|
|
),
|
|
}, nil
|
|
}
|
|
|
|
// backchannelPostprocessor is the stateless "backchannel" postprocessing step.
type backchannelPostprocessor struct{}

// Name returns the identifier of this step, "backchannel".
func (backchannelPostprocessor) Name() string {
	const name = "backchannel"
	return name
}
|
|
|
|
func (backchannelPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
out, tagged := backchannel.Apply(in, cfg.BackchannelMaxDuration)
|
|
return out, []report.Event{
|
|
report.Info("postprocessing", "backchannel", fmt.Sprintf("tagged %d backchannel segment(s)", tagged)),
|
|
}, nil
|
|
}
|
|
|
|
// fillerPostprocessor is the stateless "filler" postprocessing step.
type fillerPostprocessor struct{}

// Name returns the identifier of this step, "filler".
func (fillerPostprocessor) Name() string {
	const name = "filler"
	return name
}
|
|
|
|
func (fillerPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
out, tagged := filler.Apply(in, cfg.FillerMaxDuration)
|
|
return out, []report.Event{
|
|
report.Info("postprocessing", "filler", fmt.Sprintf("tagged %d filler segment(s)", tagged)),
|
|
}, nil
|
|
}
|
|
|
|
// coalescePostprocessor is the stateless "coalesce" postprocessing step.
type coalescePostprocessor struct{}

// Name returns the identifier of this step, "coalesce".
func (coalescePostprocessor) Name() string {
	const name = "coalesce"
	return name
}
|
|
|
|
func (coalescePostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
out, summary := coalesce.Apply(in, cfg.CoalesceGap)
|
|
return out, []report.Event{
|
|
report.Info(
|
|
"postprocessing",
|
|
"coalesce",
|
|
fmt.Sprintf("merged %d original segment(s) into %d coalesced segment(s)", summary.OriginalSegmentsMerged, summary.CoalescedSegments),
|
|
),
|
|
}, nil
|
|
}
|
|
|
|
// resolveDanglersPostprocessor is the stateless "resolve-danglers"
// postprocessing step.
type resolveDanglersPostprocessor struct{}

// Name returns the identifier of this step, "resolve-danglers".
func (resolveDanglersPostprocessor) Name() string {
	const name = "resolve-danglers"
	return name
}
|
|
|
|
func (resolveDanglersPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
out, summary := danglers.Apply(in)
|
|
return out, []report.Event{
|
|
report.Info(
|
|
"postprocessing",
|
|
"resolve-danglers",
|
|
fmt.Sprintf("merged %d dangling segment(s) into %d target segment(s)", summary.DanglersMerged, summary.TargetsChanged),
|
|
),
|
|
}, nil
|
|
}
|
|
|
|
// autocorrectPostprocessor is the stateless "autocorrect" postprocessing step.
type autocorrectPostprocessor struct{}

// Name returns the identifier of this step, "autocorrect".
func (autocorrectPostprocessor) Name() string {
	const name = "autocorrect"
	return name
}
|
|
|
|
func (autocorrectPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
if cfg.AutocorrectFile == "" {
|
|
return in, []report.Event{
|
|
report.Info("postprocessing", "autocorrect", "skipped autocorrect because no autocorrect file was supplied"),
|
|
}, nil
|
|
}
|
|
|
|
rules, err := autocorrect.Load(cfg.AutocorrectFile)
|
|
if err != nil {
|
|
return model.MergedTranscript{}, nil, err
|
|
}
|
|
|
|
replacements := 0
|
|
for index := range in.Segments {
|
|
var count int
|
|
in.Segments[index].Text, count = rules.Apply(in.Segments[index].Text)
|
|
replacements += count
|
|
}
|
|
|
|
return in, []report.Event{
|
|
report.Info("postprocessing", "autocorrect", fmt.Sprintf("applied %d autocorrect replacement(s)", replacements)),
|
|
}, nil
|
|
}
|