Implemented an overlap detection module in the postprocessing chain

This commit is contained in:
2026-04-26 20:39:49 -05:00
parent f9ca80f2e8
commit e42a2326e8
8 changed files with 464 additions and 4 deletions

View File

@@ -7,6 +7,7 @@ import (
"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
"gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/model"
"gitea.maximumdirect.net/eric/seriatim/internal/overlap"
"gitea.maximumdirect.net/eric/seriatim/internal/report"
)
@@ -48,6 +49,23 @@ func (assignIDs) Process(ctx context.Context, in model.MergedTranscript, cfg con
}, nil
}
type detectOverlaps struct{}
func (detectOverlaps) Name() string {
return "detect-overlaps"
}
func (detectOverlaps) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
if err := ctx.Err(); err != nil {
return model.MergedTranscript{}, nil, err
}
in = overlap.Detect(in)
return in, []report.Event{
report.Info("postprocessing", "detect-overlaps", fmt.Sprintf("detected %d overlap group(s)", len(in.OverlapGroups))),
}, nil
}
type autocorrectPostprocessor struct{}
func (autocorrectPostprocessor) Name() string {

View File

@@ -11,7 +11,7 @@ func NewRegistry() *pipeline.Registry {
registry.RegisterPreprocessor(normalizeSpeakers{})
registry.RegisterPreprocessor(trimText{})
registry.RegisterMerger(placeholderMerger{})
registry.RegisterPostprocessor(noopPostprocessor{name: "detect-overlaps"})
registry.RegisterPostprocessor(detectOverlaps{})
registry.RegisterPostprocessor(noopPostprocessor{name: "resolve-overlaps"})
registry.RegisterPostprocessor(assignIDs{})
registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})