Implemented a module to detect filler segments and skip them for the purposes of same-speaker segment coalescing

This commit is contained in:
2026-04-27 19:58:55 -05:00
parent bbfb8aba44
commit fb0519c561
9 changed files with 319 additions and 14 deletions

View File

@@ -8,6 +8,7 @@ import (
"gitea.maximumdirect.net/eric/seriatim/internal/backchannel"
"gitea.maximumdirect.net/eric/seriatim/internal/coalesce"
"gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/filler"
"gitea.maximumdirect.net/eric/seriatim/internal/model"
"gitea.maximumdirect.net/eric/seriatim/internal/overlap"
"gitea.maximumdirect.net/eric/seriatim/internal/report"
@@ -116,6 +117,23 @@ func (backchannelPostprocessor) Process(ctx context.Context, in model.MergedTran
}, nil
}
// fillerPostprocessor is a stateless postprocessor; its Process method runs
// filler.Apply over a merged transcript.
type fillerPostprocessor struct{}

// Name returns the identifier of this postprocessor, "filler".
func (fillerPostprocessor) Name() string {
	const name = "filler"
	return name
}
// Process passes the merged transcript through filler.Apply.
//
// If ctx already carries an error, that error is returned along with a zero
// transcript and no events, and filler.Apply is not called. Otherwise the
// transcript returned by filler.Apply is handed back together with a single
// event, built with report.Info, that states the count filler.Apply returned.
// cfg is not consulted.
func (fillerPostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
	// Stop before doing any work when the context is already done.
	ctxErr := ctx.Err()
	if ctxErr != nil {
		return model.MergedTranscript{}, nil, ctxErr
	}

	result, count := filler.Apply(in)
	message := fmt.Sprintf("tagged %d filler segment(s)", count)
	events := []report.Event{
		report.Info("postprocessing", "filler", message),
	}
	return result, events, nil
}
type coalescePostprocessor struct{}
func (coalescePostprocessor) Name() string {

View File

@@ -14,6 +14,7 @@ func NewRegistry() *pipeline.Registry {
registry.RegisterPostprocessor(detectOverlaps{})
registry.RegisterPostprocessor(resolveOverlaps{})
registry.RegisterPostprocessor(backchannelPostprocessor{})
registry.RegisterPostprocessor(fillerPostprocessor{})
registry.RegisterPostprocessor(coalescePostprocessor{})
registry.RegisterPostprocessor(assignIDs{})
registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})