Added a module to coalesce adjacent same-speaker segments

This commit is contained in:
2026-04-27 19:30:00 -05:00
parent 13d972cb24
commit aab6d12730
12 changed files with 919 additions and 28 deletions

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
"gitea.maximumdirect.net/eric/seriatim/internal/coalesce"
"gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/model"
"gitea.maximumdirect.net/eric/seriatim/internal/overlap"
@@ -77,7 +78,7 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c
return model.MergedTranscript{}, nil, err
}
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap)
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap, cfg.WordRunReorderWindow)
if err != nil {
return model.MergedTranscript{}, nil, err
}
@@ -97,6 +98,27 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c
}, nil
}
// coalescePostprocessor is a stateless postprocessing stage. Its Process
// method hands the merged transcript to coalesce.Apply.
type coalescePostprocessor struct{}

// Name returns the fixed identifier of this stage, "coalesce".
func (coalescePostprocessor) Name() string {
	const stageName = "coalesce"
	return stageName
}
// Process runs the coalesce stage over the merged transcript.
//
// If ctx is already done, it returns an empty transcript, no events, and the
// context's error. Otherwise it passes in and cfg.CoalesceGap to
// coalesce.Apply and returns the resulting transcript together with a single
// informational report event built from the returned summary's
// OriginalSegmentsMerged and CoalescedSegments counts.
func (coalescePostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return model.MergedTranscript{}, nil, ctxErr
	}

	merged, stats := coalesce.Apply(in, cfg.CoalesceGap)

	message := fmt.Sprintf(
		"merged %d original segment(s) into %d coalesced segment(s)",
		stats.OriginalSegmentsMerged,
		stats.CoalescedSegments,
	)
	events := []report.Event{
		report.Info("postprocessing", "coalesce", message),
	}
	return merged, events, nil
}
// autocorrectPostprocessor is an empty struct type; NewRegistry registers a
// value of it through RegisterPostprocessor.
type autocorrectPostprocessor struct{}
func (autocorrectPostprocessor) Name() string {

View File

@@ -13,6 +13,7 @@ func NewRegistry() *pipeline.Registry {
registry.RegisterMerger(placeholderMerger{})
registry.RegisterPostprocessor(detectOverlaps{})
registry.RegisterPostprocessor(resolveOverlaps{})
registry.RegisterPostprocessor(coalescePostprocessor{})
registry.RegisterPostprocessor(assignIDs{})
registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})
registry.RegisterPostprocessor(autocorrectPostprocessor{})