Added a module to coalesce adjacent same-speaker segments
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/coalesce"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/overlap"
|
||||
@@ -77,7 +78,7 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap)
|
||||
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap, cfg.WordRunReorderWindow)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
@@ -97,6 +98,27 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c
|
||||
}, nil
|
||||
}
|
||||
|
||||
// coalescePostprocessor is the stateless postprocessor whose Name is
// "coalesce" and whose Process step delegates to coalesce.Apply.
type coalescePostprocessor struct{}
|
||||
|
||||
func (coalescePostprocessor) Name() string {
|
||||
return "coalesce"
|
||||
}
|
||||
|
||||
func (coalescePostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
out, summary := coalesce.Apply(in, cfg.CoalesceGap)
|
||||
return out, []report.Event{
|
||||
report.Info(
|
||||
"postprocessing",
|
||||
"coalesce",
|
||||
fmt.Sprintf("merged %d original segment(s) into %d coalesced segment(s)", summary.OriginalSegmentsMerged, summary.CoalescedSegments),
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type autocorrectPostprocessor struct{}
|
||||
|
||||
func (autocorrectPostprocessor) Name() string {
|
||||
|
||||
@@ -13,6 +13,7 @@ func NewRegistry() *pipeline.Registry {
|
||||
registry.RegisterMerger(placeholderMerger{})
|
||||
registry.RegisterPostprocessor(detectOverlaps{})
|
||||
registry.RegisterPostprocessor(resolveOverlaps{})
|
||||
registry.RegisterPostprocessor(coalescePostprocessor{})
|
||||
registry.RegisterPostprocessor(assignIDs{})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})
|
||||
registry.RegisterPostprocessor(autocorrectPostprocessor{})
|
||||
|
||||
Reference in New Issue
Block a user