Added a module to coalesce adjacent same-speaker segments
This commit is contained in:
16
README.md
16
README.md
@@ -44,7 +44,8 @@ Optional flags:
|
||||
- `--input-reader`: input reader module. Default: `json-files`.
|
||||
- `--output-modules`: comma-separated output modules. Default: `json`.
|
||||
- `--preprocessing-modules`: comma-separated preprocessing modules. Default: `validate-raw,normalize-speakers,trim-text`.
|
||||
- `--postprocessing-modules`: comma-separated postprocessing modules. Default: `detect-overlaps,resolve-overlaps,detect-overlaps,autocorrect,assign-ids,validate-output`.
|
||||
- `--postprocessing-modules`: comma-separated postprocessing modules. Default: `detect-overlaps,resolve-overlaps,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output`.
|
||||
- `--coalesce-gap`: maximum same-speaker gap in seconds for `coalesce`. Must be a non-negative number; `0` merges only same-speaker segments that touch or overlap. Default: `3.0`.
|
||||
|
||||
## Input JSON Format
|
||||
|
||||
@@ -150,7 +151,7 @@ The merged output uses the current seriatim envelope:
|
||||
"input_reader": "json-files",
|
||||
"input_files": ["eric.json", "mike.json"],
|
||||
"preprocessing_modules": ["validate-raw", "normalize-speakers", "trim-text"],
|
||||
"postprocessing_modules": ["detect-overlaps", "resolve-overlaps", "detect-overlaps", "autocorrect", "assign-ids", "validate-output"],
|
||||
"postprocessing_modules": ["detect-overlaps", "resolve-overlaps", "coalesce", "detect-overlaps", "autocorrect", "assign-ids", "validate-output"],
|
||||
"output_modules": ["json"]
|
||||
},
|
||||
"segments": [
|
||||
@@ -214,7 +215,7 @@ Overlap behavior:
|
||||
|
||||
## Overlap Resolution
|
||||
|
||||
The default postprocessing pipeline runs `detect-overlaps`, then `resolve-overlaps`, then a second `detect-overlaps` pass.
|
||||
The default postprocessing pipeline runs `detect-overlaps`, then `resolve-overlaps`, then `coalesce`, then a second `detect-overlaps` pass.
|
||||
|
||||
For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word timing to build smaller word-run replacement segments:
|
||||
|
||||
@@ -224,6 +225,9 @@ For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word
|
||||
- Words for the same speaker are merged into one run when the gap between adjacent words is no greater than `SERIATIM_OVERLAP_WORD_RUN_GAP`.
|
||||
- The default word-run gap is `0.75` seconds.
|
||||
- Set `SERIATIM_OVERLAP_WORD_RUN_GAP` to a positive number of seconds to override the default.
|
||||
- Near-start replacement word runs are reordered so shorter segments come first when adjacent starts are within `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW`.
|
||||
- The default word-run reorder window is `0.4` seconds.
|
||||
- Set `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW` to a positive number of seconds to override the default.
|
||||
- Replacement segment text is built by joining word text with single spaces.
|
||||
- Replacement segments include `source_ref` and `derived_from`.
|
||||
- Replacement segments omit `source_segment_index` because they are derived from one or more original segments.
|
||||
@@ -232,6 +236,12 @@ For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word
|
||||
- If a speaker has no usable word timing in a group, that speaker's original segment is kept.
|
||||
- If no speakers in a group have usable word timing, the original group and annotations remain unchanged.
|
||||
|
||||
## Coalescing
|
||||
|
||||
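The default pipeline runs `coalesce` before the second overlap detection pass. It walks the transcript in its current order and merges a segment into the preceding same-speaker run when `next.start - current.end <= --coalesce-gap`, where `current` is the last segment already in that run. Overlapping same-speaker segments have a negative gap and are therefore merged as well.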
|
||||
|
||||
Coalesced segments use `source_ref` values such as `coalesce:1`, include `derived_from`, and omit `source_segment_index`.
|
||||
|
||||
## Autocorrect
|
||||
|
||||
Autocorrect is included in the default postprocessing pipeline. If `--autocorrect` is omitted, the module leaves transcript text unchanged and records a skip event in the optional report.
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/autocorrect"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/coalesce"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/overlap"
|
||||
@@ -77,7 +78,7 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap)
|
||||
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap, cfg.WordRunReorderWindow)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
@@ -97,6 +98,27 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c
|
||||
}, nil
|
||||
}
|
||||
|
||||
// coalescePostprocessor is the stateless postprocessing module that runs coalesce.Apply.
type coalescePostprocessor struct{}
|
||||
|
||||
func (coalescePostprocessor) Name() string {
|
||||
return "coalesce"
|
||||
}
|
||||
|
||||
func (coalescePostprocessor) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
out, summary := coalesce.Apply(in, cfg.CoalesceGap)
|
||||
return out, []report.Event{
|
||||
report.Info(
|
||||
"postprocessing",
|
||||
"coalesce",
|
||||
fmt.Sprintf("merged %d original segment(s) into %d coalesced segment(s)", summary.OriginalSegmentsMerged, summary.CoalescedSegments),
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type autocorrectPostprocessor struct{}
|
||||
|
||||
func (autocorrectPostprocessor) Name() string {
|
||||
|
||||
@@ -13,6 +13,7 @@ func NewRegistry() *pipeline.Registry {
|
||||
registry.RegisterMerger(placeholderMerger{})
|
||||
registry.RegisterPostprocessor(detectOverlaps{})
|
||||
registry.RegisterPostprocessor(resolveOverlaps{})
|
||||
registry.RegisterPostprocessor(coalescePostprocessor{})
|
||||
registry.RegisterPostprocessor(assignIDs{})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})
|
||||
registry.RegisterPostprocessor(autocorrectPostprocessor{})
|
||||
|
||||
@@ -34,6 +34,7 @@ func newMergeCommand() *cobra.Command {
|
||||
flags.StringVar(&opts.OutputModules, "output-modules", config.DefaultOutputModules, "comma-separated output modules")
|
||||
flags.StringVar(&opts.PreprocessingModules, "preprocessing-modules", config.DefaultPreprocessingModules, "comma-separated preprocessing modules")
|
||||
flags.StringVar(&opts.PostprocessingModules, "postprocessing-modules", config.DefaultPostprocessingModules, "comma-separated postprocessing modules")
|
||||
flags.StringVar(&opts.CoalesceGap, "coalesce-gap", config.DefaultCoalesceGapValue, "maximum same-speaker gap in seconds for coalesce")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
@@ -90,6 +90,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
"placeholder-merger",
|
||||
"detect-overlaps",
|
||||
"resolve-overlaps",
|
||||
"coalesce",
|
||||
"detect-overlaps",
|
||||
"autocorrect",
|
||||
"assign-ids",
|
||||
@@ -195,7 +196,7 @@ func TestMergeDetectsOverlapGroups(t *testing.T) {
|
||||
if group.Start != 1 || group.End != 6 {
|
||||
t.Fatalf("group bounds = %f-%f, want 1-6", group.Start, group.End)
|
||||
}
|
||||
wantRefs := []string{inputA + "#0", inputA + "#1", inputB + "#0"}
|
||||
wantRefs := []string{"coalesce:1", inputB + "#0"}
|
||||
if !equalStrings(group.Segments, wantRefs) {
|
||||
t.Fatalf("group refs = %v, want %v", group.Segments, wantRefs)
|
||||
}
|
||||
@@ -210,6 +211,15 @@ func TestMergeDetectsOverlapGroups(t *testing.T) {
|
||||
t.Fatalf("segment %d overlap group ID = %d, want 1", index, segment.OverlapGroupID)
|
||||
}
|
||||
}
|
||||
if len(transcript.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(transcript.Segments))
|
||||
}
|
||||
if transcript.Segments[0].SourceRef != "coalesce:1" {
|
||||
t.Fatalf("coalesced source_ref = %q, want coalesce:1", transcript.Segments[0].SourceRef)
|
||||
}
|
||||
if !equalStrings(transcript.Segments[0].DerivedFrom, []string{inputA + "#0", inputA + "#1"}) {
|
||||
t.Fatalf("coalesced derived_from = %v", transcript.Segments[0].DerivedFrom)
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
@@ -410,6 +420,171 @@ func TestMergeDetectsResidualOverlapsAfterResolution(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeOrdersNearStartWordRunsShorterFirst(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1,
|
||||
"end": 4,
|
||||
"text": "alice long",
|
||||
"words": [
|
||||
{"word": "alice-long", "start": 1.0, "end": 2.0}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
inputB := writeJSONFile(t, dir, "b.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1.1,
|
||||
"end": 3,
|
||||
"text": "bob short",
|
||||
"words": [
|
||||
{"word": "bob-short", "start": 1.2, "end": 1.3}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||
- speaker: Alice
|
||||
match: ["a.json"]
|
||||
- speaker: Bob
|
||||
match: ["b.json"]
|
||||
`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", inputA,
|
||||
"--input-file", inputB,
|
||||
"--speakers", speakers,
|
||||
"--output-file", output,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript model.FinalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
if len(transcript.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(transcript.Segments))
|
||||
}
|
||||
if transcript.Segments[0].Text != "bob-short" || transcript.Segments[0].ID != 1 {
|
||||
t.Fatalf("first segment = %#v, want bob-short with ID 1", transcript.Segments[0])
|
||||
}
|
||||
if transcript.Segments[1].Text != "alice-long" || transcript.Segments[1].ID != 2 {
|
||||
t.Fatalf("second segment = %#v, want alice-long with ID 2", transcript.Segments[1])
|
||||
}
|
||||
if len(transcript.OverlapGroups) != 1 {
|
||||
t.Fatalf("overlap group count = %d, want 1", len(transcript.OverlapGroups))
|
||||
}
|
||||
if transcript.Segments[0].OverlapGroupID != 1 || transcript.Segments[1].OverlapGroupID != 1 {
|
||||
t.Fatalf("segments should retain residual overlap annotation: %#v", transcript.Segments)
|
||||
}
|
||||
wantRefs := []string{"word-run:1:1:1", "word-run:1:2:1"}
|
||||
if !equalStrings(transcript.OverlapGroups[0].Segments, wantRefs) {
|
||||
t.Fatalf("overlap refs = %v, want %v", transcript.OverlapGroups[0].Segments, wantRefs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeCoalescesSameSpeakerSegmentsBeforeFinalOverlapDetection(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{"start": 1, "end": 2, "text": "first"},
|
||||
{"start": 4, "end": 5, "text": "second"}
|
||||
]
|
||||
}`)
|
||||
inputB := writeJSONFile(t, dir, "b.json", `{
|
||||
"segments": [
|
||||
{"start": 4.5, "end": 6, "text": "bob"}
|
||||
]
|
||||
}`)
|
||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||
- speaker: Alice
|
||||
match: ["a.json"]
|
||||
- speaker: Bob
|
||||
match: ["b.json"]
|
||||
`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
reportPath := filepath.Join(dir, "report.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", inputA,
|
||||
"--input-file", inputB,
|
||||
"--speakers", speakers,
|
||||
"--output-file", output,
|
||||
"--report-file", reportPath,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript model.FinalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
if len(transcript.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(transcript.Segments))
|
||||
}
|
||||
alice := transcript.Segments[0]
|
||||
if alice.ID != 1 || alice.Text != "first second" || alice.SourceRef != "coalesce:1" {
|
||||
t.Fatalf("unexpected coalesced Alice segment: %#v", alice)
|
||||
}
|
||||
if alice.SourceSegmentIndex != nil {
|
||||
t.Fatalf("coalesced segment source_segment_index = %d, want nil", *alice.SourceSegmentIndex)
|
||||
}
|
||||
if !equalStrings(alice.DerivedFrom, []string{inputA + "#0", inputA + "#1"}) {
|
||||
t.Fatalf("derived_from = %v", alice.DerivedFrom)
|
||||
}
|
||||
if transcript.Segments[1].ID != 2 || transcript.Segments[1].Text != "bob" {
|
||||
t.Fatalf("unexpected Bob segment: %#v", transcript.Segments[1])
|
||||
}
|
||||
if len(transcript.OverlapGroups) != 1 {
|
||||
t.Fatalf("overlap group count = %d, want 1", len(transcript.OverlapGroups))
|
||||
}
|
||||
group := transcript.OverlapGroups[0]
|
||||
if !equalStrings(group.Segments, []string{"coalesce:1", inputB + "#0"}) {
|
||||
t.Fatalf("group refs = %v", group.Segments)
|
||||
}
|
||||
if alice.OverlapGroupID != 1 || transcript.Segments[1].OverlapGroupID != 1 {
|
||||
t.Fatalf("expected final overlap annotation after coalesce: %#v", transcript.Segments)
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
if !hasReportEvent(rpt, "postprocessing", "coalesce", "merged 2 original segment(s) into 1 coalesced segment(s)") {
|
||||
t.Fatal("expected coalesce report event")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeCoalesceGapOverridePreventsMerge(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{"start": 1, "end": 2, "text": "first"},
|
||||
{"start": 4, "end": 5, "text": "second"}
|
||||
]
|
||||
}`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--coalesce-gap", "1",
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript model.FinalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
if len(transcript.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(transcript.Segments))
|
||||
}
|
||||
if transcript.Segments[0].Text != "first" || transcript.Segments[1].Text != "second" {
|
||||
t.Fatalf("segments were unexpectedly coalesced: %#v", transcript.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpeakerMatchingUsesFirstMatchingRuleCaseInsensitive(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "2026-04-19-Adam_Rakestraw.json", `{
|
||||
|
||||
118
internal/coalesce/coalesce.go
Normal file
118
internal/coalesce/coalesce.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package coalesce
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
|
||||
// Summary holds the counters produced by a single coalesce pass.
type Summary struct {
	// OriginalSegmentsMerged is the number of input segments that were
	// absorbed into coalesced segments.
	OriginalSegmentsMerged int
	// CoalescedSegments is the number of merged segments emitted in their place.
	CoalescedSegments int
}
|
||||
|
||||
// Apply merges adjacent same-speaker segments in the transcript's current order.
|
||||
func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summary) {
|
||||
if len(in.Segments) < 2 {
|
||||
return in, Summary{}
|
||||
}
|
||||
|
||||
out := model.MergedTranscript{
|
||||
Segments: make([]model.Segment, 0, len(in.Segments)),
|
||||
OverlapGroups: in.OverlapGroups,
|
||||
}
|
||||
summary := Summary{}
|
||||
coalescedID := 0
|
||||
|
||||
current := newRun(in.Segments[0])
|
||||
for _, segment := range in.Segments[1:] {
|
||||
if current.canMerge(segment, gap) {
|
||||
current.add(segment)
|
||||
continue
|
||||
}
|
||||
|
||||
coalescedID = appendRun(&out, current, coalescedID, &summary)
|
||||
current = newRun(segment)
|
||||
}
|
||||
appendRun(&out, current, coalescedID, &summary)
|
||||
|
||||
return out, summary
|
||||
}
|
||||
|
||||
type run struct {
|
||||
segments []model.Segment
|
||||
}
|
||||
|
||||
func newRun(segment model.Segment) run {
|
||||
return run{
|
||||
segments: []model.Segment{segment},
|
||||
}
|
||||
}
|
||||
|
||||
func (r run) canMerge(next model.Segment, gap float64) bool {
|
||||
current := r.segments[len(r.segments)-1]
|
||||
return current.Speaker == next.Speaker && next.Start-current.End <= gap
|
||||
}
|
||||
|
||||
func (r *run) add(segment model.Segment) {
|
||||
r.segments = append(r.segments, segment)
|
||||
}
|
||||
|
||||
func appendRun(out *model.MergedTranscript, current run, coalescedID int, summary *Summary) int {
|
||||
if len(current.segments) == 1 {
|
||||
out.Segments = append(out.Segments, current.segments[0])
|
||||
return coalescedID
|
||||
}
|
||||
|
||||
coalescedID++
|
||||
out.Segments = append(out.Segments, current.coalescedSegment(coalescedID))
|
||||
summary.OriginalSegmentsMerged += len(current.segments)
|
||||
summary.CoalescedSegments++
|
||||
return coalescedID
|
||||
}
|
||||
|
||||
func (r run) coalescedSegment(id int) model.Segment {
|
||||
first := r.segments[0]
|
||||
merged := model.Segment{
|
||||
Source: first.Source,
|
||||
SourceRef: fmt.Sprintf("coalesce:%d", id),
|
||||
DerivedFrom: make([]string, 0, len(r.segments)),
|
||||
Speaker: first.Speaker,
|
||||
Start: first.Start,
|
||||
End: first.End,
|
||||
Words: make([]model.Word, 0),
|
||||
}
|
||||
|
||||
text := make([]string, 0, len(r.segments))
|
||||
for _, segment := range r.segments {
|
||||
if segment.Start < merged.Start {
|
||||
merged.Start = segment.Start
|
||||
}
|
||||
if segment.End > merged.End {
|
||||
merged.End = segment.End
|
||||
}
|
||||
if segment.Source != merged.Source {
|
||||
merged.Source = "derived"
|
||||
}
|
||||
if trimmed := strings.TrimSpace(segment.Text); trimmed != "" {
|
||||
text = append(text, trimmed)
|
||||
}
|
||||
merged.Words = append(merged.Words, segment.Words...)
|
||||
merged.DerivedFrom = append(merged.DerivedFrom, segmentRef(segment))
|
||||
}
|
||||
|
||||
merged.Text = strings.Join(text, " ")
|
||||
return merged
|
||||
}
|
||||
|
||||
func segmentRef(segment model.Segment) string {
|
||||
if segment.SourceSegmentIndex != nil {
|
||||
return fmt.Sprintf("%s#%d", segment.Source, *segment.SourceSegmentIndex)
|
||||
}
|
||||
if segment.SourceRef != "" {
|
||||
return segment.SourceRef
|
||||
}
|
||||
return segment.Source
|
||||
}
|
||||
156
internal/coalesce/coalesce_test.go
Normal file
156
internal/coalesce/coalesce_test.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package coalesce
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
|
||||
func TestApplyMergesConsecutiveSameSpeakerWithinGap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 2, " first "),
|
||||
segment("a.json", 1, "Alice", 4, 5, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
segment := got.Segments[0]
|
||||
if segment.Text != "first second" {
|
||||
t.Fatalf("text = %q", segment.Text)
|
||||
}
|
||||
if segment.Start != 1 || segment.End != 5 {
|
||||
t.Fatalf("bounds = %f-%f, want 1-5", segment.Start, segment.End)
|
||||
}
|
||||
if segment.Source != "a.json" {
|
||||
t.Fatalf("source = %q, want a.json", segment.Source)
|
||||
}
|
||||
if segment.SourceRef != "coalesce:1" {
|
||||
t.Fatalf("source_ref = %q, want coalesce:1", segment.SourceRef)
|
||||
}
|
||||
if segment.SourceSegmentIndex != nil {
|
||||
t.Fatalf("source_segment_index = %d, want nil", *segment.SourceSegmentIndex)
|
||||
}
|
||||
if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "a.json#1"}) {
|
||||
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyDoesNotMergeSameSpeakerBeyondGap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 2, "first"),
|
||||
segment("a.json", 1, "Alice", 5.1, 6, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if !reflect.DeepEqual(got.Segments, merged.Segments) {
|
||||
t.Fatalf("segments changed:\ngot %#v\nwant %#v", got.Segments, merged.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyDoesNotMergeAcrossDifferentSpeaker(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 2, "first"),
|
||||
segment("b.json", 0, "Bob", 2.5, 3, "bob"),
|
||||
segment("a.json", 1, "Alice", 3.5, 4, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 0 || summary.CoalescedSegments != 0 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if len(got.Segments) != 3 {
|
||||
t.Fatalf("segment count = %d, want 3", len(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMergesNegativeGapOverlap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 1, 4, "first"),
|
||||
segment("a.json", 1, "Alice", 3, 5, "second"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 0)
|
||||
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if got.Segments[0].Start != 1 || got.Segments[0].End != 5 {
|
||||
t.Fatalf("bounds = %f-%f, want 1-5", got.Segments[0].Start, got.Segments[0].End)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyHonorsCurrentOrder(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segment("a.json", 0, "Alice", 10, 11, "later"),
|
||||
segment("a.json", 1, "Alice", 1, 2, "earlier"),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary := Apply(merged, 3)
|
||||
if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 {
|
||||
t.Fatalf("summary = %#v", summary)
|
||||
}
|
||||
if got.Segments[0].Text != "later earlier" {
|
||||
t.Fatalf("text = %q, want current-order merge", got.Segments[0].Text)
|
||||
}
|
||||
if got.Segments[0].Start != 1 || got.Segments[0].End != 11 {
|
||||
t.Fatalf("bounds = %f-%f, want 1-11", got.Segments[0].Start, got.Segments[0].End)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs(t *testing.T) {
|
||||
first := segment("a.json", 0, "Alice", 1, 2, "first")
|
||||
second := model.Segment{
|
||||
Source: "b.json",
|
||||
SourceRef: "word-run:1:1:1",
|
||||
DerivedFrom: []string{"b.json#0"},
|
||||
Speaker: "Alice",
|
||||
Start: 2.5,
|
||||
End: 3,
|
||||
Text: "second",
|
||||
}
|
||||
|
||||
got, _ := Apply(model.MergedTranscript{Segments: []model.Segment{first, second}}, 3)
|
||||
segment := got.Segments[0]
|
||||
if segment.Source != "derived" {
|
||||
t.Fatalf("source = %q, want derived", segment.Source)
|
||||
}
|
||||
if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "word-run:1:1:1"}) {
|
||||
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
||||
}
|
||||
}
|
||||
|
||||
func segment(source string, sourceIndex int, speaker string, start float64, end float64, text string) model.Segment {
|
||||
return model.Segment{
|
||||
Source: source,
|
||||
SourceSegmentIndex: intPtr(sourceIndex),
|
||||
Speaker: speaker,
|
||||
Start: start,
|
||||
End: end,
|
||||
Text: text,
|
||||
Words: []model.Word{
|
||||
{Text: text, Start: start, End: end, Timed: true},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// intPtr returns a pointer to a fresh copy of value.
func intPtr(value int) *int {
	p := new(int)
	*p = value
	return p
}
|
||||
@@ -14,9 +14,13 @@ const (
|
||||
DefaultInputReader = "json-files"
|
||||
DefaultOutputModules = "json"
|
||||
DefaultPreprocessingModules = "validate-raw,normalize-speakers,trim-text"
|
||||
DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,detect-overlaps,autocorrect,assign-ids,validate-output"
|
||||
DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output"
|
||||
DefaultOverlapWordRunGap = 0.75
|
||||
DefaultWordRunReorderWindow = 0.4
|
||||
DefaultCoalesceGap = 3.0
|
||||
DefaultCoalesceGapValue = "3.0"
|
||||
OverlapWordRunGapEnv = "SERIATIM_OVERLAP_WORD_RUN_GAP"
|
||||
WordRunReorderWindowEnv = "SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW"
|
||||
)
|
||||
|
||||
// MergeOptions captures raw CLI option values before validation.
|
||||
@@ -30,6 +34,7 @@ type MergeOptions struct {
|
||||
OutputModules string
|
||||
PreprocessingModules string
|
||||
PostprocessingModules string
|
||||
CoalesceGap string
|
||||
}
|
||||
|
||||
// Config is the validated runtime configuration for a merge invocation.
|
||||
@@ -44,6 +49,8 @@ type Config struct {
|
||||
PreprocessingModules []string
|
||||
PostprocessingModules []string
|
||||
OverlapWordRunGap float64
|
||||
WordRunReorderWindow float64
|
||||
CoalesceGap float64
|
||||
}
|
||||
|
||||
// NewMergeConfig validates raw merge options and returns normalized config.
|
||||
@@ -54,6 +61,8 @@ func NewMergeConfig(opts MergeOptions) (Config, error) {
|
||||
PreprocessingModules: nil,
|
||||
PostprocessingModules: nil,
|
||||
OverlapWordRunGap: DefaultOverlapWordRunGap,
|
||||
WordRunReorderWindow: DefaultWordRunReorderWindow,
|
||||
CoalesceGap: DefaultCoalesceGap,
|
||||
}
|
||||
|
||||
if cfg.InputReader == "" {
|
||||
@@ -119,6 +128,14 @@ func NewMergeConfig(opts MergeOptions) (Config, error) {
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.WordRunReorderWindow, err = parseWordRunReorderWindow()
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.CoalesceGap, err = parseCoalesceGap(opts.CoalesceGap)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
@@ -198,21 +215,45 @@ func requireFile(path string, flag string) error {
|
||||
}
|
||||
|
||||
func parseOverlapWordRunGap() (float64, error) {
|
||||
value := strings.TrimSpace(os.Getenv(OverlapWordRunGapEnv))
|
||||
return parsePositiveFloatEnv(OverlapWordRunGapEnv, DefaultOverlapWordRunGap)
|
||||
}
|
||||
|
||||
func parseWordRunReorderWindow() (float64, error) {
|
||||
return parsePositiveFloatEnv(WordRunReorderWindowEnv, DefaultWordRunReorderWindow)
|
||||
}
|
||||
|
||||
func parseCoalesceGap(value string) (float64, error) {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
return DefaultOverlapWordRunGap, nil
|
||||
return DefaultCoalesceGap, nil
|
||||
}
|
||||
|
||||
gap, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("%s must be a positive number of seconds: %w", OverlapWordRunGapEnv, err)
|
||||
return 0, fmt.Errorf("--coalesce-gap must be a non-negative number of seconds: %w", err)
|
||||
}
|
||||
if gap <= 0 {
|
||||
return 0, fmt.Errorf("%s must be positive", OverlapWordRunGapEnv)
|
||||
if gap < 0 {
|
||||
return 0, fmt.Errorf("--coalesce-gap must be non-negative")
|
||||
}
|
||||
return gap, nil
|
||||
}
|
||||
|
||||
// parsePositiveFloatEnv reads the environment variable name as a positive
// floating-point number.
//
// Surrounding whitespace is ignored. When the variable is unset or blank,
// defaultValue is returned. An error naming the variable is returned when the
// value is not a number, is zero or negative, or is NaN.
func parsePositiveFloatEnv(name string, defaultValue float64) (float64, error) {
	value := strings.TrimSpace(os.Getenv(name))
	if value == "" {
		return defaultValue, nil
	}

	parsed, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return 0, fmt.Errorf("%s must be a positive number of seconds: %w", name, err)
	}
	// strconv.ParseFloat accepts "NaN". NaN compares false against every
	// value, so it would slip past a plain `parsed <= 0` check; parsed != parsed
	// is true only for NaN.
	if parsed != parsed || parsed <= 0 {
		return 0, fmt.Errorf("%s must be positive", name)
	}
	return parsed, nil
}
|
||||
|
||||
func contains(values []string, target string) bool {
|
||||
for _, value := range values {
|
||||
if value == target {
|
||||
|
||||
@@ -126,6 +126,186 @@ func TestOverlapWordRunGapRejectsInvalidEnvOverride(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestWordRunReorderWindowDefaultsTo04(t *testing.T) {
|
||||
t.Setenv(WordRunReorderWindowEnv, "")
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
cfg, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("config failed: %v", err)
|
||||
}
|
||||
if cfg.WordRunReorderWindow != DefaultWordRunReorderWindow {
|
||||
t.Fatalf("window = %f, want %f", cfg.WordRunReorderWindow, DefaultWordRunReorderWindow)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWordRunReorderWindowUsesValidEnvOverride(t *testing.T) {
|
||||
t.Setenv(WordRunReorderWindowEnv, "0.2")
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
cfg, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("config failed: %v", err)
|
||||
}
|
||||
if cfg.WordRunReorderWindow != 0.2 {
|
||||
t.Fatalf("window = %f, want 0.2", cfg.WordRunReorderWindow)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWordRunReorderWindowRejectsInvalidEnvOverride(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
value string
|
||||
want string
|
||||
}{
|
||||
{name: "non-numeric", value: "fast", want: "must be a positive number"},
|
||||
{name: "zero", value: "0", want: "must be positive"},
|
||||
{name: "negative", value: "-0.1", want: "must be positive"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
t.Setenv(WordRunReorderWindowEnv, test.value)
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
_, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), test.want) {
|
||||
t.Fatalf("expected error to contain %q, got %v", test.want, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoalesceGapDefaultsTo3(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
cfg, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("config failed: %v", err)
|
||||
}
|
||||
if cfg.CoalesceGap != DefaultCoalesceGap {
|
||||
t.Fatalf("coalesce gap = %f, want %f", cfg.CoalesceGap, DefaultCoalesceGap)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoalesceGapUsesValidOverride(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
cfg, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
CoalesceGap: "1.5",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("config failed: %v", err)
|
||||
}
|
||||
if cfg.CoalesceGap != 1.5 {
|
||||
t.Fatalf("coalesce gap = %f, want 1.5", cfg.CoalesceGap)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoalesceGapAllowsZero(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
cfg, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
CoalesceGap: "0",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("config failed: %v", err)
|
||||
}
|
||||
if cfg.CoalesceGap != 0 {
|
||||
t.Fatalf("coalesce gap = %f, want 0", cfg.CoalesceGap)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoalesceGapRejectsInvalidOverride(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
value string
|
||||
want string
|
||||
}{
|
||||
{name: "non-numeric", value: "fast", want: "must be a non-negative number"},
|
||||
{name: "negative", value: "-0.1", want: "must be non-negative"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
_, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
CoalesceGap: test.value,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), test.want) {
|
||||
t.Fatalf("expected error to contain %q, got %v", test.want, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func writeTempFile(t *testing.T, dir string, name string) string {
|
||||
t.Helper()
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package overlap
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
@@ -18,9 +19,10 @@ func Detect(in model.MergedTranscript) model.MergedTranscript {
|
||||
return in
|
||||
}
|
||||
|
||||
order := sortedSegmentIndices(in.Segments)
|
||||
var groupID int
|
||||
var candidate overlapCandidate
|
||||
for index := range in.Segments {
|
||||
for _, index := range order {
|
||||
segment := in.Segments[index]
|
||||
if !candidate.active {
|
||||
candidate = newCandidate(index, segment)
|
||||
@@ -40,6 +42,17 @@ func Detect(in model.MergedTranscript) model.MergedTranscript {
|
||||
return in
|
||||
}
|
||||
|
||||
func sortedSegmentIndices(segments []model.Segment) []int {
|
||||
indices := make([]int, len(segments))
|
||||
for index := range segments {
|
||||
indices[index] = index
|
||||
}
|
||||
sort.SliceStable(indices, func(i, j int) bool {
|
||||
return model.SegmentLess(segments[indices[i]], segments[indices[j]])
|
||||
})
|
||||
return indices
|
||||
}
|
||||
|
||||
type overlapCandidate struct {
|
||||
active bool
|
||||
indices []int
|
||||
|
||||
@@ -18,7 +18,7 @@ type ResolutionSummary struct {
|
||||
|
||||
// Resolve replaces detected overlap-group segments with word-run segments when
|
||||
// word-level timing is available.
|
||||
func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscript, ResolutionSummary, error) {
|
||||
func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64) (model.MergedTranscript, ResolutionSummary, error) {
|
||||
summary := ResolutionSummary{
|
||||
GroupsProcessed: len(in.OverlapGroups),
|
||||
}
|
||||
@@ -35,9 +35,10 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
clearAnnotationRefs := make(map[string]struct{})
|
||||
removeGroupIDs := make(map[int]struct{})
|
||||
replacements := make([]model.Segment, 0)
|
||||
replacementOrder := make(map[string]replacementOrder)
|
||||
|
||||
for _, group := range in.OverlapGroups {
|
||||
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap)
|
||||
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap, wordRunReorderWindow)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, ResolutionSummary{}, err
|
||||
}
|
||||
@@ -48,6 +49,9 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
summary.GroupsChanged++
|
||||
removeGroupIDs[group.ID] = struct{}{}
|
||||
replacements = append(replacements, resolved.replacements...)
|
||||
for sourceRef, order := range resolved.replacementOrder {
|
||||
replacementOrder[sourceRef] = order
|
||||
}
|
||||
|
||||
for _, ref := range group.Segments {
|
||||
clearAnnotationRefs[ref] = struct{}{}
|
||||
@@ -78,7 +82,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
}
|
||||
segments = append(segments, replacements...)
|
||||
sort.SliceStable(segments, func(i, j int) bool {
|
||||
return model.SegmentLess(segments[i], segments[j])
|
||||
return resolvedSegmentLess(segments[i], segments[j], replacementOrder)
|
||||
})
|
||||
|
||||
overlapGroups := make([]model.OverlapGroup, 0, len(in.OverlapGroups)-len(removeGroupIDs))
|
||||
@@ -98,6 +102,13 @@ func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscr
|
||||
type resolvedGroup struct {
|
||||
removeRefs []string
|
||||
replacements []model.Segment
|
||||
replacementOrder map[string]replacementOrder
|
||||
}
|
||||
|
||||
// replacementOrder records where a replacement segment sits inside the
// near-start cluster it was assigned to by reorderReplacementSegments.
type replacementOrder struct {
	// cluster identifies the cluster as "<groupID>:<clusterIndex>".
	cluster string
	// rank is the segment's position within its cluster after the
	// duration-based sort.
	rank int
	// anchor is the Start of the cluster's first segment in model.SegmentLess
	// order, taken before the duration-based sort.
	anchor float64
}
|
||||
|
||||
type resolutionWord struct {
|
||||
@@ -114,7 +125,7 @@ type wordRun struct {
|
||||
end float64
|
||||
}
|
||||
|
||||
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64) (resolvedGroup, error) {
|
||||
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64, wordRunReorderWindow float64) (resolvedGroup, error) {
|
||||
segmentsBySpeaker := make(map[string][]model.Segment)
|
||||
refsBySpeaker := make(map[string][]string)
|
||||
for _, ref := range group.Segments {
|
||||
@@ -147,9 +158,76 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde
|
||||
}
|
||||
}
|
||||
|
||||
resolved.replacements, resolved.replacementOrder = reorderReplacementSegments(group.ID, resolved.replacements, wordRunReorderWindow)
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
func reorderReplacementSegments(groupID int, replacements []model.Segment, wordRunReorderWindow float64) ([]model.Segment, map[string]replacementOrder) {
|
||||
if len(replacements) == 0 {
|
||||
return replacements, nil
|
||||
}
|
||||
|
||||
ordered := append([]model.Segment(nil), replacements...)
|
||||
sort.SliceStable(ordered, func(i, j int) bool {
|
||||
return model.SegmentLess(ordered[i], ordered[j])
|
||||
})
|
||||
|
||||
ranks := make(map[string]replacementOrder, len(ordered))
|
||||
clusterStart := 0
|
||||
clusterIndex := 1
|
||||
for clusterStart < len(ordered) {
|
||||
clusterEnd := clusterStart + 1
|
||||
for clusterEnd < len(ordered) && ordered[clusterEnd].Start-ordered[clusterEnd-1].Start <= wordRunReorderWindow {
|
||||
clusterEnd++
|
||||
}
|
||||
|
||||
cluster := ordered[clusterStart:clusterEnd]
|
||||
anchor := cluster[0].Start
|
||||
sort.SliceStable(cluster, func(i, j int) bool {
|
||||
leftDuration := cluster[i].End - cluster[i].Start
|
||||
rightDuration := cluster[j].End - cluster[j].Start
|
||||
if leftDuration != rightDuration {
|
||||
return leftDuration < rightDuration
|
||||
}
|
||||
return model.SegmentLess(cluster[i], cluster[j])
|
||||
})
|
||||
|
||||
clusterKey := fmt.Sprintf("%d:%d", groupID, clusterIndex)
|
||||
for index := range cluster {
|
||||
ranks[cluster[index].SourceRef] = replacementOrder{
|
||||
cluster: clusterKey,
|
||||
rank: index,
|
||||
anchor: anchor,
|
||||
}
|
||||
}
|
||||
|
||||
clusterStart = clusterEnd
|
||||
clusterIndex++
|
||||
}
|
||||
|
||||
return ordered, ranks
|
||||
}
|
||||
|
||||
func resolvedSegmentLess(left model.Segment, right model.Segment, replacementOrder map[string]replacementOrder) bool {
|
||||
leftOrder, leftHasOrder := replacementOrder[left.SourceRef]
|
||||
rightOrder, rightHasOrder := replacementOrder[right.SourceRef]
|
||||
if leftHasOrder && rightHasOrder && leftOrder.cluster == rightOrder.cluster && leftOrder.rank != rightOrder.rank {
|
||||
return leftOrder.rank < rightOrder.rank
|
||||
}
|
||||
leftStart := left.Start
|
||||
if leftHasOrder {
|
||||
leftStart = leftOrder.anchor
|
||||
}
|
||||
rightStart := right.Start
|
||||
if rightHasOrder {
|
||||
rightStart = rightOrder.anchor
|
||||
}
|
||||
if leftStart != rightStart {
|
||||
return leftStart < rightStart
|
||||
}
|
||||
return model.SegmentLess(left, right)
|
||||
}
|
||||
|
||||
func groupSpeakerOrder(group model.OverlapGroup, segmentsBySpeaker map[string][]model.Segment) []string {
|
||||
seen := make(map[string]struct{}, len(group.Speakers))
|
||||
speakers := make([]string, 0, len(group.Speakers))
|
||||
|
||||
@@ -2,6 +2,7 @@ package overlap
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
@@ -14,7 +15,7 @@ func TestResolveNoOverlapGroupsIsNoOp(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -39,7 +40,7 @@ func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) {
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -93,7 +94,7 @@ func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) {
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 10)
|
||||
got, _, err := Resolve(merged, 10, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -116,7 +117,7 @@ func TestResolveWordRunGapThreshold(t *testing.T) {
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -141,7 +142,7 @@ func TestResolvePartialResolutionKeepsNoWordSpeakerOriginals(t *testing.T) {
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -178,7 +179,7 @@ func TestResolveGroupWithNoUsableWordsRemainsUnchanged(t *testing.T) {
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -201,7 +202,7 @@ func TestResolveReplacementProvenanceIsDeterministic(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -238,7 +239,7 @@ func TestResolveIncludesUntimedWordsInTextWithoutChangingBounds(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -273,7 +274,7 @@ func TestResolveUntimedWordsDoNotBridgeWordRunGap(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -299,7 +300,7 @@ func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) {
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
got, summary, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
@@ -311,6 +312,93 @@ func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReordersNearStartWordRunsByDuration(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("short", 1.2, 1.3)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "short,long" {
|
||||
t.Fatalf("segment order = %s, want short,long", gotTexts(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Start != 1.2 || got.Segments[0].End != 1.3 {
|
||||
t.Fatalf("short segment bounds changed: %#v", got.Segments[0])
|
||||
}
|
||||
if got.Segments[1].SourceRef != "word-run:1:1:1" || got.Segments[1].Text != "long" {
|
||||
t.Fatalf("long segment provenance/text changed: %#v", got.Segments[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveDoesNotReorderWordRunsOutsideWindow(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("short", 1.5, 1.6)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "long,short" {
|
||||
t.Fatalf("segment order = %s, want long,short", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReordersTransitiveNearStartClustersByDuration(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("long", 1, 2)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("medium", 1.3, 1.8)),
|
||||
segmentWithWords("c.json", 0, "Carol", 1, 3, word("short", 1.65, 1.75)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0", "b.json#0", "c.json#0"}, []string{"Alice", "Bob", "Carol"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "short,medium,long" {
|
||||
t.Fatalf("segment order = %s, want short,medium,long", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReorderFallsBackToDeterministicOrderForEqualDurations(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("b.json", 0, "Bob", 1, 3, word("bob", 1, 1.5)),
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("alice", 1.2, 1.7)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"b.json#0", "a.json#0"}, []string{"Bob", "Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75, 0.4)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if gotTexts(got.Segments) != "bob,alice" {
|
||||
t.Fatalf("segment order = %s, want bob,alice", gotTexts(got.Segments))
|
||||
}
|
||||
}
|
||||
|
||||
func segmentWithWords(source string, sourceIndex int, speaker string, start float64, end float64, words ...model.Word) model.Segment {
|
||||
segment := segment(source, sourceIndex, speaker, start, end)
|
||||
segment.Words = words
|
||||
@@ -326,6 +414,14 @@ func word(text string, start float64, end float64) model.Word {
|
||||
}
|
||||
}
|
||||
|
||||
func gotTexts(segments []model.Segment) string {
|
||||
texts := make([]string, 0, len(segments))
|
||||
for _, segment := range segments {
|
||||
texts = append(texts, segment.Text)
|
||||
}
|
||||
return strings.Join(texts, ",")
|
||||
}
|
||||
|
||||
func untimedWord(text string) model.Word {
|
||||
return model.Word{
|
||||
Text: text,
|
||||
|
||||
Reference in New Issue
Block a user