From a3ca6665a9e7f9c173832b87a9063028f132c281 Mon Sep 17 00:00:00 2001 From: Eric Rakestraw Date: Tue, 28 Apr 2026 14:11:38 -0500 Subject: [PATCH] Minor updates to overlap detection and segment coalescing logic --- README.md | 13 +- architecture.md | 2 +- internal/backchannel/backchannel.go | 25 +++- internal/backchannel/backchannel_test.go | 44 ++++-- internal/builtin/postprocess.go | 6 +- internal/cli/merge_test.go | 83 ++++++++++- internal/coalesce/coalesce.go | 23 +-- internal/coalesce/coalesce_test.go | 45 ++++++ internal/config/config.go | 82 +++++++---- internal/config/config_test.go | 134 +++++++++++++++++ internal/filler/filler.go | 21 ++- internal/filler/filler_test.go | 42 +++++- internal/overlap/resolve.go | 57 ++++++- internal/overlap/resolve_test.go | 180 +++++++++++++++++++++-- 14 files changed, 662 insertions(+), 95 deletions(-) diff --git a/README.md b/README.md index f5ce48c..fc66720 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Global flags: | `--output-modules` | No | `json` | Comma-separated output modules. | | `--preprocessing-modules` | No | `validate-raw,normalize-speakers,trim-text` | Comma-separated preprocessing modules, evaluated in order. | | `--postprocessing-modules` | No | `detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output` | Comma-separated postprocessing modules, evaluated in order. | -| `--coalesce-gap` | No | `3.0` | Maximum same-speaker gap in seconds for `coalesce`. Must be a non-negative float. | +| `--coalesce-gap` | No | `3.0` | Maximum same-speaker gap in seconds for `coalesce`; also used as the `resolve-overlaps` context window. Must be a non-negative float. | Environment variables: @@ -59,6 +59,8 @@ Environment variables: | --- | --- | --- | | `SERIATIM_OVERLAP_WORD_RUN_GAP` | `0.75` | Maximum gap in seconds between adjacent timed words when `resolve-overlaps` builds word-run replacement segments. Must be a positive float. 
| | `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW` | `0.4` | Near-start window in seconds for ordering replacement word runs shortest-first. Must be a positive float. | +| `SERIATIM_BACKCHANNEL_MAX_DURATION` | `2.0` | Maximum duration in seconds for `backchannel` classification. Must be a positive float. | +| `SERIATIM_FILLER_MAX_DURATION` | `1.25` | Maximum duration in seconds for `filler` classification. Must be a positive float. | ## Input JSON Format @@ -241,7 +243,10 @@ The default postprocessing pipeline runs `detect-overlaps`, then `resolve-overla For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word timing to build smaller word-run replacement segments: -- Words are included when their interval intersects the overlap window: `word.end > group.start && word.start < group.end`. +- The resolution window expands the detected overlap group by `--coalesce-gap` seconds on both sides. +- Nearby same-speaker context segments are included when they intersect the expanded window and their start or end is within `--coalesce-gap` of the original overlap boundary. +- Words are included when their interval intersects the expanded resolution window. +- Context segments that are part of another detected overlap group are not pulled into the current group. - Untimed words are included in replacement text in original word order when nearby timed words create a replacement run. - Untimed words do not affect replacement segment start/end times or word-run gap splitting. - Words for the same speaker are merged into one run when the gap between adjacent words is no greater than `SERIATIM_OVERLAP_WORD_RUN_GAP`. @@ -266,7 +271,7 @@ The default pipeline runs `backchannel` before `coalesce`. It tags short acknowl "categories": ["backchannel"] ``` -Backchannel matching is case-insensitive, trims surrounding whitespace, and requires a matching acknowledgement phrase, no more than three whitespace-delimited words, and duration no greater than `1.0` second. 
+Backchannel matching is case-insensitive, ignores punctuation for matching and word-count purposes (each punctuation character is treated as whitespace, so `uh-huh` counts as two words), trims surrounding whitespace, and requires a matching acknowledgement phrase, no more than three whitespace-delimited words, and duration no greater than `SERIATIM_BACKCHANNEL_MAX_DURATION` seconds. The default maximum duration is `2.0` seconds. ## Fillers @@ -276,7 +281,7 @@ The default pipeline runs `filler` after `backchannel` and before `coalesce`. It "categories": ["filler"] ``` -Filler matching is case-insensitive, trims surrounding whitespace, and requires only filler tokens such as `um`, `uh`, `er`, `erm`, `ah`, `eh`, `hmm`, `mm`, or repeated combinations of those tokens. Matching segments must contain no more than three whitespace-delimited words and have duration no greater than `1.0` second. +Filler matching is case-insensitive, ignores punctuation for matching and word-count purposes (each punctuation character is treated as whitespace, so `hmm--mm` counts as two words), trims surrounding whitespace, and requires only filler tokens such as `um`, `uh`, `er`, `erm`, `ah`, `eh`, `hmm`, `mm`, or repeated combinations of those tokens. Matching segments must contain no more than three whitespace-delimited words and have duration no greater than `SERIATIM_FILLER_MAX_DURATION` seconds. The default maximum duration is `1.25` seconds. ## Coalescing diff --git a/architecture.md b/architecture.md index d23d6a9..4e996db 100644 --- a/architecture.md +++ b/architecture.md @@ -357,7 +357,7 @@ Initial classifications may include: - `backchannel` - `crosstalk` -The `resolve-overlaps` module uses preserved word-level timing to replace detected overlap-group segments with smaller word-run segments when usable timing is available. Groups without usable word timing remain unresolved for later passes or human review. 
Resolution expands each overlap window by the configured coalesce gap so nearby same-speaker context can be absorbed into the replacement runs. Groups without usable word timing remain unresolved for later passes or human review. Overlap resolution should be non-destructive. Original segment text, timing, and source metadata must remain recoverable. diff --git a/internal/backchannel/backchannel.go b/internal/backchannel/backchannel.go index 5a897e2..335ab44 100644 --- a/internal/backchannel/backchannel.go +++ b/internal/backchannel/backchannel.go @@ -3,6 +3,7 @@ package backchannel import ( "regexp" "strings" + "unicode" "gitea.maximumdirect.net/eric/seriatim/internal/model" ) @@ -10,16 +11,16 @@ import ( const Category = "backchannel" var patterns = []*regexp.Regexp{ - regexp.MustCompile(`(?i)^(yeah|yep|yes|right|okay|ok|sure|mm+h?m+|uh[- ]huh|mhm|mm-hmm)\.?$`), + regexp.MustCompile(`(?i)^(yeah|yep|yes|right|okay|ok|sure|mm+h?m+|mm+\s+hmm|uh[- ]huh|mhm|mm-hmm)\.?$`), regexp.MustCompile(`(?i)^(yeah|yep|right|okay|ok)([,.\s]+(yeah|yep|right|okay|ok))*\.?$`), - regexp.MustCompile(`(?i)^(i see|got it|makes sense|that makes sense|fair enough|sounds good)\.?$`), + regexp.MustCompile(`(?i)^(i see|got it|makes sense|that makes sense|fair enough|sounds good|there you go)\.?$`), } // Apply tags matching short acknowledgement segments. 
-func Apply(in model.MergedTranscript) (model.MergedTranscript, int) { +func Apply(in model.MergedTranscript, maxDuration float64) (model.MergedTranscript, int) { tagged := 0 for index := range in.Segments { - if !matches(in.Segments[index]) { + if !matches(in.Segments[index], maxDuration) { continue } if hasCategory(in.Segments[index], Category) { @@ -31,15 +32,15 @@ func Apply(in model.MergedTranscript) (model.MergedTranscript, int) { return in, tagged } -func matches(segment model.Segment) bool { - text := strings.TrimSpace(segment.Text) +func matches(segment model.Segment, maxDuration float64) bool { + text := normalizeForMatching(segment.Text) if text == "" { return false } if len(strings.Fields(text)) > 3 { return false } - if segment.End-segment.Start > 1.0 { + if segment.End-segment.Start > maxDuration { return false } for _, pattern := range patterns { @@ -50,6 +51,16 @@ func matches(segment model.Segment) bool { return false } +func normalizeForMatching(text string) string { + text = strings.Map(func(r rune) rune { + if unicode.IsPunct(r) { + return ' ' + } + return r + }, text) + return strings.Join(strings.Fields(text), " ") +} + func hasCategory(segment model.Segment, category string) bool { for _, existing := range segment.Categories { if existing == category { diff --git a/internal/backchannel/backchannel_test.go b/internal/backchannel/backchannel_test.go index 291cbbb..c6c20d1 100644 --- a/internal/backchannel/backchannel_test.go +++ b/internal/backchannel/backchannel_test.go @@ -10,7 +10,7 @@ import ( func TestApplyTagsVerySafeBackchannels(t *testing.T) { for _, text := range []string{"yeah", "Yep.", "mmhm", "uh-huh", "mm-hmm"} { t.Run(text, func(t *testing.T) { - got, tagged := Apply(transcript(segment(text, 1, 1.5))) + got, tagged := Apply(transcript(segment(text, 1, 1.5)), 1.0) if tagged != 1 { t.Fatalf("tagged = %d, want 1", tagged) } @@ -20,7 +20,7 @@ func TestApplyTagsVerySafeBackchannels(t *testing.T) { } func 
TestApplyTagsRepeatedBackchannels(t *testing.T) { - got, tagged := Apply(transcript(segment("Yeah, okay yep.", 1, 1.8))) + got, tagged := Apply(transcript(segment("Yeah, okay yep.", 1, 1.8)), 1.0) if tagged != 1 { t.Fatalf("tagged = %d, want 1", tagged) } @@ -30,7 +30,7 @@ func TestApplyTagsRepeatedBackchannels(t *testing.T) { func TestApplyTagsShortAcknowledgements(t *testing.T) { for _, text := range []string{"i see", "Got it.", "sounds good"} { t.Run(text, func(t *testing.T) { - got, tagged := Apply(transcript(segment(text, 1, 1.8))) + got, tagged := Apply(transcript(segment(text, 1, 1.8)), 1.0) if tagged != 1 { t.Fatalf("tagged = %d, want 1", tagged) } @@ -40,15 +40,27 @@ func TestApplyTagsShortAcknowledgements(t *testing.T) { } func TestApplyMatchesTrimAwareCaseInsensitive(t *testing.T) { - got, tagged := Apply(transcript(segment(" YES. ", 1, 1.2))) + got, tagged := Apply(transcript(segment(" YES. ", 1, 1.2)), 1.0) if tagged != 1 { t.Fatalf("tagged = %d, want 1", tagged) } assertCategories(t, got.Segments[0], []string{Category}) } +func TestApplyIgnoresPunctuationWhenMatching(t *testing.T) { + for _, text := range []string{"Okay?!", "Yeah... okay?!", "that makes sense!", "mm-hmm.", "uh... 
huh"} { + t.Run(text, func(t *testing.T) { + got, tagged := Apply(transcript(segment(text, 1, 1.8)), 1.0) + if tagged != 1 { + t.Fatalf("tagged = %d, want 1", tagged) + } + assertCategories(t, got.Segments[0], []string{Category}) + }) + } +} + func TestApplyDoesNotTagNonMatches(t *testing.T) { - got, tagged := Apply(transcript(segment("yeah I think so", 1, 1.5))) + got, tagged := Apply(transcript(segment("yeah I think so", 1, 1.5)), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } @@ -56,15 +68,29 @@ func TestApplyDoesNotTagNonMatches(t *testing.T) { } func TestApplyRejectsWordCountOverThree(t *testing.T) { - got, tagged := Apply(transcript(segment("that makes sense okay", 1, 1.5))) + got, tagged := Apply(transcript(segment("that makes sense okay", 1, 1.5)), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } assertCategories(t, got.Segments[0], nil) } -func TestApplyRejectsDurationOverOneSecond(t *testing.T) { - got, tagged := Apply(transcript(segment("yeah", 1, 2.1))) +func TestApplyUsesConfiguredMaxDuration(t *testing.T) { + got, tagged := Apply(transcript(segment("yeah", 1, 2.1)), 2.0) + if tagged != 1 { + t.Fatalf("tagged = %d, want 1", tagged) + } + assertCategories(t, got.Segments[0], []string{Category}) + + got, tagged = Apply(transcript(segment("yeah", 1, 3.1)), 2.0) + if tagged != 0 { + t.Fatalf("tagged = %d, want 0", tagged) + } + assertCategories(t, got.Segments[0], nil) +} + +func TestApplyRejectsDurationOverConfiguredMax(t *testing.T) { + got, tagged := Apply(transcript(segment("yeah", 1, 2.1)), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } @@ -75,7 +101,7 @@ func TestApplyPreservesExistingCategoriesAndAvoidsDuplicate(t *testing.T) { existing := segment("yeah", 1, 1.2) existing.Categories = []string{"manual", Category} - got, tagged := Apply(transcript(existing)) + got, tagged := Apply(transcript(existing), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } diff --git 
a/internal/builtin/postprocess.go b/internal/builtin/postprocess.go index ff17a31..358b9c3 100644 --- a/internal/builtin/postprocess.go +++ b/internal/builtin/postprocess.go @@ -85,7 +85,7 @@ func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, c return model.MergedTranscript{}, nil, err } - resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap, cfg.WordRunReorderWindow) + resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap, cfg.WordRunReorderWindow, cfg.CoalesceGap) if err != nil { return model.MergedTranscript{}, nil, err } @@ -116,7 +116,7 @@ func (backchannelPostprocessor) Process(ctx context.Context, in model.MergedTran return model.MergedTranscript{}, nil, err } - out, tagged := backchannel.Apply(in) + out, tagged := backchannel.Apply(in, cfg.BackchannelMaxDuration) return out, []report.Event{ report.Info("postprocessing", "backchannel", fmt.Sprintf("tagged %d backchannel segment(s)", tagged)), }, nil @@ -133,7 +133,7 @@ func (fillerPostprocessor) Process(ctx context.Context, in model.MergedTranscrip return model.MergedTranscript{}, nil, err } - out, tagged := filler.Apply(in) + out, tagged := filler.Apply(in, cfg.FillerMaxDuration) return out, []report.Event{ report.Info("postprocessing", "filler", fmt.Sprintf("tagged %d filler segment(s)", tagged)), }, nil diff --git a/internal/cli/merge_test.go b/internal/cli/merge_test.go index 2ed86cd..a0e6cd5 100644 --- a/internal/cli/merge_test.go +++ b/internal/cli/merge_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "os" "path/filepath" + "reflect" "strings" "testing" @@ -317,7 +318,7 @@ func TestMergeResolvesOverlapGroupsWithWordRuns(t *testing.T) { t.Fatalf("segment count = %d, want %d", got, want) } - wantTexts := []string{"hello there", "bob reply", "later"} + wantTexts := []string{"outside hello there", "bob reply", "later"} wantSpeakers := []string{"Alice", "Bob", "Alice"} wantRefs := []string{"word-run:1:1:1", "word-run:1:2:1", "word-run:1:1:2"} for 
index, segment := range transcript.Segments { @@ -1496,6 +1497,86 @@ func TestMergeResolutionPreservesUntimedWordText(t *testing.T) { } } +func TestMergeResolveOverlapsAbsorbsNearbyContext(t *testing.T) { + dir := t.TempDir() + inputA := writeJSONFile(t, dir, "a.json", `{ + "segments": [ + { + "start": 9, + "end": 9.95, + "text": "before", + "words": [ + {"word": "before", "start": 9.7, "end": 9.9} + ] + }, + { + "start": 10, + "end": 11, + "text": "inside", + "words": [ + {"word": "inside", "start": 10.5, "end": 10.7} + ] + }, + { + "start": 11.1, + "end": 12, + "text": "after", + "words": [ + {"word": "after", "start": 11.2, "end": 11.3} + ] + } + ] + }`) + inputB := writeJSONFile(t, dir, "b.json", `{ + "segments": [ + { + "start": 10.2, + "end": 11, + "text": "bob", + "words": [ + {"word": "bob", "start": 10.4, "end": 10.6} + ] + } + ] + }`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `match: + - speaker: Alice + match: ["a.json"] + - speaker: Bob + match: ["b.json"] +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", inputA, + "--input-file", inputB, + "--speakers", speakers, + "--output-file", output, + ) + if err != nil { + t.Fatalf("merge failed: %v", err) + } + + var transcript model.FinalTranscript + readJSON(t, output, &transcript) + + var aliceSegments []model.Segment + for _, segment := range transcript.Segments { + if segment.Speaker == "Alice" { + aliceSegments = append(aliceSegments, segment) + } + } + if len(aliceSegments) != 1 { + t.Fatalf("Alice segment count = %d, want 1: %#v", len(aliceSegments), aliceSegments) + } + if aliceSegments[0].Text != "before inside after" { + t.Fatalf("Alice text = %q", aliceSegments[0].Text) + } + if !reflect.DeepEqual(aliceSegments[0].DerivedFrom, []string{inputA + "#0", inputA + "#1", inputA + "#2"}) { + t.Fatalf("Alice derived_from = %v", aliceSegments[0].DerivedFrom) + } +} + func TestInvalidTimingFails(t *testing.T) { tests := []struct { name string diff --git 
a/internal/coalesce/coalesce.go b/internal/coalesce/coalesce.go index 7925d75..6b8566e 100644 --- a/internal/coalesce/coalesce.go +++ b/internal/coalesce/coalesce.go @@ -57,17 +57,20 @@ func Apply(in model.MergedTranscript, gap float64) (model.MergedTranscript, Summ type run struct { segments []model.Segment + start float64 + end float64 } func newRun(segment model.Segment) run { return run{ segments: []model.Segment{segment}, + start: segment.Start, + end: segment.End, } } func (r run) canMerge(next model.Segment, gap float64) bool { - current := r.segments[len(r.segments)-1] - return current.Speaker == next.Speaker && next.Start-current.End <= gap + return r.speaker() == next.Speaker && next.Start-r.end <= gap } func (r run) speaker() string { @@ -76,6 +79,12 @@ func (r run) speaker() string { func (r *run) add(segment model.Segment) { r.segments = append(r.segments, segment) + if segment.Start < r.start { + r.start = segment.Start + } + if segment.End > r.end { + r.end = segment.End + } } func seedRunFromPending(pending []model.Segment, segment model.Segment, gap float64) (run, []model.Segment, bool) { @@ -129,19 +138,13 @@ func (r run) coalescedSegment(id int) model.Segment { SourceRef: fmt.Sprintf("coalesce:%d", id), DerivedFrom: make([]string, 0, len(r.segments)), Speaker: first.Speaker, - Start: first.Start, - End: first.End, + Start: r.start, + End: r.end, Words: make([]model.Word, 0), } text := make([]string, 0, len(r.segments)) for _, segment := range r.segments { - if segment.Start < merged.Start { - merged.Start = segment.Start - } - if segment.End > merged.End { - merged.End = segment.End - } if segment.Source != merged.Source { merged.Source = "derived" } diff --git a/internal/coalesce/coalesce_test.go b/internal/coalesce/coalesce_test.go index 380e3f7..73fd633 100644 --- a/internal/coalesce/coalesce_test.go +++ b/internal/coalesce/coalesce_test.go @@ -115,6 +115,51 @@ func TestApplyHonorsCurrentOrder(t *testing.T) { } } +func 
TestApplyUsesEffectiveRunEndForReorderedSegments(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segment("a.json", 0, "Alice", 10, 20, "long"), + segment("a.json", 1, "Alice", 1, 2, "early"), + segment("a.json", 2, "Alice", 22, 23, "after long"), + }, + } + + got, summary := Apply(merged, 3) + if summary.OriginalSegmentsMerged != 3 || summary.CoalescedSegments != 1 { + t.Fatalf("summary = %#v", summary) + } + if len(got.Segments) != 1 { + t.Fatalf("segment count = %d, want 1", len(got.Segments)) + } + if got.Segments[0].Text != "long early after long" { + t.Fatalf("text = %q", got.Segments[0].Text) + } + if got.Segments[0].Start != 1 || got.Segments[0].End != 23 { + t.Fatalf("bounds = %f-%f, want 1-23", got.Segments[0].Start, got.Segments[0].End) + } +} + +func TestApplyDoesNotMergeBeyondEffectiveRunEndGap(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segment("a.json", 0, "Alice", 10, 20, "long"), + segment("a.json", 1, "Alice", 1, 2, "early"), + segment("a.json", 2, "Alice", 23.1, 24, "too late"), + }, + } + + got, summary := Apply(merged, 3) + if summary.OriginalSegmentsMerged != 2 || summary.CoalescedSegments != 1 { + t.Fatalf("summary = %#v", summary) + } + if len(got.Segments) != 2 { + t.Fatalf("segment count = %d, want 2", len(got.Segments)) + } + if got.Segments[0].Text != "long early" || got.Segments[1].Text != "too late" { + t.Fatalf("segments = %#v", got.Segments) + } +} + func TestApplyDerivedProvenanceForMixedSourcesAndDerivedInputs(t *testing.T) { first := segment("a.json", 0, "Alice", 1, 2, "first") second := model.Segment{ diff --git a/internal/config/config.go b/internal/config/config.go index c1a8077..88b86db 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -11,16 +11,20 @@ import ( ) const ( - DefaultInputReader = "json-files" - DefaultOutputModules = "json" - DefaultPreprocessingModules = "validate-raw,normalize-speakers,trim-text" - 
DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output" - DefaultOverlapWordRunGap = 0.75 - DefaultWordRunReorderWindow = 0.4 - DefaultCoalesceGap = 3.0 - DefaultCoalesceGapValue = "3.0" - OverlapWordRunGapEnv = "SERIATIM_OVERLAP_WORD_RUN_GAP" - WordRunReorderWindowEnv = "SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW" + DefaultInputReader = "json-files" + DefaultOutputModules = "json" + DefaultPreprocessingModules = "validate-raw,normalize-speakers,trim-text" + DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output" + DefaultOverlapWordRunGap = 0.75 + DefaultWordRunReorderWindow = 0.4 + DefaultCoalesceGap = 3.0 + DefaultCoalesceGapValue = "3.0" + DefaultBackchannelMaxDuration = 2.0 + DefaultFillerMaxDuration = 1.25 + OverlapWordRunGapEnv = "SERIATIM_OVERLAP_WORD_RUN_GAP" + WordRunReorderWindowEnv = "SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW" + BackchannelMaxDurationEnv = "SERIATIM_BACKCHANNEL_MAX_DURATION" + FillerMaxDurationEnv = "SERIATIM_FILLER_MAX_DURATION" ) // MergeOptions captures raw CLI option values before validation. @@ -39,30 +43,34 @@ type MergeOptions struct { // Config is the validated runtime configuration for a merge invocation. 
type Config struct { - InputFiles []string - OutputFile string - ReportFile string - SpeakersFile string - AutocorrectFile string - InputReader string - OutputModules []string - PreprocessingModules []string - PostprocessingModules []string - OverlapWordRunGap float64 - WordRunReorderWindow float64 - CoalesceGap float64 + InputFiles []string + OutputFile string + ReportFile string + SpeakersFile string + AutocorrectFile string + InputReader string + OutputModules []string + PreprocessingModules []string + PostprocessingModules []string + OverlapWordRunGap float64 + WordRunReorderWindow float64 + CoalesceGap float64 + BackchannelMaxDuration float64 + FillerMaxDuration float64 } // NewMergeConfig validates raw merge options and returns normalized config. func NewMergeConfig(opts MergeOptions) (Config, error) { cfg := Config{ - InputReader: strings.TrimSpace(opts.InputReader), - OutputModules: nil, - PreprocessingModules: nil, - PostprocessingModules: nil, - OverlapWordRunGap: DefaultOverlapWordRunGap, - WordRunReorderWindow: DefaultWordRunReorderWindow, - CoalesceGap: DefaultCoalesceGap, + InputReader: strings.TrimSpace(opts.InputReader), + OutputModules: nil, + PreprocessingModules: nil, + PostprocessingModules: nil, + OverlapWordRunGap: DefaultOverlapWordRunGap, + WordRunReorderWindow: DefaultWordRunReorderWindow, + CoalesceGap: DefaultCoalesceGap, + BackchannelMaxDuration: DefaultBackchannelMaxDuration, + FillerMaxDuration: DefaultFillerMaxDuration, } if cfg.InputReader == "" { @@ -136,6 +144,14 @@ func NewMergeConfig(opts MergeOptions) (Config, error) { if err != nil { return Config{}, err } + cfg.BackchannelMaxDuration, err = parseBackchannelMaxDuration() + if err != nil { + return Config{}, err + } + cfg.FillerMaxDuration, err = parseFillerMaxDuration() + if err != nil { + return Config{}, err + } return cfg, nil } @@ -222,6 +238,14 @@ func parseWordRunReorderWindow() (float64, error) { return parsePositiveFloatEnv(WordRunReorderWindowEnv, 
DefaultWordRunReorderWindow) } +func parseBackchannelMaxDuration() (float64, error) { + return parsePositiveFloatEnv(BackchannelMaxDurationEnv, DefaultBackchannelMaxDuration) +} + +func parseFillerMaxDuration() (float64, error) { + return parsePositiveFloatEnv(FillerMaxDurationEnv, DefaultFillerMaxDuration) +} + func parseCoalesceGap(value string) (float64, error) { value = strings.TrimSpace(value) if value == "" { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 7892aa4..20b521f 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -206,6 +206,102 @@ func TestWordRunReorderWindowRejectsInvalidEnvOverride(t *testing.T) { } } +func TestBackchannelMaxDurationDefaultsTo2(t *testing.T) { + t.Setenv(BackchannelMaxDurationEnv, "") + dir := t.TempDir() + input := writeTempFile(t, dir, "input.json") + output := filepath.Join(dir, "merged.json") + + cfg, err := NewMergeConfig(MergeOptions{ + InputFiles: []string{input}, + OutputFile: output, + InputReader: DefaultInputReader, + OutputModules: DefaultOutputModules, + PreprocessingModules: DefaultPreprocessingModules, + PostprocessingModules: DefaultPostprocessingModules, + }) + if err != nil { + t.Fatalf("config failed: %v", err) + } + if cfg.BackchannelMaxDuration != DefaultBackchannelMaxDuration { + t.Fatalf("backchannel max duration = %f, want %f", cfg.BackchannelMaxDuration, DefaultBackchannelMaxDuration) + } +} + +func TestBackchannelMaxDurationUsesValidEnvOverride(t *testing.T) { + t.Setenv(BackchannelMaxDurationEnv, "1.5") + dir := t.TempDir() + input := writeTempFile(t, dir, "input.json") + output := filepath.Join(dir, "merged.json") + + cfg, err := NewMergeConfig(MergeOptions{ + InputFiles: []string{input}, + OutputFile: output, + InputReader: DefaultInputReader, + OutputModules: DefaultOutputModules, + PreprocessingModules: DefaultPreprocessingModules, + PostprocessingModules: DefaultPostprocessingModules, + }) + if err != nil { + t.Fatalf("config 
failed: %v", err) + } + if cfg.BackchannelMaxDuration != 1.5 { + t.Fatalf("backchannel max duration = %f, want 1.5", cfg.BackchannelMaxDuration) + } +} + +func TestBackchannelMaxDurationRejectsInvalidEnvOverride(t *testing.T) { + assertPositiveFloatEnvValidation(t, BackchannelMaxDurationEnv) +} + +func TestFillerMaxDurationDefaultsTo125(t *testing.T) { + t.Setenv(FillerMaxDurationEnv, "") + dir := t.TempDir() + input := writeTempFile(t, dir, "input.json") + output := filepath.Join(dir, "merged.json") + + cfg, err := NewMergeConfig(MergeOptions{ + InputFiles: []string{input}, + OutputFile: output, + InputReader: DefaultInputReader, + OutputModules: DefaultOutputModules, + PreprocessingModules: DefaultPreprocessingModules, + PostprocessingModules: DefaultPostprocessingModules, + }) + if err != nil { + t.Fatalf("config failed: %v", err) + } + if cfg.FillerMaxDuration != DefaultFillerMaxDuration { + t.Fatalf("filler max duration = %f, want %f", cfg.FillerMaxDuration, DefaultFillerMaxDuration) + } +} + +func TestFillerMaxDurationUsesValidEnvOverride(t *testing.T) { + t.Setenv(FillerMaxDurationEnv, "1.75") + dir := t.TempDir() + input := writeTempFile(t, dir, "input.json") + output := filepath.Join(dir, "merged.json") + + cfg, err := NewMergeConfig(MergeOptions{ + InputFiles: []string{input}, + OutputFile: output, + InputReader: DefaultInputReader, + OutputModules: DefaultOutputModules, + PreprocessingModules: DefaultPreprocessingModules, + PostprocessingModules: DefaultPostprocessingModules, + }) + if err != nil { + t.Fatalf("config failed: %v", err) + } + if cfg.FillerMaxDuration != 1.75 { + t.Fatalf("filler max duration = %f, want 1.75", cfg.FillerMaxDuration) + } +} + +func TestFillerMaxDurationRejectsInvalidEnvOverride(t *testing.T) { + assertPositiveFloatEnvValidation(t, FillerMaxDurationEnv) +} + func TestCoalesceGapDefaultsTo3(t *testing.T) { dir := t.TempDir() input := writeTempFile(t, dir, "input.json") @@ -306,6 +402,44 @@ func 
TestCoalesceGapRejectsInvalidOverride(t *testing.T) { } } +func assertPositiveFloatEnvValidation(t *testing.T, envName string) { + t.Helper() + + tests := []struct { + name string + value string + want string + }{ + {name: "non-numeric", value: "fast", want: "must be a positive number"}, + {name: "zero", value: "0", want: "must be positive"}, + {name: "negative", value: "-0.1", want: "must be positive"}, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Setenv(envName, test.value) + dir := t.TempDir() + input := writeTempFile(t, dir, "input.json") + output := filepath.Join(dir, "merged.json") + + _, err := NewMergeConfig(MergeOptions{ + InputFiles: []string{input}, + OutputFile: output, + InputReader: DefaultInputReader, + OutputModules: DefaultOutputModules, + PreprocessingModules: DefaultPreprocessingModules, + PostprocessingModules: DefaultPostprocessingModules, + }) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), test.want) { + t.Fatalf("expected error to contain %q, got %v", test.want, err) + } + }) + } +} + func writeTempFile(t *testing.T, dir string, name string) string { t.Helper() diff --git a/internal/filler/filler.go b/internal/filler/filler.go index 3ad6e94..9fedb93 100644 --- a/internal/filler/filler.go +++ b/internal/filler/filler.go @@ -3,6 +3,7 @@ package filler import ( "regexp" "strings" + "unicode" "gitea.maximumdirect.net/eric/seriatim/internal/model" ) @@ -15,10 +16,10 @@ var patterns = []*regexp.Regexp{ } // Apply tags short filler segments. 
-func Apply(in model.MergedTranscript) (model.MergedTranscript, int) { +func Apply(in model.MergedTranscript, maxDuration float64) (model.MergedTranscript, int) { tagged := 0 for index := range in.Segments { - if !matches(in.Segments[index]) || hasCategory(in.Segments[index], Category) { + if !matches(in.Segments[index], maxDuration) || hasCategory(in.Segments[index], Category) { continue } in.Segments[index].Categories = append(in.Segments[index].Categories, Category) @@ -27,15 +28,15 @@ func Apply(in model.MergedTranscript) (model.MergedTranscript, int) { return in, tagged } -func matches(segment model.Segment) bool { - text := strings.TrimSpace(segment.Text) +func matches(segment model.Segment, maxDuration float64) bool { + text := normalizeForMatching(segment.Text) if text == "" { return false } if len(strings.Fields(text)) > 3 { return false } - if segment.End-segment.Start > 1.0 { + if segment.End-segment.Start > maxDuration { return false } for _, pattern := range patterns { @@ -46,6 +47,16 @@ func matches(segment model.Segment) bool { return false } +func normalizeForMatching(text string) string { + text = strings.Map(func(r rune) rune { + if unicode.IsPunct(r) { + return ' ' + } + return r + }, text) + return strings.Join(strings.Fields(text), " ") +} + func hasCategory(segment model.Segment, category string) bool { for _, existing := range segment.Categories { if existing == category { diff --git a/internal/filler/filler_test.go b/internal/filler/filler_test.go index 2a7a6b0..f4a7177 100644 --- a/internal/filler/filler_test.go +++ b/internal/filler/filler_test.go @@ -10,7 +10,7 @@ import ( func TestApplyTagsVerySafeFillers(t *testing.T) { for _, text := range []string{"um", "uhhh", "ER", "ermm", "ah", "eh", "hmmm", "mm", "mmm"} { t.Run(text, func(t *testing.T) { - got, tagged := Apply(transcript(segment(text, 1, 1.5))) + got, tagged := Apply(transcript(segment(text, 1, 1.5)), 1.0) if tagged != 1 { t.Fatalf("tagged = %d, want 1", tagged) } @@ -20,7 +20,7 
@@ func TestApplyTagsVerySafeFillers(t *testing.T) { } func TestApplyTagsRepeatedFillers(t *testing.T) { - got, tagged := Apply(transcript(segment("um uh hmm", 1, 1.8))) + got, tagged := Apply(transcript(segment("um uh hmm", 1, 1.8)), 1.0) if tagged != 1 { t.Fatalf("tagged = %d, want 1", tagged) } @@ -28,17 +28,29 @@ func TestApplyTagsRepeatedFillers(t *testing.T) { } func TestApplyMatchesTrimAwareCaseInsensitive(t *testing.T) { - got, tagged := Apply(transcript(segment(" UM uh ", 1, 1.5))) + got, tagged := Apply(transcript(segment(" UM uh ", 1, 1.5)), 1.0) if tagged != 1 { t.Fatalf("tagged = %d, want 1", tagged) } assertCategories(t, got.Segments[0], []string{Category}) } +func TestApplyIgnoresPunctuationWhenMatching(t *testing.T) { + for _, text := range []string{"um.", "uh?!", "um, uh... hmm!", "hmm--mm"} { + t.Run(text, func(t *testing.T) { + got, tagged := Apply(transcript(segment(text, 1, 1.8)), 1.0) + if tagged != 1 { + t.Fatalf("tagged = %d, want 1", tagged) + } + assertCategories(t, got.Segments[0], []string{Category}) + }) + } +} + func TestApplyDoesNotTagNonMatches(t *testing.T) { for _, text := range []string{"um okay", "uh-huh", "hmm, okay"} { t.Run(text, func(t *testing.T) { - got, tagged := Apply(transcript(segment(text, 1, 1.5))) + got, tagged := Apply(transcript(segment(text, 1, 1.5)), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } @@ -48,15 +60,29 @@ func TestApplyDoesNotTagNonMatches(t *testing.T) { } func TestApplyRejectsWordCountOverThree(t *testing.T) { - got, tagged := Apply(transcript(segment("um uh er ah", 1, 1.5))) + got, tagged := Apply(transcript(segment("um uh er ah", 1, 1.5)), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } assertCategories(t, got.Segments[0], nil) } -func TestApplyRejectsDurationOverOneSecond(t *testing.T) { - got, tagged := Apply(transcript(segment("um", 1, 2.1))) +func TestApplyUsesConfiguredMaxDuration(t *testing.T) { + got, tagged := Apply(transcript(segment("um", 1, 2.2)), 1.25) + 
if tagged != 1 { + t.Fatalf("tagged = %d, want 1", tagged) + } + assertCategories(t, got.Segments[0], []string{Category}) + + got, tagged = Apply(transcript(segment("um", 1, 2.3)), 1.25) + if tagged != 0 { + t.Fatalf("tagged = %d, want 0", tagged) + } + assertCategories(t, got.Segments[0], nil) +} + +func TestApplyRejectsDurationOverConfiguredMax(t *testing.T) { + got, tagged := Apply(transcript(segment("um", 1, 2.1)), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } @@ -67,7 +93,7 @@ func TestApplyPreservesExistingCategoriesAndAvoidsDuplicate(t *testing.T) { existing := segment("um", 1, 1.2) existing.Categories = []string{"manual", Category} - got, tagged := Apply(transcript(existing)) + got, tagged := Apply(transcript(existing), 1.0) if tagged != 0 { t.Fatalf("tagged = %d, want 0", tagged) } diff --git a/internal/overlap/resolve.go b/internal/overlap/resolve.go index e23b4af..d2371a6 100644 --- a/internal/overlap/resolve.go +++ b/internal/overlap/resolve.go @@ -18,7 +18,7 @@ type ResolutionSummary struct { // Resolve replaces detected overlap-group segments with word-run segments when // word-level timing is available. 
-func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64) (model.MergedTranscript, ResolutionSummary, error) { +func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (model.MergedTranscript, ResolutionSummary, error) { summary := ResolutionSummary{ GroupsProcessed: len(in.OverlapGroups), } @@ -30,6 +30,12 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow for index, segment := range in.Segments { refToIndex[SegmentRef(segment)] = index } + overlapRefs := make(map[string]struct{}) + for _, group := range in.OverlapGroups { + for _, ref := range group.Segments { + overlapRefs[ref] = struct{}{} + } + } removeRefs := make(map[string]struct{}) clearAnnotationRefs := make(map[string]struct{}) @@ -38,7 +44,7 @@ func Resolve(in model.MergedTranscript, wordRunGap float64, wordRunReorderWindow replacementOrder := make(map[string]replacementOrder) for _, group := range in.OverlapGroups { - resolved, err := resolveGroup(in, group, refToIndex, wordRunGap, wordRunReorderWindow) + resolved, err := resolveGroup(in, group, refToIndex, overlapRefs, wordRunGap, wordRunReorderWindow, contextWindow) if err != nil { return model.MergedTranscript{}, ResolutionSummary{}, err } @@ -125,15 +131,39 @@ type wordRun struct { end float64 } -func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64, wordRunReorderWindow float64) (resolvedGroup, error) { +func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, overlapRefs map[string]struct{}, wordRunGap float64, wordRunReorderWindow float64, contextWindow float64) (resolvedGroup, error) { segmentsBySpeaker := make(map[string][]model.Segment) refsBySpeaker := make(map[string][]string) + groupRefs := make(map[string]struct{}, len(group.Segments)) + groupSpeakers := make(map[string]struct{}) for _, ref := range group.Segments { 
index, exists := refToIndex[ref] if !exists { return resolvedGroup{}, fmt.Errorf("overlap group %d references missing segment %q", group.ID, ref) } + groupRefs[ref] = struct{}{} segment := in.Segments[index] + groupSpeakers[segment.Speaker] = struct{}{} + } + + expandedStart := group.Start - contextWindow + expandedEnd := group.End + contextWindow + for _, segment := range in.Segments { + ref := SegmentRef(segment) + if _, exists := groupRefs[ref]; !exists { + if _, exists := overlapRefs[ref]; exists { + continue + } + if _, exists := groupSpeakers[segment.Speaker]; !exists { + continue + } + if !intervalIntersects(segment.Start, segment.End, expandedStart, expandedEnd) { + continue + } + if !segmentNearGroupBoundary(segment, group, contextWindow) { + continue + } + } segmentsBySpeaker[segment.Speaker] = append(segmentsBySpeaker[segment.Speaker], segment) refsBySpeaker[segment.Speaker] = append(refsBySpeaker[segment.Speaker], ref) } @@ -141,7 +171,7 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde speakers := groupSpeakerOrder(group, segmentsBySpeaker) resolved := resolvedGroup{} for speakerIndex, speaker := range speakers { - timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], group.Start, group.End) + timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], expandedStart, expandedEnd) if len(timedWords) == 0 { continue } @@ -162,6 +192,25 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde return resolved, nil } +func intervalIntersects(start float64, end float64, windowStart float64, windowEnd float64) bool { + return end > windowStart && start < windowEnd +} + +func segmentNearGroupBoundary(segment model.Segment, group model.OverlapGroup, window float64) bool { + return withinWindow(segment.Start, group.Start, window) || + withinWindow(segment.End, group.Start, window) || + withinWindow(segment.Start, group.End, window) || + 
withinWindow(segment.End, group.End, window) +} + +func withinWindow(value float64, boundary float64, window float64) bool { + diff := value - boundary + if diff < 0 { + diff = -diff + } + return diff <= window +} + func reorderReplacementSegments(groupID int, replacements []model.Segment, wordRunReorderWindow float64) ([]model.Segment, map[string]replacementOrder) { if len(replacements) == 0 { return replacements, nil diff --git a/internal/overlap/resolve_test.go b/internal/overlap/resolve_test.go index 05d2674..120cc70 100644 --- a/internal/overlap/resolve_test.go +++ b/internal/overlap/resolve_test.go @@ -15,7 +15,7 @@ func TestResolveNoOverlapGroupsIsNoOp(t *testing.T) { }, } - got, summary, err := Resolve(merged, 0.75, 0.4) + got, summary, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -40,7 +40,7 @@ func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) { merged.Segments[0].OverlapGroupID = 1 merged.Segments[1].OverlapGroupID = 1 - got, summary, err := Resolve(merged, 0.75, 0.4) + got, summary, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -94,7 +94,7 @@ func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) { } merged.Segments[0].OverlapGroupID = 1 - got, _, err := Resolve(merged, 10, 0.4) + got, _, err := Resolve(merged, 10, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -106,6 +106,158 @@ func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) { } } +func TestResolveIncludesContextWordsAroundOverlapWindow(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords("a.json", 0, "Alice", 7.5, 9.5, word("before", 8.5, 8.7)), + segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)), + segmentWithWords("a.json", 2, "Alice", 12.5, 13.5, word("after", 13, 13.2)), + segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)), + }, + OverlapGroups: 
[]model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}), + }, + } + merged.Segments[1].OverlapGroupID = 1 + merged.Segments[3].OverlapGroupID = 1 + + got, summary, err := Resolve(merged, 10, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if summary.GroupsChanged != 1 || summary.OriginalsRemoved != 4 || summary.ReplacementsCreated != 2 { + t.Fatalf("unexpected summary: %#v", summary) + } + if gotTexts(got.Segments) != "before inside after,bob" { + t.Fatalf("segment texts = %s", gotTexts(got.Segments)) + } + alice := got.Segments[0] + if alice.Start != 8.5 || alice.End != 13.2 { + t.Fatalf("context bounds = %f-%f, want 8.5-13.2", alice.Start, alice.End) + } + if !reflect.DeepEqual(alice.DerivedFrom, []string{"a.json#0", "a.json#1", "a.json#2"}) { + t.Fatalf("derived_from = %v", alice.DerivedFrom) + } +} + +func TestResolveDoesNotIncludeContextOutsideWindow(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords("a.json", 0, "Alice", 5, 6.9, word("outside", 6, 6.2)), + segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)), + segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}), + }, + } + + got, _, err := Resolve(merged, 10, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if gotTexts(got.Segments) != "Alice,bob,inside" { + t.Fatalf("segment texts = %s", gotTexts(got.Segments)) + } + if got.Segments[0].SourceSegmentIndex == nil || *got.Segments[0].SourceSegmentIndex != 0 { + t.Fatalf("outside context segment was not preserved: %#v", got.Segments[0]) + } +} + +func TestResolveDoesNotIncludeNearbyNonGroupSpeakerContext(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords("a.json", 0, "Alice", 10, 12, word("alice", 10.5, 10.7)), + 
segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)), + segmentWithWords("c.json", 0, "Carol", 12.5, 13.5, word("carol", 13, 13.2)), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}), + }, + } + + got, _, err := Resolve(merged, 10, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if gotTexts(got.Segments) != "bob,alice,Carol" { + t.Fatalf("segment texts = %s", gotTexts(got.Segments)) + } + if got.Segments[2].SourceSegmentIndex == nil || *got.Segments[2].SourceSegmentIndex != 0 { + t.Fatalf("non-group speaker context segment was not preserved: %#v", got.Segments[2]) + } +} + +func TestResolveRemovesIncludedContextSegmentsForReplacedSpeaker(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords("a.json", 0, "Alice", 8, 9, word("before", 8.5, 8.7)), + segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)), + segmentWithWords("b.json", 0, "Bob", 10.2, 11.2), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}), + }, + } + merged.Segments[1].OverlapGroupID = 1 + merged.Segments[2].OverlapGroupID = 1 + + got, summary, err := Resolve(merged, 10, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if summary.OriginalsRemoved != 2 || summary.ReplacementsCreated != 1 { + t.Fatalf("unexpected summary: %#v", summary) + } + if gotTexts(got.Segments) != "before inside,Bob" { + t.Fatalf("segment texts = %s", gotTexts(got.Segments)) + } + if got.Segments[1].OverlapGroupID != 0 { + t.Fatalf("kept original group annotation = %d, want 0", got.Segments[1].OverlapGroupID) + } +} + +func TestResolveSkipsContextSegmentReferencedByAnotherOverlapGroup(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords("a.json", 0, "Alice", 8, 9, word("other-group", 8.5, 8.7)), + 
segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)), + segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)), + segmentWithWords("c.json", 0, "Carol", 8.5, 9.5), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}), + group(2, 8, 9.5, []string{"a.json#0", "c.json#0"}, []string{"Alice", "Carol"}), + }, + } + merged.Segments[0].OverlapGroupID = 2 + merged.Segments[1].OverlapGroupID = 1 + merged.Segments[2].OverlapGroupID = 1 + merged.Segments[3].OverlapGroupID = 2 + + refToIndex := map[string]int{} + for index, segment := range merged.Segments { + refToIndex[SegmentRef(segment)] = index + } + overlapRefs := map[string]struct{}{ + "a.json#0": {}, + "a.json#1": {}, + "b.json#0": {}, + "c.json#0": {}, + } + + resolved, err := resolveGroup(merged, merged.OverlapGroups[0], refToIndex, overlapRefs, 10, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if !reflect.DeepEqual(resolved.removeRefs, []string{"a.json#1", "b.json#0"}) { + t.Fatalf("remove refs = %v", resolved.removeRefs) + } + if gotTexts(resolved.replacements) != "bob,inside" { + t.Fatalf("replacement texts = %s", gotTexts(resolved.replacements)) + } +} + func TestResolveWordRunGapThreshold(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ @@ -117,7 +269,7 @@ func TestResolveWordRunGapThreshold(t *testing.T) { } merged.Segments[0].OverlapGroupID = 1 - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -142,7 +294,7 @@ func TestResolvePartialResolutionKeepsNoWordSpeakerOriginals(t *testing.T) { merged.Segments[0].OverlapGroupID = 1 merged.Segments[1].OverlapGroupID = 1 - got, summary, err := Resolve(merged, 0.75, 0.4) + got, summary, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -179,7 +331,7 @@ func 
TestResolveGroupWithNoUsableWordsRemainsUnchanged(t *testing.T) { merged.Segments[0].OverlapGroupID = 1 merged.Segments[1].OverlapGroupID = 1 - got, summary, err := Resolve(merged, 0.75, 0.4) + got, summary, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -202,7 +354,7 @@ func TestResolveReplacementProvenanceIsDeterministic(t *testing.T) { }, } - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -239,7 +391,7 @@ func TestResolveIncludesUntimedWordsInTextWithoutChangingBounds(t *testing.T) { }, } - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -274,7 +426,7 @@ func TestResolveUntimedWordsDoNotBridgeWordRunGap(t *testing.T) { }, } - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -300,7 +452,7 @@ func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) { } merged.Segments[0].OverlapGroupID = 1 - got, summary, err := Resolve(merged, 0.75, 0.4) + got, summary, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -323,7 +475,7 @@ func TestResolveReordersNearStartWordRunsByDuration(t *testing.T) { }, } - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -349,7 +501,7 @@ func TestResolveDoesNotReorderWordRunsOutsideWindow(t *testing.T) { }, } - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) } @@ -370,7 +522,7 @@ func TestResolveReordersTransitiveNearStartClustersByDuration(t *testing.T) { }, } - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { 
t.Fatalf("resolve failed: %v", err) } @@ -390,7 +542,7 @@ func TestResolveReorderFallsBackToDeterministicOrderForEqualDurations(t *testing }, } - got, _, err := Resolve(merged, 0.75, 0.4) + got, _, err := Resolve(merged, 0.75, 0.4, 0) if err != nil { t.Fatalf("resolve failed: %v", err) }