diff --git a/README.md b/README.md index 58a5b5b..2287a36 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Environment variables: | Environment Variable | Default | Description | | --- | --- | --- | | `SERIATIM_OVERLAP_WORD_RUN_GAP` | `0.75` | Maximum gap in seconds between adjacent timed words when `resolve-overlaps` builds word-run replacement segments. Must be a positive float. | -| `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW` | `0.4` | Near-start window in seconds for ordering replacement word runs shortest-first. Must be a positive float. | +| `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW` | `1.0` | Near-start window in seconds for ordering replacement word runs shortest-first. Must be a positive float. | | `SERIATIM_BACKCHANNEL_MAX_DURATION` | `2.0` | Maximum duration in seconds for `backchannel` classification. Must be a positive float. | | `SERIATIM_FILLER_MAX_DURATION` | `1.25` | Maximum duration in seconds for `filler` classification. Must be a positive float. | @@ -279,7 +279,7 @@ For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word - The default word-run gap is `0.75` seconds. - Set `SERIATIM_OVERLAP_WORD_RUN_GAP` to a positive number of seconds to override the default. - Near-start replacement word runs are reordered so shorter segments come first when adjacent starts are within `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW`. -- The default word-run reorder window is `0.4` seconds. +- The default word-run reorder window is `1.0` seconds. - Set `SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW` to a positive number of seconds to override the default. - Replacement segment text is built by joining word text with single spaces. - Replacement segments include `source_ref` and `derived_from`. diff --git a/internal/cli/merge_test.go b/internal/cli/merge_test.go index 4f527a3..6c66cd4 100644 --- a/internal/cli/merge_test.go +++ b/internal/cli/merge_test.go @@ -8,6 +8,7 @@ import ( "strings" "testing" + "gitea.maximumdirect.net/eric/seriatim/internal/config" "gitea.maximumdirect.net/eric/seriatim/internal/model" "gitea.maximumdirect.net/eric/seriatim/internal/report" "gitea.maximumdirect.net/eric/seriatim/schema" @@ -342,6 +343,8 @@ func TestMergeDetectsOverlapGroups(t *testing.T) { } func TestMergeResolvesOverlapGroupsWithWordRuns(t *testing.T) { + t.Setenv(config.WordRunReorderWindowEnv, "0.4") + dir := t.TempDir() inputA := writeJSONFile(t, dir, "a.json", `{ "segments": [ @@ -1677,6 +1680,75 @@ func TestMergeResolveOverlapsAbsorbsNearbyContext(t *testing.T) { } } +func TestMergeResolveOverlapsPreservesAbsorbedContextPrefix(t *testing.T) { + dir := t.TempDir() + inputA := writeJSONFile(t, dir, "a.json", `{ + "segments": [ + { + "start": 7, + "end": 9.95, + "text": "full context prefix near", + "words": [ + {"word": "full", "start": 7.1, "end": 7.2}, + {"word": "context", "start": 7.3, "end": 7.4}, + {"word": "prefix", "start": 7.5, "end": 7.6}, + {"word": "near", "start": 9.7, "end": 9.9} + ] + }, + { + "start": 10, + "end": 11, + "text": "inside", + "words": [ + {"word": "inside", "start": 10.5, "end": 10.7} + ] + } + ] + }`) + inputB := writeJSONFile(t, dir, "b.json", `{ + "segments": [ + { + "start": 10.2, + "end": 11, + "text": "bob", + "words": [ + {"word": "bob", "start": 10.4, "end": 10.6} + ] + } + ] + }`) + speakers := writeYAMLFile(t, dir, "speakers.yml", `match: + - speaker: Alice + match: ["a.json"] + - speaker: Bob + match: ["b.json"] +`) + output := filepath.Join(dir, "merged.json") + + err := executeMerge( + "--input-file", inputA, + "--input-file", inputB, + "--speakers", speakers, + "--output-schema", "minimal", + "--output-file", output, + ) + if err != nil { + t.Fatalf("merge failed: %v", err) + } + + var transcript schema.MinimalTranscript + readJSON(t, output, &transcript) + aliceText := make([]string, 0) + for _, segment := range transcript.Segments { + if segment.Speaker == "Alice" { + aliceText = append(aliceText, segment.Text) + } + } + if strings.Join(aliceText, " ") != "full context prefix near inside" { + t.Fatalf("expected full absorbed context prefix in Alice output, got %#v", transcript.Segments) + } +} + func TestInvalidTimingFails(t *testing.T) { tests := []struct { name string diff --git a/internal/config/config.go b/internal/config/config.go index bd350b2..f154c60 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -17,7 +17,7 @@ const ( DefaultPreprocessingModules = "validate-raw,normalize-speakers,trim-text" DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output" DefaultOverlapWordRunGap = 0.75 - DefaultWordRunReorderWindow = 0.4 + DefaultWordRunReorderWindow = 1.0 DefaultCoalesceGap = 3.0 DefaultCoalesceGapValue = "3.0" DefaultBackchannelMaxDuration = 2.0 diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 3033392..787a214 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -191,7 +191,7 @@ func TestOverlapWordRunGapRejectsInvalidEnvOverride(t *testing.T) { } } -func TestWordRunReorderWindowDefaultsTo04(t *testing.T) { +func TestWordRunReorderWindowDefaultsTo1(t *testing.T) { t.Setenv(WordRunReorderWindowEnv, "") dir := t.TempDir() input := writeTempFile(t, dir, "input.json") diff --git a/internal/overlap/resolve.go b/internal/overlap/resolve.go index d2371a6..db680ed 100644 --- a/internal/overlap/resolve.go +++ b/internal/overlap/resolve.go @@ -171,7 +171,7 @@ func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToInde speakers := groupSpeakerOrder(group, segmentsBySpeaker) resolved := resolvedGroup{} for speakerIndex, speaker := range speakers { - timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], expandedStart, expandedEnd) + timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker]) if len(timedWords) == 0 { continue } @@ -303,7 +303,7 @@ func groupSpeakerOrder(group model.OverlapGroup, segmentsBySpeaker map[string][] return speakers } -func gatherResolutionWords(segments []model.Segment, groupStart float64, groupEnd float64) ([]resolutionWord, []resolutionWord) { +func gatherResolutionWords(segments []model.Segment) ([]resolutionWord, []resolutionWord) { timedWords := make([]resolutionWord, 0) untimedWords := make([]resolutionWord, 0) sequence := 0 @@ -321,9 +321,6 @@ func gatherResolutionWords(segments []model.Segment, groupStart float64, groupEn untimedWords = append(untimedWords, candidate) continue } - if word.End <= groupStart || word.Start >= groupEnd { - continue - } timedWords = append(timedWords, candidate) } } diff --git a/internal/overlap/resolve_test.go b/internal/overlap/resolve_test.go index 120cc70..9d1922b 100644 --- a/internal/overlap/resolve_test.go +++ b/internal/overlap/resolve_test.go @@ -72,7 +72,7 @@ func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) { } } -func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) { +func TestResolvePreservesAllWordsFromSelectedOverlapSegment(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{ segmentWithWords( @@ -101,11 +101,98 @@ func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) { if len(got.Segments) != 1 { t.Fatalf("segment count = %d, want 1", len(got.Segments)) } - if got.Segments[0].Text != "left-edge inside right-edge" { + if got.Segments[0].Text != "before left-edge inside right-edge after" { t.Fatalf("text = %q", got.Segments[0].Text) } } +func TestResolvePreservesPrefixWordsFromAbsorbedContextSegment(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords( + "a.json", + 0, + "Alice", + 7, + 9.5, + word("prefix", 7.2, 7.4), + word("near", 9.2, 9.4), + ), + segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)), + segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}), + }, + } + + got, _, err := Resolve(merged, 10, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if gotTexts(got.Segments) != "prefix near inside,bob" { + t.Fatalf("segment texts = %s", gotTexts(got.Segments)) + } +} + +func TestResolvePreservesSuffixWordsFromAbsorbedContextSegment(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords("a.json", 0, "Alice", 10, 12, word("inside", 10.5, 10.7)), + segmentWithWords( + "a.json", + 1, + "Alice", + 12.5, + 16, + word("near", 12.7, 12.9), + word("suffix", 15.6, 15.8), + ), + segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}), + }, + } + + got, _, err := Resolve(merged, 10, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if gotTexts(got.Segments) != "bob,inside near suffix" { + t.Fatalf("segment texts = %s", gotTexts(got.Segments)) + } +} + +func TestResolveOutOfWindowWordsAffectWordRunBoundaries(t *testing.T) { + merged := model.MergedTranscript{ + Segments: []model.Segment{ + segmentWithWords( + "a.json", + 0, + "Alice", + 7, + 9.5, + word("early", 7.2, 7.4), + word("near", 9.2, 9.4), + ), + segmentWithWords("a.json", 1, "Alice", 10, 12, word("inside", 10.5, 10.7)), + segmentWithWords("b.json", 0, "Bob", 10.2, 11.2, word("bob", 10.4, 10.6)), + }, + OverlapGroups: []model.OverlapGroup{ + group(1, 10, 12, []string{"a.json#1", "b.json#0"}, []string{"Alice", "Bob"}), + }, + } + + got, _, err := Resolve(merged, 0.75, 0.4, 3) + if err != nil { + t.Fatalf("resolve failed: %v", err) + } + if gotTexts(got.Segments) != "early,near,bob,inside" { + t.Fatalf("segment texts = %s", gotTexts(got.Segments)) + } +} + func TestResolveIncludesContextWordsAroundOverlapWindow(t *testing.T) { merged := model.MergedTranscript{ Segments: []model.Segment{