Added initial segment overlap resolution logic
This commit is contained in:
56
README.md
56
README.md
@@ -2,7 +2,7 @@
|
||||
|
||||
`seriatim` merges per-speaker WhisperX-style JSON transcripts into a single JSON transcript that preserves speaker identity and chronological order.
|
||||
|
||||
The current implementation supports the `merge` command. It reads one or more input JSON files, optionally maps each input file to a canonical speaker using `speakers.yml`, sorts all segments by timestamp, assigns consecutive numeric `id` values, and writes a merged JSON artifact.
|
||||
The current implementation supports the `merge` command. It reads one or more input JSON files, optionally maps each input file to a canonical speaker using `speakers.yml`, sorts all segments by timestamp, detects and resolves overlaps when word-level timing is available, assigns consecutive numeric `id` values, and writes a merged JSON artifact.
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -56,7 +56,11 @@ Each input file must be valid JSON with a top-level `segments` array. The curren
|
||||
{
|
||||
"start": 1.25,
|
||||
"end": 3.5,
|
||||
"text": "Hello there."
|
||||
"text": "Hello there.",
|
||||
"words": [
|
||||
{"word": "Hello", "start": 1.25, "end": 1.55, "score": 0.98},
|
||||
{"word": "there.", "start": 1.7, "end": 2.0}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -68,7 +72,16 @@ Required segment fields:
|
||||
- `end`: number, must be `>= start`.
|
||||
- `text`: string.
|
||||
|
||||
Other WhisperX fields, including `words` and raw diarization speaker labels, are ignored for now.
|
||||
Optional word fields:
|
||||
|
||||
- `words`: array of word timing objects.
|
||||
- `words[].word`: string, required for every entry in `words`.
|
||||
- `words[].start`: optional number, must be `>= 0` when present.
|
||||
- `words[].end`: optional number, must be `>= 0` when present and `>= start` when `start` is also present.
|
||||
- `words[].score`: optional number.
|
||||
- `words[].speaker`: optional raw speaker label string.
|
||||
|
||||
Word-level timing is preserved internally for overlap resolution. If a word is missing `start` or `end`, seriatim keeps the word text, emits a warning in the optional report, and does not use that word as a timing anchor. Word timing is not emitted in the final JSON artifact.
|
||||
|
||||
## Speaker Map Format
|
||||
|
||||
@@ -150,6 +163,16 @@ The merged output uses the current seriatim envelope:
|
||||
"end": 3.5,
|
||||
"text": "Hello there.",
|
||||
"overlap_group_id": 1
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"source": "eric.json",
|
||||
"source_ref": "word-run:1:1:1",
|
||||
"derived_from": ["eric.json#0"],
|
||||
"speaker": "Eric Rakestraw",
|
||||
"start": 2.0,
|
||||
"end": 2.5,
|
||||
"text": "Resolved word run"
|
||||
}
|
||||
],
|
||||
"overlap_groups": [
|
||||
@@ -169,7 +192,7 @@ The merged output uses the current seriatim envelope:
|
||||
Segments are sorted deterministically by:
|
||||
|
||||
```text
|
||||
(start, end, source, source_segment_index, speaker)
|
||||
(start, end, source, source_segment_index/source_ref, speaker)
|
||||
```
|
||||
|
||||
Final segment IDs are assigned after sorting and start at `1`.
|
||||
@@ -187,7 +210,27 @@ Overlap behavior:
|
||||
- Segments in detected groups receive `overlap_group_id`.
|
||||
- `overlap_groups[].segments` contains stable references in `source#source_segment_index` format.
|
||||
- `class` is currently `unknown`.
|
||||
- `resolution` is currently `unresolved`; overlap resolution is still a no-op.
|
||||
- `resolution` is `unresolved` until `resolve-overlaps` replaces the group.
|
||||
|
||||
## Overlap Resolution
|
||||
|
||||
The default postprocessing pipeline runs `resolve-overlaps` after `detect-overlaps`.
|
||||
|
||||
For each detected overlap group, `resolve-overlaps` uses preserved WhisperX word timing to build smaller word-run replacement segments:
|
||||
|
||||
- Words are included when their interval intersects the overlap window: `word.end > group.start && word.start < group.end`.
|
||||
- Untimed words are included in replacement text in original word order when nearby timed words create a replacement run.
|
||||
- Untimed words do not affect replacement segment start/end times or word-run gap splitting.
|
||||
- Words for the same speaker are merged into one run when the gap between adjacent words is no greater than `SERIATIM_OVERLAP_WORD_RUN_GAP`.
|
||||
- The default word-run gap is `0.75` seconds.
|
||||
- Set `SERIATIM_OVERLAP_WORD_RUN_GAP` to a positive number of seconds to override the default.
|
||||
- Replacement segment text is built by joining word text with single spaces.
|
||||
- Replacement segments include `source_ref` and `derived_from`.
|
||||
- Replacement segments omit `source_segment_index` because they are derived from one or more original segments.
|
||||
- Resolved overlap groups are removed from `overlap_groups`.
|
||||
- Replacement segments are left without `overlap_group_id`; future passes can detect any remaining overlap.
|
||||
- If a speaker has no usable word timing in a group, that speaker's original segment is kept.
|
||||
- If no speakers in a group have usable word timing, the original group and annotations remain unchanged.
|
||||
|
||||
## Autocorrect
|
||||
|
||||
@@ -227,6 +270,5 @@ Matching behavior:
|
||||
## Current Limitations
|
||||
|
||||
- Only JSON input is supported.
|
||||
- Word-level timing data is not preserved yet.
|
||||
- Overlap resolution is currently a no-op module.
|
||||
- Overlap resolution depends on WhisperX word timing; groups without usable word timing remain unresolved.
|
||||
- Coalescing and alternate output formats are not implemented yet.
|
||||
|
||||
@@ -23,17 +23,18 @@ func (jsonFilesReader) Read(ctx context.Context, cfg config.Config) ([]model.Raw
|
||||
}
|
||||
|
||||
raw := make([]model.RawTranscript, 0, len(cfg.InputFiles))
|
||||
events := make([]report.Event, 0, len(cfg.InputFiles)+1)
|
||||
for _, inputFile := range cfg.InputFiles {
|
||||
transcript, err := readRawTranscript(inputFile)
|
||||
transcript, newEvents, err := readRawTranscript(inputFile)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
raw = append(raw, transcript)
|
||||
events = append(events, newEvents...)
|
||||
}
|
||||
|
||||
return raw, []report.Event{
|
||||
report.Info("input", "json-files", fmt.Sprintf("decoded %d input file(s)", len(raw))),
|
||||
}, nil
|
||||
events = append(events, report.Info("input", "json-files", fmt.Sprintf("decoded %d input file(s)", len(raw))))
|
||||
return raw, events, nil
|
||||
}
|
||||
|
||||
type rawTranscriptFile struct {
|
||||
@@ -44,70 +45,163 @@ type rawSegmentFile struct {
|
||||
Start json.RawMessage `json:"start"`
|
||||
End json.RawMessage `json:"end"`
|
||||
Text json.RawMessage `json:"text"`
|
||||
Words json.RawMessage `json:"words"`
|
||||
}
|
||||
|
||||
func readRawTranscript(path string) (model.RawTranscript, error) {
|
||||
// rawWordFile mirrors a single entry of a segment's "words" array.
//
// Each field is held as raw JSON rather than a concrete Go type so that the
// reader can distinguish an absent key, an explicit null, and a value of the
// wrong type before converting it.
type rawWordFile struct {
	// Word is the token text.
	Word json.RawMessage `json:"word"`
	// Start and End are the word's timing bounds.
	Start json.RawMessage `json:"start"`
	End   json.RawMessage `json:"end"`
	// Score is the numeric score attached to the word.
	Score json.RawMessage `json:"score"`
	// Speaker is the raw speaker label attached to the word.
	Speaker json.RawMessage `json:"speaker"`
}
|
||||
|
||||
func readRawTranscript(path string) (model.RawTranscript, []report.Event, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return model.RawTranscript{}, fmt.Errorf("read input file %q: %w", path, err)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("read input file %q: %w", path, err)
|
||||
}
|
||||
|
||||
var parsed rawTranscriptFile
|
||||
if err := json.Unmarshal(data, &parsed); err != nil {
|
||||
return model.RawTranscript{}, fmt.Errorf("parse input file %q: %w", path, err)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("parse input file %q: %w", path, err)
|
||||
}
|
||||
if parsed.Segments == nil || isJSONNull(parsed.Segments) {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q must contain top-level segments array", path)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q must contain top-level segments array", path)
|
||||
}
|
||||
|
||||
var rawSegments []rawSegmentFile
|
||||
if err := json.Unmarshal(parsed.Segments, &rawSegments); err != nil {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q top-level segments must be an array: %w", path, err)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q top-level segments must be an array: %w", path, err)
|
||||
}
|
||||
|
||||
segments := make([]model.RawSegment, 0, len(rawSegments))
|
||||
events := make([]report.Event, 0)
|
||||
for index, segment := range rawSegments {
|
||||
if segment.Start == nil || isJSONNull(segment.Start) {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d missing numeric start", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d missing numeric start", path, index)
|
||||
}
|
||||
if segment.End == nil || isJSONNull(segment.End) {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d missing numeric end", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d missing numeric end", path, index)
|
||||
}
|
||||
if segment.Text == nil || isJSONNull(segment.Text) {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d missing string text", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d missing string text", path, index)
|
||||
}
|
||||
|
||||
var start float64
|
||||
if err := json.Unmarshal(segment.Start, &start); err != nil {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d start must be numeric", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d start must be numeric", path, index)
|
||||
}
|
||||
var end float64
|
||||
if err := json.Unmarshal(segment.End, &end); err != nil {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d end must be numeric", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d end must be numeric", path, index)
|
||||
}
|
||||
var text string
|
||||
if err := json.Unmarshal(segment.Text, &text); err != nil {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d text must be a string", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d text must be a string", path, index)
|
||||
}
|
||||
|
||||
if start < 0 {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d has negative start", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d has negative start", path, index)
|
||||
}
|
||||
if end < start {
|
||||
return model.RawTranscript{}, fmt.Errorf("input file %q segment %d has end before start", path, index)
|
||||
return model.RawTranscript{}, nil, fmt.Errorf("input file %q segment %d has end before start", path, index)
|
||||
}
|
||||
|
||||
words, newEvents, err := parseRawWords(path, index, segment.Words)
|
||||
if err != nil {
|
||||
return model.RawTranscript{}, nil, err
|
||||
}
|
||||
events = append(events, newEvents...)
|
||||
|
||||
segments = append(segments, model.RawSegment{
|
||||
Start: start,
|
||||
End: end,
|
||||
Text: text,
|
||||
Words: words,
|
||||
})
|
||||
}
|
||||
|
||||
return model.RawTranscript{
|
||||
Source: path,
|
||||
Segments: segments,
|
||||
}, nil
|
||||
}, events, nil
|
||||
}
|
||||
|
||||
func parseRawWords(path string, segmentIndex int, raw json.RawMessage) ([]model.Word, []report.Event, error) {
|
||||
if raw == nil || isJSONNull(raw) {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
var rawWords []rawWordFile
|
||||
if err := json.Unmarshal(raw, &rawWords); err != nil {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d words must be an array: %w", path, segmentIndex, err)
|
||||
}
|
||||
|
||||
words := make([]model.Word, 0, len(rawWords))
|
||||
events := make([]report.Event, 0)
|
||||
for wordIndex, rawWord := range rawWords {
|
||||
if rawWord.Word == nil || isJSONNull(rawWord.Word) {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d missing string word", path, segmentIndex, wordIndex)
|
||||
}
|
||||
|
||||
var text string
|
||||
if err := json.Unmarshal(rawWord.Word, &text); err != nil {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d word must be a string", path, segmentIndex, wordIndex)
|
||||
}
|
||||
|
||||
word := model.Word{
|
||||
Text: text,
|
||||
}
|
||||
|
||||
hasStart := rawWord.Start != nil && !isJSONNull(rawWord.Start)
|
||||
hasEnd := rawWord.End != nil && !isJSONNull(rawWord.End)
|
||||
var start float64
|
||||
var end float64
|
||||
if hasStart {
|
||||
if err := json.Unmarshal(rawWord.Start, &start); err != nil {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d start must be numeric", path, segmentIndex, wordIndex)
|
||||
}
|
||||
if start < 0 {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d has negative start", path, segmentIndex, wordIndex)
|
||||
}
|
||||
}
|
||||
if hasEnd {
|
||||
if err := json.Unmarshal(rawWord.End, &end); err != nil {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d end must be numeric", path, segmentIndex, wordIndex)
|
||||
}
|
||||
if end < 0 {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d has negative end", path, segmentIndex, wordIndex)
|
||||
}
|
||||
}
|
||||
if hasStart && hasEnd {
|
||||
if end < start {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d has end before start", path, segmentIndex, wordIndex)
|
||||
}
|
||||
word.Start = start
|
||||
word.End = end
|
||||
word.Timed = true
|
||||
} else {
|
||||
events = append(events, report.Warning(
|
||||
"input",
|
||||
"json-files",
|
||||
fmt.Sprintf("input file %q segment %d word %d %q has no complete timing and will not anchor overlap resolution", path, segmentIndex, wordIndex, text),
|
||||
))
|
||||
}
|
||||
if rawWord.Score != nil && !isJSONNull(rawWord.Score) {
|
||||
if err := json.Unmarshal(rawWord.Score, &word.Score); err != nil {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d score must be numeric", path, segmentIndex, wordIndex)
|
||||
}
|
||||
}
|
||||
if rawWord.Speaker != nil && !isJSONNull(rawWord.Speaker) {
|
||||
if err := json.Unmarshal(rawWord.Speaker, &word.Speaker); err != nil {
|
||||
return nil, nil, fmt.Errorf("input file %q segment %d word %d speaker must be a string", path, segmentIndex, wordIndex)
|
||||
}
|
||||
}
|
||||
words = append(words, word)
|
||||
}
|
||||
|
||||
return words, events, nil
|
||||
}
|
||||
|
||||
func isJSONNull(value json.RawMessage) bool {
|
||||
|
||||
@@ -26,21 +26,7 @@ func (placeholderMerger) Merge(ctx context.Context, in []model.CanonicalTranscri
|
||||
}
|
||||
|
||||
sort.SliceStable(segments, func(i, j int) bool {
|
||||
left := segments[i]
|
||||
right := segments[j]
|
||||
if left.Start != right.Start {
|
||||
return left.Start < right.Start
|
||||
}
|
||||
if left.End != right.End {
|
||||
return left.End < right.End
|
||||
}
|
||||
if left.Source != right.Source {
|
||||
return left.Source < right.Source
|
||||
}
|
||||
if left.SourceSegmentIndex != right.SourceSegmentIndex {
|
||||
return left.SourceSegmentIndex < right.SourceSegmentIndex
|
||||
}
|
||||
return left.Speaker < right.Speaker
|
||||
return model.SegmentLess(segments[i], segments[j])
|
||||
})
|
||||
|
||||
return model.MergedTranscript{
|
||||
|
||||
@@ -66,6 +66,37 @@ func (detectOverlaps) Process(ctx context.Context, in model.MergedTranscript, cf
|
||||
}, nil
|
||||
}
|
||||
|
||||
type resolveOverlaps struct{}
|
||||
|
||||
func (resolveOverlaps) Name() string {
|
||||
return "resolve-overlaps"
|
||||
}
|
||||
|
||||
func (resolveOverlaps) Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
resolved, summary, err := overlap.Resolve(in, cfg.OverlapWordRunGap)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, nil, err
|
||||
}
|
||||
|
||||
return resolved, []report.Event{
|
||||
report.Info(
|
||||
"postprocessing",
|
||||
"resolve-overlaps",
|
||||
fmt.Sprintf(
|
||||
"processed %d overlap group(s); changed %d; removed %d original segment(s); created %d replacement segment(s)",
|
||||
summary.GroupsProcessed,
|
||||
summary.GroupsChanged,
|
||||
summary.OriginalsRemoved,
|
||||
summary.ReplacementsCreated,
|
||||
),
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type autocorrectPostprocessor struct{}
|
||||
|
||||
func (autocorrectPostprocessor) Name() string {
|
||||
|
||||
@@ -123,13 +123,15 @@ func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessStat
|
||||
|
||||
segments := make([]model.Segment, 0, len(raw.Segments))
|
||||
for index, rawSegment := range raw.Segments {
|
||||
sourceSegmentIndex := index
|
||||
segments = append(segments, model.Segment{
|
||||
Source: raw.Source,
|
||||
SourceSegmentIndex: index,
|
||||
SourceSegmentIndex: &sourceSegmentIndex,
|
||||
Speaker: canonicalSpeaker,
|
||||
Start: rawSegment.Start,
|
||||
End: rawSegment.End,
|
||||
Text: rawSegment.Text,
|
||||
Words: append([]model.Word(nil), rawSegment.Words...),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ func NewRegistry() *pipeline.Registry {
|
||||
registry.RegisterPreprocessor(trimText{})
|
||||
registry.RegisterMerger(placeholderMerger{})
|
||||
registry.RegisterPostprocessor(detectOverlaps{})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "resolve-overlaps"})
|
||||
registry.RegisterPostprocessor(resolveOverlaps{})
|
||||
registry.RegisterPostprocessor(assignIDs{})
|
||||
registry.RegisterPostprocessor(noopPostprocessor{name: "validate-output"})
|
||||
registry.RegisterPostprocessor(autocorrectPostprocessor{})
|
||||
|
||||
@@ -15,7 +15,7 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{"start": 10, "end": 11, "text": " second a ", "words": [{"word": "ignored"}]},
|
||||
{"start": 10, "end": 11, "text": " second a ", "words": [{"word": "ignored", "start": 10.1, "end": 10.2}]},
|
||||
{"start": 1, "end": 2, "text": "first a"}
|
||||
]
|
||||
}`)
|
||||
@@ -217,6 +217,107 @@ func TestMergeDetectsOverlapGroups(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeResolvesOverlapGroupsWithWordRuns(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1,
|
||||
"end": 5,
|
||||
"text": "alice original",
|
||||
"words": [
|
||||
{"word": "outside", "start": 0.5, "end": 1.0},
|
||||
{"word": "hello", "start": 1.1, "end": 1.2, "score": 0.98, "speaker": "SPEAKER_00"},
|
||||
{"word": "there", "start": 1.8, "end": 2.0},
|
||||
{"word": "later", "start": 3.0, "end": 3.1}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
inputB := writeJSONFile(t, dir, "b.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1.5,
|
||||
"end": 4,
|
||||
"text": "bob original",
|
||||
"words": [
|
||||
{"word": "bob", "start": 1.55, "end": 1.7},
|
||||
{"word": "reply", "start": 2.0, "end": 2.2}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||
- speaker: Alice
|
||||
match: ["a.json"]
|
||||
- speaker: Bob
|
||||
match: ["b.json"]
|
||||
`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
reportPath := filepath.Join(dir, "report.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", inputB,
|
||||
"--input-file", inputA,
|
||||
"--speakers", speakers,
|
||||
"--output-file", output,
|
||||
"--report-file", reportPath,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript model.FinalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
if len(transcript.OverlapGroups) != 0 {
|
||||
t.Fatalf("overlap groups = %#v, want none", transcript.OverlapGroups)
|
||||
}
|
||||
if got, want := len(transcript.Segments), 3; got != want {
|
||||
t.Fatalf("segment count = %d, want %d", got, want)
|
||||
}
|
||||
|
||||
wantTexts := []string{"hello there", "bob reply", "later"}
|
||||
wantSpeakers := []string{"Alice", "Bob", "Alice"}
|
||||
wantRefs := []string{"word-run:1:1:1", "word-run:1:2:1", "word-run:1:1:2"}
|
||||
for index, segment := range transcript.Segments {
|
||||
if segment.ID != index+1 {
|
||||
t.Fatalf("segment %d id = %d, want %d", index, segment.ID, index+1)
|
||||
}
|
||||
if segment.Text != wantTexts[index] {
|
||||
t.Fatalf("segment %d text = %q, want %q", index, segment.Text, wantTexts[index])
|
||||
}
|
||||
if segment.Speaker != wantSpeakers[index] {
|
||||
t.Fatalf("segment %d speaker = %q, want %q", index, segment.Speaker, wantSpeakers[index])
|
||||
}
|
||||
if segment.SourceRef != wantRefs[index] {
|
||||
t.Fatalf("segment %d source_ref = %q, want %q", index, segment.SourceRef, wantRefs[index])
|
||||
}
|
||||
if segment.SourceSegmentIndex != nil {
|
||||
t.Fatalf("segment %d source_segment_index = %d, want nil", index, *segment.SourceSegmentIndex)
|
||||
}
|
||||
if segment.OverlapGroupID != 0 {
|
||||
t.Fatalf("segment %d overlap_group_id = %d, want 0", index, segment.OverlapGroupID)
|
||||
}
|
||||
}
|
||||
if !equalStrings(transcript.Segments[0].DerivedFrom, []string{inputA + "#0"}) {
|
||||
t.Fatalf("segment 0 derived_from = %v", transcript.Segments[0].DerivedFrom)
|
||||
}
|
||||
|
||||
outputBytes, err := os.ReadFile(output)
|
||||
if err != nil {
|
||||
t.Fatalf("read output bytes: %v", err)
|
||||
}
|
||||
if strings.Contains(string(outputBytes), "words") {
|
||||
t.Fatalf("did not expect word timing in output:\n%s", outputBytes)
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
if !hasReportEvent(rpt, "postprocessing", "resolve-overlaps", "processed 1 overlap group(s); changed 1; removed 2 original segment(s); created 3 replacement segment(s)") {
|
||||
t.Fatal("expected resolve-overlaps summary report event")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpeakerMatchingUsesFirstMatchingRuleCaseInsensitive(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "2026-04-19-Adam_Rakestraw.json", `{
|
||||
@@ -650,6 +751,194 @@ func TestInvalidSegmentFieldsFailWithSourceAndIndex(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidWordFieldsFailWithSourceAndIndex(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
json string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "words not array",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":{}}]}`,
|
||||
want: "segment 0 words must be an array",
|
||||
},
|
||||
{
|
||||
name: "missing word",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"start":0,"end":0.1}]}]}`,
|
||||
want: "segment 0 word 0 missing string word",
|
||||
},
|
||||
{
|
||||
name: "wrong typed word",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"word":7,"start":0,"end":0.1}]}]}`,
|
||||
want: "segment 0 word 0 word must be a string",
|
||||
},
|
||||
{
|
||||
name: "wrong typed start",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"word":"x","start":"0","end":0.1}]}]}`,
|
||||
want: "segment 0 word 0 start must be numeric",
|
||||
},
|
||||
{
|
||||
name: "wrong typed end",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"word":"x","start":0,"end":"0.1"}]}]}`,
|
||||
want: "segment 0 word 0 end must be numeric",
|
||||
},
|
||||
{
|
||||
name: "wrong typed score",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"word":"x","start":0,"end":0.1,"score":"good"}]}]}`,
|
||||
want: "segment 0 word 0 score must be numeric",
|
||||
},
|
||||
{
|
||||
name: "wrong typed speaker",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"word":"x","start":0,"end":0.1,"speaker":7}]}]}`,
|
||||
want: "segment 0 word 0 speaker must be a string",
|
||||
},
|
||||
{
|
||||
name: "negative start",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"word":"x","start":-0.1,"end":0.1}]}]}`,
|
||||
want: "segment 0 word 0 has negative start",
|
||||
},
|
||||
{
|
||||
name: "end before start",
|
||||
json: `{"segments":[{"start":0,"end":1,"text":"x","words":[{"word":"x","start":0.2,"end":0.1}]}]}`,
|
||||
want: "segment 0 word 0 has end before start",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "input.json", test.json)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
)
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), input) {
|
||||
t.Fatalf("expected error to contain source path %q, got %v", input, err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), test.want) {
|
||||
t.Fatalf("expected error to contain %q, got %v", test.want, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUntimedWordsAreAcceptedAndReported(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeJSONFile(t, dir, "input.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1,
|
||||
"end": 2,
|
||||
"text": "about 13",
|
||||
"words": [
|
||||
{"word": "about", "start": 1.1, "end": 1.2},
|
||||
{"word": "13"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
reportPath := filepath.Join(dir, "report.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--report-file", reportPath,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
if !hasReportEvent(rpt, "input", "json-files", `segment 0 word 1 "13" has no complete timing`) {
|
||||
t.Fatal("expected untimed word warning report event")
|
||||
}
|
||||
foundWarning := false
|
||||
for _, event := range rpt.Events {
|
||||
if event.Stage == "input" && event.Module == "json-files" && strings.Contains(event.Message, `"13" has no complete timing`) {
|
||||
foundWarning = event.Severity == report.SeverityWarning
|
||||
}
|
||||
}
|
||||
if !foundWarning {
|
||||
t.Fatal("expected untimed word event to use warning severity")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeResolutionPreservesUntimedWordText(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1,
|
||||
"end": 3,
|
||||
"text": "about 13 and a half",
|
||||
"words": [
|
||||
{"word": "about", "start": 1.1, "end": 1.2},
|
||||
{"word": "13"},
|
||||
{"word": "and", "start": 1.24, "end": 1.3},
|
||||
{"word": "a", "start": 1.32, "end": 1.34},
|
||||
{"word": "half", "start": 1.36, "end": 1.5}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
inputB := writeJSONFile(t, dir, "b.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 1.15,
|
||||
"end": 2,
|
||||
"text": "bob overlap",
|
||||
"words": [
|
||||
{"word": "bob", "start": 1.16, "end": 1.25},
|
||||
{"word": "overlap", "start": 1.3, "end": 1.5}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||
- speaker: Alice
|
||||
match: ["a.json"]
|
||||
- speaker: Bob
|
||||
match: ["b.json"]
|
||||
`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", inputA,
|
||||
"--input-file", inputB,
|
||||
"--speakers", speakers,
|
||||
"--output-file", output,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript model.FinalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
if len(transcript.OverlapGroups) != 0 {
|
||||
t.Fatalf("expected overlap group to be resolved, got %#v", transcript.OverlapGroups)
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, segment := range transcript.Segments {
|
||||
if segment.Speaker == "Alice" && segment.Text == "about 13 and a half" {
|
||||
found = true
|
||||
if segment.Start != 1.1 || segment.End != 1.5 {
|
||||
t.Fatalf("Alice replacement bounds = %f-%f, want 1.1-1.5", segment.Start, segment.End)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatalf("expected Alice replacement to preserve untimed word text, got %#v", transcript.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidTimingFails(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -761,8 +1050,11 @@ func assertSegment(t *testing.T, segment model.Segment, id int, source string, s
|
||||
if segment.Source != source {
|
||||
t.Fatalf("segment source = %q, want %q", segment.Source, source)
|
||||
}
|
||||
if segment.SourceSegmentIndex != sourceIndex {
|
||||
t.Fatalf("segment source index = %d, want %d", segment.SourceSegmentIndex, sourceIndex)
|
||||
if segment.SourceSegmentIndex == nil {
|
||||
t.Fatalf("segment source index = nil, want %d", sourceIndex)
|
||||
}
|
||||
if *segment.SourceSegmentIndex != sourceIndex {
|
||||
t.Fatalf("segment source index = %d, want %d", *segment.SourceSegmentIndex, sourceIndex)
|
||||
}
|
||||
if segment.Speaker != speaker {
|
||||
t.Fatalf("segment speaker = %q, want %q", segment.Speaker, speaker)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -14,6 +15,8 @@ const (
|
||||
DefaultOutputModules = "json"
|
||||
DefaultPreprocessingModules = "validate-raw,normalize-speakers,trim-text"
|
||||
DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,autocorrect,assign-ids,validate-output"
|
||||
DefaultOverlapWordRunGap = 0.75
|
||||
OverlapWordRunGapEnv = "SERIATIM_OVERLAP_WORD_RUN_GAP"
|
||||
)
|
||||
|
||||
// MergeOptions captures raw CLI option values before validation.
|
||||
@@ -40,6 +43,7 @@ type Config struct {
|
||||
OutputModules []string
|
||||
PreprocessingModules []string
|
||||
PostprocessingModules []string
|
||||
OverlapWordRunGap float64
|
||||
}
|
||||
|
||||
// NewMergeConfig validates raw merge options and returns normalized config.
|
||||
@@ -49,6 +53,7 @@ func NewMergeConfig(opts MergeOptions) (Config, error) {
|
||||
OutputModules: nil,
|
||||
PreprocessingModules: nil,
|
||||
PostprocessingModules: nil,
|
||||
OverlapWordRunGap: DefaultOverlapWordRunGap,
|
||||
}
|
||||
|
||||
if cfg.InputReader == "" {
|
||||
@@ -110,6 +115,11 @@ func NewMergeConfig(opts MergeOptions) (Config, error) {
|
||||
}
|
||||
}
|
||||
|
||||
cfg.OverlapWordRunGap, err = parseOverlapWordRunGap()
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
@@ -187,6 +197,22 @@ func requireFile(path string, flag string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseOverlapWordRunGap() (float64, error) {
|
||||
value := strings.TrimSpace(os.Getenv(OverlapWordRunGapEnv))
|
||||
if value == "" {
|
||||
return DefaultOverlapWordRunGap, nil
|
||||
}
|
||||
|
||||
gap, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("%s must be a positive number of seconds: %w", OverlapWordRunGapEnv, err)
|
||||
}
|
||||
if gap <= 0 {
|
||||
return 0, fmt.Errorf("%s must be positive", OverlapWordRunGapEnv)
|
||||
}
|
||||
return gap, nil
|
||||
}
|
||||
|
||||
func contains(values []string, target string) bool {
|
||||
for _, value := range values {
|
||||
if value == target {
|
||||
|
||||
@@ -46,6 +46,86 @@ func TestDuplicateInputFilesFailValidation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestOverlapWordRunGapDefaultsTo075(t *testing.T) {
|
||||
t.Setenv(OverlapWordRunGapEnv, "")
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
cfg, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("config failed: %v", err)
|
||||
}
|
||||
if cfg.OverlapWordRunGap != DefaultOverlapWordRunGap {
|
||||
t.Fatalf("gap = %f, want %f", cfg.OverlapWordRunGap, DefaultOverlapWordRunGap)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOverlapWordRunGapUsesValidEnvOverride(t *testing.T) {
|
||||
t.Setenv(OverlapWordRunGapEnv, "1.25")
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
cfg, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("config failed: %v", err)
|
||||
}
|
||||
if cfg.OverlapWordRunGap != 1.25 {
|
||||
t.Fatalf("gap = %f, want 1.25", cfg.OverlapWordRunGap)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOverlapWordRunGapRejectsInvalidEnvOverride(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
value string
|
||||
want string
|
||||
}{
|
||||
{name: "non-numeric", value: "fast", want: "must be a positive number"},
|
||||
{name: "zero", value: "0", want: "must be positive"},
|
||||
{name: "negative", value: "-0.1", want: "must be positive"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
t.Setenv(OverlapWordRunGapEnv, test.value)
|
||||
dir := t.TempDir()
|
||||
input := writeTempFile(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
_, err := NewMergeConfig(MergeOptions{
|
||||
InputFiles: []string{input},
|
||||
OutputFile: output,
|
||||
InputReader: DefaultInputReader,
|
||||
OutputModules: DefaultOutputModules,
|
||||
PreprocessingModules: DefaultPreprocessingModules,
|
||||
PostprocessingModules: DefaultPostprocessingModules,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), test.want) {
|
||||
t.Fatalf("expected error to contain %q, got %v", test.want, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func writeTempFile(t *testing.T, dir string, name string) string {
|
||||
t.Helper()
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ type RawSegment struct {
|
||||
Start float64 `json:"start"`
|
||||
End float64 `json:"end"`
|
||||
Text string `json:"text"`
|
||||
Words []Word `json:"words,omitempty"`
|
||||
}
|
||||
|
||||
// CanonicalTranscript is a per-speaker transcript in seriatim's internal model.
|
||||
@@ -45,16 +46,18 @@ type OutputMetadata struct {
|
||||
|
||||
// Segment is the canonical transcript segment shape used by the framework.
|
||||
type Segment struct {
|
||||
ID int `json:"id,omitempty"`
|
||||
InternalRef string `json:"internal_ref,omitempty"`
|
||||
Source string `json:"source"`
|
||||
SourceSegmentIndex int `json:"source_segment_index"`
|
||||
Speaker string `json:"speaker"`
|
||||
Start float64 `json:"start"`
|
||||
End float64 `json:"end"`
|
||||
Text string `json:"text"`
|
||||
Words []Word `json:"words,omitempty"`
|
||||
OverlapGroupID int `json:"overlap_group_id,omitempty"`
|
||||
ID int `json:"id,omitempty"`
|
||||
InternalRef string `json:"internal_ref,omitempty"`
|
||||
Source string `json:"source"`
|
||||
SourceSegmentIndex *int `json:"source_segment_index,omitempty"`
|
||||
SourceRef string `json:"source_ref,omitempty"`
|
||||
DerivedFrom []string `json:"derived_from,omitempty"`
|
||||
Speaker string `json:"speaker"`
|
||||
Start float64 `json:"start"`
|
||||
End float64 `json:"end"`
|
||||
Text string `json:"text"`
|
||||
Words []Word `json:"words,omitempty"`
|
||||
OverlapGroupID int `json:"overlap_group_id,omitempty"`
|
||||
}
|
||||
|
||||
// Word preserves optional word-level timing data.
|
||||
@@ -64,6 +67,7 @@ type Word struct {
|
||||
End float64 `json:"end"`
|
||||
Score float64 `json:"score,omitempty"`
|
||||
Speaker string `json:"speaker,omitempty"`
|
||||
Timed bool `json:"-"`
|
||||
}
|
||||
|
||||
// OverlapGroup describes a detected overlapping speech region.
|
||||
@@ -76,3 +80,30 @@ type OverlapGroup struct {
|
||||
Class string `json:"class"`
|
||||
Resolution string `json:"resolution"`
|
||||
}
|
||||
|
||||
// SegmentLess defines the deterministic chronological ordering used by merge
|
||||
// and postprocessing modules.
|
||||
func SegmentLess(left Segment, right Segment) bool {
|
||||
if left.Start != right.Start {
|
||||
return left.Start < right.Start
|
||||
}
|
||||
if left.End != right.End {
|
||||
return left.End < right.End
|
||||
}
|
||||
if left.Source != right.Source {
|
||||
return left.Source < right.Source
|
||||
}
|
||||
if left.SourceSegmentIndex != nil && right.SourceSegmentIndex != nil && *left.SourceSegmentIndex != *right.SourceSegmentIndex {
|
||||
return *left.SourceSegmentIndex < *right.SourceSegmentIndex
|
||||
}
|
||||
if left.SourceSegmentIndex == nil && right.SourceSegmentIndex != nil {
|
||||
return false
|
||||
}
|
||||
if left.SourceSegmentIndex != nil && right.SourceSegmentIndex == nil {
|
||||
return true
|
||||
}
|
||||
if left.SourceRef != right.SourceRef {
|
||||
return left.SourceRef < right.SourceRef
|
||||
}
|
||||
return left.Speaker < right.Speaker
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ func finalizeCandidate(in *model.MergedTranscript, candidate overlapCandidate, c
|
||||
refs := make([]string, 0, len(candidate.indices))
|
||||
for _, index := range candidate.indices {
|
||||
in.Segments[index].OverlapGroupID = groupID
|
||||
refs = append(refs, segmentRef(in.Segments[index]))
|
||||
refs = append(refs, SegmentRef(in.Segments[index]))
|
||||
}
|
||||
|
||||
in.OverlapGroups = append(in.OverlapGroups, model.OverlapGroup{
|
||||
@@ -106,8 +106,15 @@ func distinctSpeakers(segments []model.Segment, indices []int) []string {
|
||||
return speakers
|
||||
}
|
||||
|
||||
func segmentRef(segment model.Segment) string {
|
||||
return fmt.Sprintf("%s#%d", segment.Source, segment.SourceSegmentIndex)
|
||||
// SegmentRef returns the stable overlap reference for a segment.
|
||||
func SegmentRef(segment model.Segment) string {
|
||||
if segment.SourceSegmentIndex != nil {
|
||||
return fmt.Sprintf("%s#%d", segment.Source, *segment.SourceSegmentIndex)
|
||||
}
|
||||
if segment.SourceRef != "" {
|
||||
return segment.SourceRef
|
||||
}
|
||||
return segment.Source
|
||||
}
|
||||
|
||||
func clearExisting(in *model.MergedTranscript) {
|
||||
|
||||
@@ -132,7 +132,7 @@ func TestDetectIsIdempotent(t *testing.T) {
|
||||
func segment(source string, sourceIndex int, speaker string, start float64, end float64) model.Segment {
|
||||
return model.Segment{
|
||||
Source: source,
|
||||
SourceSegmentIndex: sourceIndex,
|
||||
SourceSegmentIndex: intPtr(sourceIndex),
|
||||
Speaker: speaker,
|
||||
Start: start,
|
||||
End: end,
|
||||
@@ -140,6 +140,10 @@ func segment(source string, sourceIndex int, speaker string, start float64, end
|
||||
}
|
||||
}
|
||||
|
||||
// intPtr returns a pointer to a fresh int holding value.
func intPtr(value int) *int {
	boxed := new(int)
	*boxed = value
	return boxed
}
|
||||
|
||||
func assertGroup(t *testing.T, merged model.MergedTranscript, groupIndex int, id int, start float64, end float64, refs []string, speakers []string) {
|
||||
t.Helper()
|
||||
if len(merged.OverlapGroups) <= groupIndex {
|
||||
|
||||
344
internal/overlap/resolve.go
Normal file
344
internal/overlap/resolve.go
Normal file
@@ -0,0 +1,344 @@
|
||||
package overlap
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
|
||||
// ResolutionSummary records deterministic counters for a resolve-overlaps pass.
type ResolutionSummary struct {
	// GroupsProcessed is the number of overlap groups present in the input.
	GroupsProcessed int
	// GroupsChanged is the number of groups that produced at least one
	// replacement segment.
	GroupsChanged int
	// OriginalsRemoved is the number of distinct original segments dropped in
	// favor of replacements.
	OriginalsRemoved int
	// ReplacementsCreated is the total number of word-run segments created.
	ReplacementsCreated int
}
|
||||
|
||||
// Resolve replaces detected overlap-group segments with word-run segments when
|
||||
// word-level timing is available.
|
||||
func Resolve(in model.MergedTranscript, wordRunGap float64) (model.MergedTranscript, ResolutionSummary, error) {
|
||||
summary := ResolutionSummary{
|
||||
GroupsProcessed: len(in.OverlapGroups),
|
||||
}
|
||||
if len(in.OverlapGroups) == 0 {
|
||||
return in, summary, nil
|
||||
}
|
||||
|
||||
refToIndex := make(map[string]int, len(in.Segments))
|
||||
for index, segment := range in.Segments {
|
||||
refToIndex[SegmentRef(segment)] = index
|
||||
}
|
||||
|
||||
removeRefs := make(map[string]struct{})
|
||||
clearAnnotationRefs := make(map[string]struct{})
|
||||
removeGroupIDs := make(map[int]struct{})
|
||||
replacements := make([]model.Segment, 0)
|
||||
|
||||
for _, group := range in.OverlapGroups {
|
||||
resolved, err := resolveGroup(in, group, refToIndex, wordRunGap)
|
||||
if err != nil {
|
||||
return model.MergedTranscript{}, ResolutionSummary{}, err
|
||||
}
|
||||
if len(resolved.replacements) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
summary.GroupsChanged++
|
||||
removeGroupIDs[group.ID] = struct{}{}
|
||||
replacements = append(replacements, resolved.replacements...)
|
||||
|
||||
for _, ref := range group.Segments {
|
||||
clearAnnotationRefs[ref] = struct{}{}
|
||||
}
|
||||
for _, ref := range resolved.removeRefs {
|
||||
if _, exists := removeRefs[ref]; !exists {
|
||||
summary.OriginalsRemoved++
|
||||
}
|
||||
removeRefs[ref] = struct{}{}
|
||||
}
|
||||
summary.ReplacementsCreated += len(resolved.replacements)
|
||||
}
|
||||
|
||||
if summary.GroupsChanged == 0 {
|
||||
return in, summary, nil
|
||||
}
|
||||
|
||||
segments := make([]model.Segment, 0, len(in.Segments)-len(removeRefs)+len(replacements))
|
||||
for _, segment := range in.Segments {
|
||||
ref := SegmentRef(segment)
|
||||
if _, remove := removeRefs[ref]; remove {
|
||||
continue
|
||||
}
|
||||
if _, clear := clearAnnotationRefs[ref]; clear {
|
||||
segment.OverlapGroupID = 0
|
||||
}
|
||||
segments = append(segments, segment)
|
||||
}
|
||||
segments = append(segments, replacements...)
|
||||
sort.SliceStable(segments, func(i, j int) bool {
|
||||
return model.SegmentLess(segments[i], segments[j])
|
||||
})
|
||||
|
||||
overlapGroups := make([]model.OverlapGroup, 0, len(in.OverlapGroups)-len(removeGroupIDs))
|
||||
for _, group := range in.OverlapGroups {
|
||||
if _, remove := removeGroupIDs[group.ID]; remove {
|
||||
continue
|
||||
}
|
||||
overlapGroups = append(overlapGroups, group)
|
||||
}
|
||||
|
||||
return model.MergedTranscript{
|
||||
Segments: segments,
|
||||
OverlapGroups: overlapGroups,
|
||||
}, summary, nil
|
||||
}
|
||||
|
||||
type resolvedGroup struct {
|
||||
removeRefs []string
|
||||
replacements []model.Segment
|
||||
}
|
||||
|
||||
type resolutionWord struct {
|
||||
word model.Word
|
||||
source string
|
||||
ref string
|
||||
sequence int
|
||||
}
|
||||
|
||||
type wordRun struct {
|
||||
timedWords []resolutionWord
|
||||
untimedWords []resolutionWord
|
||||
start float64
|
||||
end float64
|
||||
}
|
||||
|
||||
func resolveGroup(in model.MergedTranscript, group model.OverlapGroup, refToIndex map[string]int, wordRunGap float64) (resolvedGroup, error) {
|
||||
segmentsBySpeaker := make(map[string][]model.Segment)
|
||||
refsBySpeaker := make(map[string][]string)
|
||||
for _, ref := range group.Segments {
|
||||
index, exists := refToIndex[ref]
|
||||
if !exists {
|
||||
return resolvedGroup{}, fmt.Errorf("overlap group %d references missing segment %q", group.ID, ref)
|
||||
}
|
||||
segment := in.Segments[index]
|
||||
segmentsBySpeaker[segment.Speaker] = append(segmentsBySpeaker[segment.Speaker], segment)
|
||||
refsBySpeaker[segment.Speaker] = append(refsBySpeaker[segment.Speaker], ref)
|
||||
}
|
||||
|
||||
speakers := groupSpeakerOrder(group, segmentsBySpeaker)
|
||||
resolved := resolvedGroup{}
|
||||
for speakerIndex, speaker := range speakers {
|
||||
timedWords, untimedWords := gatherResolutionWords(segmentsBySpeaker[speaker], group.Start, group.End)
|
||||
if len(timedWords) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
runs := buildWordRuns(timedWords, wordRunGap)
|
||||
if len(runs) == 0 {
|
||||
continue
|
||||
}
|
||||
attachUntimedWords(runs, untimedWords)
|
||||
|
||||
resolved.removeRefs = append(resolved.removeRefs, refsBySpeaker[speaker]...)
|
||||
for runIndex, run := range runs {
|
||||
resolved.replacements = append(resolved.replacements, replacementSegment(group.ID, speakerIndex+1, runIndex+1, speaker, run))
|
||||
}
|
||||
}
|
||||
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
func groupSpeakerOrder(group model.OverlapGroup, segmentsBySpeaker map[string][]model.Segment) []string {
|
||||
seen := make(map[string]struct{}, len(group.Speakers))
|
||||
speakers := make([]string, 0, len(group.Speakers))
|
||||
for _, speaker := range group.Speakers {
|
||||
if _, exists := segmentsBySpeaker[speaker]; !exists {
|
||||
continue
|
||||
}
|
||||
if _, exists := seen[speaker]; exists {
|
||||
continue
|
||||
}
|
||||
seen[speaker] = struct{}{}
|
||||
speakers = append(speakers, speaker)
|
||||
}
|
||||
|
||||
extra := make([]string, 0)
|
||||
for speaker := range segmentsBySpeaker {
|
||||
if _, exists := seen[speaker]; exists {
|
||||
continue
|
||||
}
|
||||
extra = append(extra, speaker)
|
||||
}
|
||||
sort.Strings(extra)
|
||||
speakers = append(speakers, extra...)
|
||||
return speakers
|
||||
}
|
||||
|
||||
func gatherResolutionWords(segments []model.Segment, groupStart float64, groupEnd float64) ([]resolutionWord, []resolutionWord) {
|
||||
timedWords := make([]resolutionWord, 0)
|
||||
untimedWords := make([]resolutionWord, 0)
|
||||
sequence := 0
|
||||
for _, segment := range segments {
|
||||
ref := SegmentRef(segment)
|
||||
for _, word := range segment.Words {
|
||||
candidate := resolutionWord{
|
||||
word: word,
|
||||
source: segment.Source,
|
||||
ref: ref,
|
||||
sequence: sequence,
|
||||
}
|
||||
sequence++
|
||||
if !word.Timed {
|
||||
untimedWords = append(untimedWords, candidate)
|
||||
continue
|
||||
}
|
||||
if word.End <= groupStart || word.Start >= groupEnd {
|
||||
continue
|
||||
}
|
||||
timedWords = append(timedWords, candidate)
|
||||
}
|
||||
}
|
||||
|
||||
sort.SliceStable(timedWords, func(i, j int) bool {
|
||||
left := timedWords[i].word
|
||||
right := timedWords[j].word
|
||||
if left.Start != right.Start {
|
||||
return left.Start < right.Start
|
||||
}
|
||||
if left.End != right.End {
|
||||
return left.End < right.End
|
||||
}
|
||||
return left.Text < right.Text
|
||||
})
|
||||
return timedWords, untimedWords
|
||||
}
|
||||
|
||||
func buildWordRuns(words []resolutionWord, wordRunGap float64) []wordRun {
|
||||
if len(words) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
runs := make([]wordRun, 0)
|
||||
current := newWordRun(words[0])
|
||||
previousEnd := words[0].word.End
|
||||
for _, word := range words[1:] {
|
||||
if word.word.Start-previousEnd <= wordRunGap {
|
||||
current.add(word)
|
||||
} else {
|
||||
runs = append(runs, current.finish())
|
||||
current = newWordRun(word)
|
||||
}
|
||||
previousEnd = word.word.End
|
||||
}
|
||||
runs = append(runs, current.finish())
|
||||
return runs
|
||||
}
|
||||
|
||||
func newWordRun(word resolutionWord) wordRun {
|
||||
return wordRun{
|
||||
timedWords: []resolutionWord{word},
|
||||
start: word.word.Start,
|
||||
end: word.word.End,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *wordRun) add(word resolutionWord) {
|
||||
r.timedWords = append(r.timedWords, word)
|
||||
if word.word.Start < r.start {
|
||||
r.start = word.word.Start
|
||||
}
|
||||
if word.word.End > r.end {
|
||||
r.end = word.word.End
|
||||
}
|
||||
}
|
||||
|
||||
func (r wordRun) finish() wordRun {
|
||||
return r
|
||||
}
|
||||
|
||||
func attachUntimedWords(runs []wordRun, untimedWords []resolutionWord) {
|
||||
if len(runs) == 0 || len(untimedWords) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for _, word := range untimedWords {
|
||||
target := 0
|
||||
for index, run := range runs {
|
||||
if word.sequence < run.firstSequence() {
|
||||
if index == 0 {
|
||||
target = 0
|
||||
} else {
|
||||
target = index - 1
|
||||
}
|
||||
break
|
||||
}
|
||||
target = index
|
||||
}
|
||||
runs[target].untimedWords = append(runs[target].untimedWords, word)
|
||||
}
|
||||
}
|
||||
|
||||
func (r wordRun) firstSequence() int {
|
||||
first := r.timedWords[0].sequence
|
||||
for _, word := range r.timedWords[1:] {
|
||||
if word.sequence < first {
|
||||
first = word.sequence
|
||||
}
|
||||
}
|
||||
return first
|
||||
}
|
||||
|
||||
func (r wordRun) allWordsInTextOrder() []resolutionWord {
|
||||
words := make([]resolutionWord, 0, len(r.timedWords)+len(r.untimedWords))
|
||||
words = append(words, r.timedWords...)
|
||||
words = append(words, r.untimedWords...)
|
||||
sort.SliceStable(words, func(i, j int) bool {
|
||||
return words[i].sequence < words[j].sequence
|
||||
})
|
||||
return words
|
||||
}
|
||||
|
||||
func replacementSegment(groupID int, speakerIndex int, runIndex int, speaker string, run wordRun) model.Segment {
|
||||
orderedWords := run.allWordsInTextOrder()
|
||||
words := make([]model.Word, 0, len(orderedWords))
|
||||
text := make([]string, 0, len(orderedWords))
|
||||
refs := make([]string, 0, len(orderedWords))
|
||||
source := ""
|
||||
for _, word := range orderedWords {
|
||||
words = append(words, word.word)
|
||||
text = append(text, word.word.Text)
|
||||
refs = append(refs, word.ref)
|
||||
if source == "" {
|
||||
source = word.source
|
||||
} else if source != word.source {
|
||||
source = "derived"
|
||||
}
|
||||
}
|
||||
|
||||
return model.Segment{
|
||||
Source: source,
|
||||
SourceRef: fmt.Sprintf("word-run:%d:%d:%d", groupID, speakerIndex, runIndex),
|
||||
DerivedFrom: uniqueSortedStrings(refs),
|
||||
Speaker: speaker,
|
||||
Start: run.start,
|
||||
End: run.end,
|
||||
Text: strings.Join(text, " "),
|
||||
Words: words,
|
||||
}
|
||||
}
|
||||
|
||||
// uniqueSortedStrings returns the distinct strings from values in ascending
// order. The input slice is not modified, and the result is never nil.
func uniqueSortedStrings(values []string) []string {
	sorted := make([]string, len(values))
	copy(sorted, values)
	sort.Strings(sorted)

	// Compact in place: after sorting, duplicates are adjacent.
	unique := sorted[:0]
	for _, value := range sorted {
		if n := len(unique); n == 0 || unique[n-1] != value {
			unique = append(unique, value)
		}
	}
	return unique
}
|
||||
345
internal/overlap/resolve_test.go
Normal file
345
internal/overlap/resolve_test.go
Normal file
@@ -0,0 +1,345 @@
|
||||
package overlap
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
)
|
||||
|
||||
func TestResolveNoOverlapGroupsIsNoOp(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 2, word("hello", 1.1, 1.2)),
|
||||
},
|
||||
}
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(got, merged) {
|
||||
t.Fatalf("expected no-op result:\ngot %#v\nwant %#v", got, merged)
|
||||
}
|
||||
if summary.GroupsProcessed != 0 || summary.GroupsChanged != 0 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveCreatesChronologicalWordRunSegments(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 5, word("A1", 1.1, 1.2), word("A2", 1.8, 2.0)),
|
||||
segmentWithWords("b.json", 0, "Bob", 1.5, 4, word("B1", 1.55, 1.7), word("B2", 2.6, 2.8)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.GroupsProcessed != 1 || summary.GroupsChanged != 1 || summary.OriginalsRemoved != 2 || summary.ReplacementsCreated != 3 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if len(got.OverlapGroups) != 0 {
|
||||
t.Fatalf("expected resolved group to be removed, got %#v", got.OverlapGroups)
|
||||
}
|
||||
|
||||
gotTexts := []string{got.Segments[0].Text, got.Segments[1].Text, got.Segments[2].Text}
|
||||
wantTexts := []string{"A1 A2", "B1", "B2"}
|
||||
if !reflect.DeepEqual(gotTexts, wantTexts) {
|
||||
t.Fatalf("texts = %v, want %v", gotTexts, wantTexts)
|
||||
}
|
||||
for _, segment := range got.Segments {
|
||||
if segment.ID != 0 {
|
||||
t.Fatalf("replacement segment has ID %d, want 0", segment.ID)
|
||||
}
|
||||
if segment.SourceSegmentIndex != nil {
|
||||
t.Fatalf("replacement segment source index = %d, want nil", *segment.SourceSegmentIndex)
|
||||
}
|
||||
if segment.OverlapGroupID != 0 {
|
||||
t.Fatalf("replacement segment overlap group ID = %d, want 0", segment.OverlapGroupID)
|
||||
}
|
||||
if segment.SourceRef == "" {
|
||||
t.Fatal("replacement segment missing source_ref")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveIncludesWordsByIntervalIntersection(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
9,
|
||||
21,
|
||||
word("before", 9.5, 10),
|
||||
word("left-edge", 9.9, 10.1),
|
||||
word("inside", 11, 11.2),
|
||||
word("right-edge", 19.9, 20.1),
|
||||
word("after", 20, 20.2),
|
||||
),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 10, 20, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 10)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "left-edge inside right-edge" {
|
||||
t.Fatalf("text = %q", got.Segments[0].Text)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveWordRunGapThreshold(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 4, word("one", 1, 1.1), word("two", 1.85, 2), word("three", 2.8, 3)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "one two" || got.Segments[1].Text != "three" {
|
||||
t.Fatalf("unexpected replacement texts: %#v", got.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolvePartialResolutionKeepsNoWordSpeakerOriginals(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 5, word("hello", 1.2, 1.4)),
|
||||
segmentWithWords("b.json", 0, "Bob", 2, 4),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.OriginalsRemoved != 1 || summary.ReplacementsCreated != 1 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if len(got.OverlapGroups) != 0 {
|
||||
t.Fatalf("expected changed group to be removed, got %#v", got.OverlapGroups)
|
||||
}
|
||||
if len(got.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "hello" || got.Segments[1].Text != "Bob" {
|
||||
t.Fatalf("unexpected segment texts: %#v", got.Segments)
|
||||
}
|
||||
if got.Segments[1].SourceSegmentIndex == nil {
|
||||
t.Fatal("kept original should retain source_segment_index")
|
||||
}
|
||||
if got.Segments[1].OverlapGroupID != 0 {
|
||||
t.Fatalf("kept original overlap group ID = %d, want 0", got.Segments[1].OverlapGroupID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveGroupWithNoUsableWordsRemainsUnchanged(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 5),
|
||||
segmentWithWords("b.json", 0, "Bob", 2, 4),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 5, []string{"a.json#0", "b.json#0"}, []string{"Alice", "Bob"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
merged.Segments[1].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.GroupsChanged != 0 || summary.OriginalsRemoved != 0 || summary.ReplacementsCreated != 0 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if !reflect.DeepEqual(got, merged) {
|
||||
t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveReplacementProvenanceIsDeterministic(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 1, "Alice", 1, 3, word("second", 1.5, 1.6)),
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, word("first", 1.1, 1.2)),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#1", "a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
segment := got.Segments[0]
|
||||
if segment.SourceRef != "word-run:1:1:1" {
|
||||
t.Fatalf("source_ref = %q", segment.SourceRef)
|
||||
}
|
||||
if !reflect.DeepEqual(segment.DerivedFrom, []string{"a.json#0", "a.json#1"}) {
|
||||
t.Fatalf("derived_from = %v", segment.DerivedFrom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveIncludesUntimedWordsInTextWithoutChangingBounds(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
1,
|
||||
3,
|
||||
untimedWord("pre"),
|
||||
word("one", 1.1, 1.2),
|
||||
untimedWord("middle"),
|
||||
word("two", 1.4, 1.5),
|
||||
untimedWord("post"),
|
||||
),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 1 {
|
||||
t.Fatalf("segment count = %d, want 1", len(got.Segments))
|
||||
}
|
||||
segment := got.Segments[0]
|
||||
if segment.Text != "pre one middle two post" {
|
||||
t.Fatalf("text = %q", segment.Text)
|
||||
}
|
||||
if segment.Start != 1.1 || segment.End != 1.5 {
|
||||
t.Fatalf("bounds = %f-%f, want 1.1-1.5", segment.Start, segment.End)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveUntimedWordsDoNotBridgeWordRunGap(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords(
|
||||
"a.json",
|
||||
0,
|
||||
"Alice",
|
||||
1,
|
||||
4,
|
||||
word("one", 1, 1.1),
|
||||
untimedWord("middle"),
|
||||
word("two", 2, 2.1),
|
||||
),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 4, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
|
||||
got, _, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if len(got.Segments) != 2 {
|
||||
t.Fatalf("segment count = %d, want 2", len(got.Segments))
|
||||
}
|
||||
if got.Segments[0].Text != "one middle" || got.Segments[1].Text != "two" {
|
||||
t.Fatalf("unexpected texts: %#v", got.Segments)
|
||||
}
|
||||
if got.Segments[0].End != 1.1 || got.Segments[1].Start != 2 {
|
||||
t.Fatalf("untimed word changed bounds: %#v", got.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveSpeakerWithOnlyUntimedWordsIsNotReplaced(t *testing.T) {
|
||||
merged := model.MergedTranscript{
|
||||
Segments: []model.Segment{
|
||||
segmentWithWords("a.json", 0, "Alice", 1, 3, untimedWord("hello")),
|
||||
},
|
||||
OverlapGroups: []model.OverlapGroup{
|
||||
group(1, 1, 3, []string{"a.json#0"}, []string{"Alice"}),
|
||||
},
|
||||
}
|
||||
merged.Segments[0].OverlapGroupID = 1
|
||||
|
||||
got, summary, err := Resolve(merged, 0.75)
|
||||
if err != nil {
|
||||
t.Fatalf("resolve failed: %v", err)
|
||||
}
|
||||
if summary.GroupsChanged != 0 {
|
||||
t.Fatalf("unexpected summary: %#v", summary)
|
||||
}
|
||||
if !reflect.DeepEqual(got, merged) {
|
||||
t.Fatalf("expected unchanged transcript:\ngot %#v\nwant %#v", got, merged)
|
||||
}
|
||||
}
|
||||
|
||||
func segmentWithWords(source string, sourceIndex int, speaker string, start float64, end float64, words ...model.Word) model.Segment {
|
||||
segment := segment(source, sourceIndex, speaker, start, end)
|
||||
segment.Words = words
|
||||
return segment
|
||||
}
|
||||
|
||||
func word(text string, start float64, end float64) model.Word {
|
||||
return model.Word{
|
||||
Text: text,
|
||||
Start: start,
|
||||
End: end,
|
||||
Timed: true,
|
||||
}
|
||||
}
|
||||
|
||||
func untimedWord(text string) model.Word {
|
||||
return model.Word{
|
||||
Text: text,
|
||||
}
|
||||
}
|
||||
|
||||
func group(id int, start float64, end float64, refs []string, speakers []string) model.OverlapGroup {
|
||||
return model.OverlapGroup{
|
||||
ID: id,
|
||||
Start: start,
|
||||
End: end,
|
||||
Segments: refs,
|
||||
Speakers: speakers,
|
||||
Class: defaultClass,
|
||||
Resolution: defaultResolution,
|
||||
}
|
||||
}
|
||||
@@ -142,6 +142,10 @@ func validatePreprocessors(modules []Preprocessor) error {
|
||||
func finalizeTranscript(cfg config.Config, merged model.MergedTranscript) model.FinalTranscript {
|
||||
segments := make([]model.Segment, len(merged.Segments))
|
||||
copy(segments, merged.Segments)
|
||||
for index := range segments {
|
||||
segments[index].Words = nil
|
||||
segments[index].DerivedFrom = append([]string(nil), segments[index].DerivedFrom...)
|
||||
}
|
||||
overlapGroups := make([]model.OverlapGroup, len(merged.OverlapGroups))
|
||||
copy(overlapGroups, merged.OverlapGroups)
|
||||
|
||||
|
||||
@@ -50,6 +50,16 @@ func Info(stage string, module string, message string) Event {
|
||||
}
|
||||
}
|
||||
|
||||
// Warning constructs a warning report event.
|
||||
func Warning(stage string, module string, message string) Event {
|
||||
return Event{
|
||||
Severity: SeverityWarning,
|
||||
Stage: stage,
|
||||
Module: module,
|
||||
Message: message,
|
||||
}
|
||||
}
|
||||
|
||||
// WriteJSON writes a deterministic JSON report.
|
||||
func WriteJSON(path string, rpt Report) error {
|
||||
file, err := os.Create(path)
|
||||
|
||||
Reference in New Issue
Block a user