Updated DefaultWordRunReorderWindow to 1.0
This commit is contained in:
@@ -8,6 +8,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
||||
"gitea.maximumdirect.net/eric/seriatim/schema"
|
||||
@@ -342,6 +343,8 @@ func TestMergeDetectsOverlapGroups(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestMergeResolvesOverlapGroupsWithWordRuns(t *testing.T) {
|
||||
t.Setenv(config.WordRunReorderWindowEnv, "0.4")
|
||||
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
@@ -1677,6 +1680,75 @@ func TestMergeResolveOverlapsAbsorbsNearbyContext(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeResolveOverlapsPreservesAbsorbedContextPrefix(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
inputA := writeJSONFile(t, dir, "a.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 7,
|
||||
"end": 9.95,
|
||||
"text": "full context prefix near",
|
||||
"words": [
|
||||
{"word": "full", "start": 7.1, "end": 7.2},
|
||||
{"word": "context", "start": 7.3, "end": 7.4},
|
||||
{"word": "prefix", "start": 7.5, "end": 7.6},
|
||||
{"word": "near", "start": 9.7, "end": 9.9}
|
||||
]
|
||||
},
|
||||
{
|
||||
"start": 10,
|
||||
"end": 11,
|
||||
"text": "inside",
|
||||
"words": [
|
||||
{"word": "inside", "start": 10.5, "end": 10.7}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
inputB := writeJSONFile(t, dir, "b.json", `{
|
||||
"segments": [
|
||||
{
|
||||
"start": 10.2,
|
||||
"end": 11,
|
||||
"text": "bob",
|
||||
"words": [
|
||||
{"word": "bob", "start": 10.4, "end": 10.6}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||
- speaker: Alice
|
||||
match: ["a.json"]
|
||||
- speaker: Bob
|
||||
match: ["b.json"]
|
||||
`)
|
||||
output := filepath.Join(dir, "merged.json")
|
||||
|
||||
err := executeMerge(
|
||||
"--input-file", inputA,
|
||||
"--input-file", inputB,
|
||||
"--speakers", speakers,
|
||||
"--output-schema", "minimal",
|
||||
"--output-file", output,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("merge failed: %v", err)
|
||||
}
|
||||
|
||||
var transcript schema.MinimalTranscript
|
||||
readJSON(t, output, &transcript)
|
||||
aliceText := make([]string, 0)
|
||||
for _, segment := range transcript.Segments {
|
||||
if segment.Speaker == "Alice" {
|
||||
aliceText = append(aliceText, segment.Text)
|
||||
}
|
||||
}
|
||||
if strings.Join(aliceText, " ") != "full context prefix near inside" {
|
||||
t.Fatalf("expected full absorbed context prefix in Alice output, got %#v", transcript.Segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidTimingFails(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
Reference in New Issue
Block a user