Implemented substring matching for speakers.yml
This commit is contained in:
52
README.md
52
README.md
@@ -74,22 +74,56 @@ Other WhisperX fields, including `words` and raw diarization speaker labels, are
|
|||||||
|
|
||||||
## Speaker Map Format
|
## Speaker Map Format
|
||||||
|
|
||||||
`speakers.yml` maps each input file basename to one canonical speaker name:
|
`speakers.yml` maps input files to canonical speaker names using ordered substring rules:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
inputs:
|
match:
|
||||||
2026-04-19-Eric_Rakestraw.json:
|
- speaker: "Eric Rakestraw"
|
||||||
speaker: "Eric Rakestraw"
|
match:
|
||||||
|
- "Eric_Rakestraw"
|
||||||
|
- "Eric"
|
||||||
|
|
||||||
2026-04-19-Mike_Brown.json:
|
- speaker: "Mike Brown"
|
||||||
speaker: "Mike Brown"
|
match:
|
||||||
|
- "Mike_Brown"
|
||||||
|
- "mb"
|
||||||
|
```
|
||||||
|
|
||||||
|
For each `--input-file`, `seriatim` takes the file basename and evaluates the rules in order. The first rule with a matching substring wins, and no later rules are evaluated.
|
||||||
|
|
||||||
|
For example, this input:
|
||||||
|
|
||||||
|
```text
|
||||||
|
samples/raw/2026-04-19-Eric_Rakestraw.json
|
||||||
|
```
|
||||||
|
|
||||||
|
matches this rule because the basename contains `Eric_Rakestraw`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- speaker: "Eric Rakestraw"
|
||||||
|
match:
|
||||||
|
- "Eric_Rakestraw"
|
||||||
```
|
```
|
||||||
|
|
||||||
Important details:
|
Important details:
|
||||||
|
|
||||||
- Keys are matched against the basename of each `--input-file`, not the full path.
|
- Matching is against the input file basename, not the full path.
|
||||||
- Every input file must have exactly one matching entry.
|
- Matching is case-insensitive.
|
||||||
- `speaker` is required and must be non-empty.
|
- Rules are evaluated from first to last.
|
||||||
|
- Each rule must have a non-empty `speaker`.
|
||||||
|
- Each rule must have at least one non-empty `match` string.
|
||||||
|
- Duplicate speaker names are invalid.
|
||||||
|
- Every input file must match at least one rule or the command fails.
|
||||||
|
|
||||||
|
Removed old format:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
inputs:
|
||||||
|
eric.json:
|
||||||
|
speaker: "Eric Rakestraw"
|
||||||
|
```
|
||||||
|
|
||||||
|
The old `inputs:` direct mapping format is no longer supported.
|
||||||
|
|
||||||
## Output JSON Format
|
## Output JSON Format
|
||||||
|
|
||||||
|
|||||||
@@ -24,11 +24,11 @@ func TestMergeWritesMergedOutputAndReport(t *testing.T) {
|
|||||||
{"start": 5, "end": 6, "text": "first b"}
|
{"start": 5, "end": 6, "text": "first b"}
|
||||||
]
|
]
|
||||||
}`)
|
}`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
a.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["a.json"]
|
||||||
b.json:
|
- speaker: Bob
|
||||||
speaker: Bob
|
match: ["b.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
reportPath := filepath.Join(dir, "report.json")
|
reportPath := filepath.Join(dir, "report.json")
|
||||||
@@ -112,11 +112,11 @@ func TestMergeTieBreakOrder(t *testing.T) {
|
|||||||
{"start": 1, "end": 2, "text": "b-same-time"}
|
{"start": 1, "end": 2, "text": "b-same-time"}
|
||||||
]
|
]
|
||||||
}`)
|
}`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
a.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["a.json"]
|
||||||
b.json:
|
- speaker: Bob
|
||||||
speaker: Bob
|
match: ["b.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -148,12 +148,43 @@ func TestMergeTieBreakOrder(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSpeakerMatchingUsesFirstMatchingRuleCaseInsensitive(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
input := writeJSONFile(t, dir, "2026-04-19-Adam_Rakestraw.json", `{
|
||||||
|
"segments": [
|
||||||
|
{"start": 1, "end": 2, "text": "hello"}
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
|
- speaker: First Match
|
||||||
|
match: ["adam"]
|
||||||
|
- speaker: Later Match
|
||||||
|
match: ["Adam_Rakestraw"]
|
||||||
|
`)
|
||||||
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
|
err := executeMerge(
|
||||||
|
"--input-file", input,
|
||||||
|
"--speakers", speakers,
|
||||||
|
"--output-file", output,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("merge failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var transcript model.FinalTranscript
|
||||||
|
readJSON(t, output, &transcript)
|
||||||
|
if got, want := transcript.Segments[0].Speaker, "First Match"; got != want {
|
||||||
|
t.Fatalf("speaker = %q, want %q", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestUnknownModulesFailDuringValidation(t *testing.T) {
|
func TestUnknownModulesFailDuringValidation(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
input.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["input.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -224,9 +255,9 @@ func TestInvalidPreprocessingOrderFails(t *testing.T) {
|
|||||||
|
|
||||||
func TestMissingInputFileFailsBeforePipelineExecution(t *testing.T) {
|
func TestMissingInputFileFailsBeforePipelineExecution(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
missing.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["missing.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -263,9 +294,9 @@ func TestNormalizeSpeakersRequiresSpeakersFile(t *testing.T) {
|
|||||||
func TestAutocorrectRequiresAutocorrectFile(t *testing.T) {
|
func TestAutocorrectRequiresAutocorrectFile(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
input.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["input.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -287,11 +318,11 @@ func TestOutputJSONIsByteStable(t *testing.T) {
|
|||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
inputA := writeJSONFile(t, dir, "a.json", `{"segments":[{"start":2,"end":3,"text":"a"}]}`)
|
inputA := writeJSONFile(t, dir, "a.json", `{"segments":[{"start":2,"end":3,"text":"a"}]}`)
|
||||||
inputB := writeJSONFile(t, dir, "b.json", `{"segments":[{"start":1,"end":2,"text":"b"}]}`)
|
inputB := writeJSONFile(t, dir, "b.json", `{"segments":[{"start":1,"end":2,"text":"b"}]}`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
a.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["a.json"]
|
||||||
b.json:
|
- speaker: Bob
|
||||||
speaker: Bob
|
match: ["b.json"]
|
||||||
`)
|
`)
|
||||||
outputA := filepath.Join(dir, "merged-a.json")
|
outputA := filepath.Join(dir, "merged-a.json")
|
||||||
outputB := filepath.Join(dir, "merged-b.json")
|
outputB := filepath.Join(dir, "merged-b.json")
|
||||||
@@ -327,9 +358,9 @@ func TestOutputJSONIsByteStable(t *testing.T) {
|
|||||||
func TestMissingSpeakerMappingFails(t *testing.T) {
|
func TestMissingSpeakerMappingFails(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[]}`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
other.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["other.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -341,7 +372,7 @@ func TestMissingSpeakerMappingFails(t *testing.T) {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("expected error")
|
t.Fatal("expected error")
|
||||||
}
|
}
|
||||||
if !strings.Contains(err.Error(), `speaker map has no entry for input basename "input.json"`) {
|
if !strings.Contains(err.Error(), `speaker map has no match for input basename "input.json"`) {
|
||||||
t.Fatalf("unexpected error: %v", err)
|
t.Fatalf("unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -349,9 +380,9 @@ func TestMissingSpeakerMappingFails(t *testing.T) {
|
|||||||
func TestMalformedJSONFails(t *testing.T) {
|
func TestMalformedJSONFails(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
input := writeJSONFile(t, dir, "input.json", `{"segments":[`)
|
input := writeJSONFile(t, dir, "input.json", `{"segments":[`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
input.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["input.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -371,9 +402,9 @@ func TestMalformedJSONFails(t *testing.T) {
|
|||||||
func TestMissingTopLevelSegmentsFails(t *testing.T) {
|
func TestMissingTopLevelSegmentsFails(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
input := writeJSONFile(t, dir, "input.json", `{}`)
|
input := writeJSONFile(t, dir, "input.json", `{}`)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
input.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["input.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -422,9 +453,9 @@ func TestInvalidSegmentFieldsFailWithSourceAndIndex(t *testing.T) {
|
|||||||
t.Run(test.name, func(t *testing.T) {
|
t.Run(test.name, func(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
input := writeJSONFile(t, dir, "input.json", test.json)
|
input := writeJSONFile(t, dir, "input.json", test.json)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
input.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["input.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
@@ -468,9 +499,9 @@ func TestInvalidTimingFails(t *testing.T) {
|
|||||||
t.Run(test.name, func(t *testing.T) {
|
t.Run(test.name, func(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
input := writeJSONFile(t, dir, "input.json", test.json)
|
input := writeJSONFile(t, dir, "input.json", test.json)
|
||||||
speakers := writeYAMLFile(t, dir, "speakers.yml", `inputs:
|
speakers := writeYAMLFile(t, dir, "speakers.yml", `match:
|
||||||
input.json:
|
- speaker: Alice
|
||||||
speaker: Alice
|
match: ["input.json"]
|
||||||
`)
|
`)
|
||||||
output := filepath.Join(dir, "merged.json")
|
output := filepath.Join(dir, "merged.json")
|
||||||
|
|
||||||
|
|||||||
@@ -9,18 +9,19 @@ import (
|
|||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Map resolves input file basenames to canonical speaker names.
|
// Map resolves input file basenames to canonical speaker names using ordered substring rules.
|
||||||
type Map struct {
|
type Map struct {
|
||||||
inputs map[string]Input
|
rules []Rule
|
||||||
}
|
}
|
||||||
|
|
||||||
// Input describes one input entry in speakers.yml.
|
// Rule describes one ordered speaker matching rule.
|
||||||
type Input struct {
|
type Rule struct {
|
||||||
Speaker string `yaml:"speaker"`
|
Speaker string `yaml:"speaker"`
|
||||||
|
Match []string `yaml:"match"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type fileSchema struct {
|
type fileSchema struct {
|
||||||
Inputs map[string]Input `yaml:"inputs"`
|
Match []Rule `yaml:"match"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoadMap parses a speakers.yml file.
|
// LoadMap parses a speakers.yml file.
|
||||||
@@ -34,36 +35,49 @@ func LoadMap(path string) (Map, error) {
|
|||||||
if err := yaml.Unmarshal(data, &parsed); err != nil {
|
if err := yaml.Unmarshal(data, &parsed); err != nil {
|
||||||
return Map{}, fmt.Errorf("parse speaker map %q: %w", path, err)
|
return Map{}, fmt.Errorf("parse speaker map %q: %w", path, err)
|
||||||
}
|
}
|
||||||
if len(parsed.Inputs) == 0 {
|
if len(parsed.Match) == 0 {
|
||||||
return Map{}, fmt.Errorf("speaker map %q must contain at least one inputs entry", path)
|
return Map{}, fmt.Errorf("speaker map %q must contain at least one match rule", path)
|
||||||
}
|
}
|
||||||
|
|
||||||
inputs := make(map[string]Input, len(parsed.Inputs))
|
seenSpeakers := make(map[string]struct{}, len(parsed.Match))
|
||||||
for key, input := range parsed.Inputs {
|
rules := make([]Rule, 0, len(parsed.Match))
|
||||||
basename := filepath.Base(strings.TrimSpace(key))
|
for index, rule := range parsed.Match {
|
||||||
if basename == "." || basename == "" {
|
rule.Speaker = strings.TrimSpace(rule.Speaker)
|
||||||
return Map{}, fmt.Errorf("speaker map %q contains an empty input key", path)
|
if rule.Speaker == "" {
|
||||||
|
return Map{}, fmt.Errorf("speaker map rule %d must include speaker", index)
|
||||||
}
|
}
|
||||||
if _, exists := inputs[basename]; exists {
|
if _, exists := seenSpeakers[rule.Speaker]; exists {
|
||||||
return Map{}, fmt.Errorf("speaker map %q contains duplicate basename mapping for %q", path, basename)
|
return Map{}, fmt.Errorf("speaker map contains duplicate speaker %q", rule.Speaker)
|
||||||
|
}
|
||||||
|
seenSpeakers[rule.Speaker] = struct{}{}
|
||||||
|
|
||||||
|
if len(rule.Match) == 0 {
|
||||||
|
return Map{}, fmt.Errorf("speaker map rule %d for speaker %q must include at least one match string", index, rule.Speaker)
|
||||||
|
}
|
||||||
|
for matchIndex, match := range rule.Match {
|
||||||
|
match = strings.TrimSpace(match)
|
||||||
|
if match == "" {
|
||||||
|
return Map{}, fmt.Errorf("speaker map rule %d for speaker %q contains empty match string at index %d", index, rule.Speaker, matchIndex)
|
||||||
|
}
|
||||||
|
rule.Match[matchIndex] = match
|
||||||
}
|
}
|
||||||
|
|
||||||
input.Speaker = strings.TrimSpace(input.Speaker)
|
rules = append(rules, rule)
|
||||||
if input.Speaker == "" {
|
|
||||||
return Map{}, fmt.Errorf("speaker map entry %q must include speaker", basename)
|
|
||||||
}
|
|
||||||
inputs[basename] = input
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return Map{inputs: inputs}, nil
|
return Map{rules: rules}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SpeakerForSource returns the canonical speaker for a transcript source path.
|
// SpeakerForSource returns the canonical speaker for a transcript source path.
|
||||||
func (m Map) SpeakerForSource(source string) (string, error) {
|
func (m Map) SpeakerForSource(source string) (string, error) {
|
||||||
basename := filepath.Base(source)
|
basename := filepath.Base(source)
|
||||||
input, ok := m.inputs[basename]
|
normalized := strings.ToLower(basename)
|
||||||
if !ok {
|
for _, rule := range m.rules {
|
||||||
return "", fmt.Errorf("speaker map has no entry for input basename %q", basename)
|
for _, match := range rule.Match {
|
||||||
|
if strings.Contains(normalized, strings.ToLower(match)) {
|
||||||
|
return rule.Speaker, nil
|
||||||
}
|
}
|
||||||
return input.Speaker, nil
|
}
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("speaker map has no match for input basename %q", basename)
|
||||||
}
|
}
|
||||||
|
|||||||
155
internal/speaker/map_test.go
Normal file
155
internal/speaker/map_test.go
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
package speaker
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSpeakerForSourceMatchesBasenameCaseInsensitive(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := writeSpeakerMap(t, dir, `match:
|
||||||
|
- speaker: "Eric Rakestraw"
|
||||||
|
match:
|
||||||
|
- "eric_rakestraw"
|
||||||
|
`)
|
||||||
|
|
||||||
|
speakers, err := LoadMap(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load speaker map: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got, err := speakers.SpeakerForSource(filepath.Join(dir, "2026-04-19-Eric_Rakestraw.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolve speaker: %v", err)
|
||||||
|
}
|
||||||
|
if got != "Eric Rakestraw" {
|
||||||
|
t.Fatalf("speaker = %q, want %q", got, "Eric Rakestraw")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpeakerForSourceUsesBasenameOnly(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := writeSpeakerMap(t, dir, `match:
|
||||||
|
- speaker: "Directory Match"
|
||||||
|
match:
|
||||||
|
- "speaker-dir"
|
||||||
|
`)
|
||||||
|
|
||||||
|
speakers, err := LoadMap(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load speaker map: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = speakers.SpeakerForSource(filepath.Join(dir, "speaker-dir", "input.json"))
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected no match")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), `input.json`) {
|
||||||
|
t.Fatalf("expected basename in error, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpeakerForSourceUsesFirstMatchingRule(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := writeSpeakerMap(t, dir, `match:
|
||||||
|
- speaker: "First"
|
||||||
|
match:
|
||||||
|
- "adam"
|
||||||
|
- speaker: "Second"
|
||||||
|
match:
|
||||||
|
- "adam_rakestraw"
|
||||||
|
`)
|
||||||
|
|
||||||
|
speakers, err := LoadMap(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load speaker map: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got, err := speakers.SpeakerForSource("2026-04-19-Adam_Rakestraw.json")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolve speaker: %v", err)
|
||||||
|
}
|
||||||
|
if got != "First" {
|
||||||
|
t.Fatalf("speaker = %q, want %q", got, "First")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadMapValidation(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
content string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "missing top-level match",
|
||||||
|
content: `inputs: {}`,
|
||||||
|
want: "must contain at least one match rule",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty match list",
|
||||||
|
content: `match: []`,
|
||||||
|
want: "must contain at least one match rule",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty speaker",
|
||||||
|
content: `match:
|
||||||
|
- speaker: ""
|
||||||
|
match: ["eric"]
|
||||||
|
`,
|
||||||
|
want: "must include speaker",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty rule match list",
|
||||||
|
content: `match:
|
||||||
|
- speaker: "Eric"
|
||||||
|
match: []
|
||||||
|
`,
|
||||||
|
want: "must include at least one match string",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty match string",
|
||||||
|
content: `match:
|
||||||
|
- speaker: "Eric"
|
||||||
|
match: [" "]
|
||||||
|
`,
|
||||||
|
want: "contains empty match string",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "duplicate speaker",
|
||||||
|
content: `match:
|
||||||
|
- speaker: "Eric"
|
||||||
|
match: ["eric"]
|
||||||
|
- speaker: "Eric"
|
||||||
|
match: ["rakestraw"]
|
||||||
|
`,
|
||||||
|
want: `duplicate speaker "Eric"`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := writeSpeakerMap(t, dir, test.content)
|
||||||
|
|
||||||
|
_, err := LoadMap(path)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), test.want) {
|
||||||
|
t.Fatalf("expected error to contain %q, got %v", test.want, err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeSpeakerMap writes content to a file named speakers.yml inside dir with
// mode 0o600 and returns the file's path. A write error fails the calling test.
func writeSpeakerMap(t *testing.T, dir string, content string) string {
	t.Helper()

	target := filepath.Join(dir, "speakers.yml")
	err := os.WriteFile(target, []byte(content), 0o600)
	if err != nil {
		t.Fatalf("write speaker map: %v", err)
	}

	return target
}
|
||||||
Reference in New Issue
Block a user