84 lines
2.3 KiB
Go
84 lines
2.3 KiB
Go
package speaker
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// Map resolves input file basenames to canonical speaker names using ordered substring rules.
|
|
type Map struct {
|
|
rules []Rule
|
|
}
|
|
|
|
// Rule is a single entry of the speaker map: one canonical speaker name plus
// the substrings that identify that speaker's files.
type Rule struct {
	// Speaker is the canonical name reported when the rule matches.
	Speaker string `yaml:"speaker"`
	// Match lists the substrings looked for in a file basename; a hit on any
	// one of them selects this rule.
	Match []string `yaml:"match"`
}
|
|
|
|
type fileSchema struct {
|
|
Match []Rule `yaml:"match"`
|
|
}
|
|
|
|
// LoadMap parses a speakers.yml file.
|
|
func LoadMap(path string) (Map, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return Map{}, err
|
|
}
|
|
|
|
var parsed fileSchema
|
|
if err := yaml.Unmarshal(data, &parsed); err != nil {
|
|
return Map{}, fmt.Errorf("parse speaker map %q: %w", path, err)
|
|
}
|
|
if len(parsed.Match) == 0 {
|
|
return Map{}, fmt.Errorf("speaker map %q must contain at least one match rule", path)
|
|
}
|
|
|
|
seenSpeakers := make(map[string]struct{}, len(parsed.Match))
|
|
rules := make([]Rule, 0, len(parsed.Match))
|
|
for index, rule := range parsed.Match {
|
|
rule.Speaker = strings.TrimSpace(rule.Speaker)
|
|
if rule.Speaker == "" {
|
|
return Map{}, fmt.Errorf("speaker map rule %d must include speaker", index)
|
|
}
|
|
if _, exists := seenSpeakers[rule.Speaker]; exists {
|
|
return Map{}, fmt.Errorf("speaker map contains duplicate speaker %q", rule.Speaker)
|
|
}
|
|
seenSpeakers[rule.Speaker] = struct{}{}
|
|
|
|
if len(rule.Match) == 0 {
|
|
return Map{}, fmt.Errorf("speaker map rule %d for speaker %q must include at least one match string", index, rule.Speaker)
|
|
}
|
|
for matchIndex, match := range rule.Match {
|
|
match = strings.TrimSpace(match)
|
|
if match == "" {
|
|
return Map{}, fmt.Errorf("speaker map rule %d for speaker %q contains empty match string at index %d", index, rule.Speaker, matchIndex)
|
|
}
|
|
rule.Match[matchIndex] = match
|
|
}
|
|
|
|
rules = append(rules, rule)
|
|
}
|
|
|
|
return Map{rules: rules}, nil
|
|
}
|
|
|
|
// SpeakerForSource returns the canonical speaker for a transcript source path.
|
|
func (m Map) SpeakerForSource(source string) (string, error) {
|
|
basename := filepath.Base(source)
|
|
normalized := strings.ToLower(basename)
|
|
for _, rule := range m.rules {
|
|
for _, match := range rule.Match {
|
|
if strings.Contains(normalized, strings.ToLower(match)) {
|
|
return rule.Speaker, nil
|
|
}
|
|
}
|
|
}
|
|
return "", fmt.Errorf("speaker map has no match for input basename %q", basename)
|
|
}
|