package speaker

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"gopkg.in/yaml.v3"
)

// Map resolves input file basenames to canonical speaker names using ordered
// substring rules. A Map without rules matches nothing, so every lookup on it
// reports an error.
type Map struct {
	rules []Rule
}

// Rule describes one ordered speaker matching rule: the canonical Speaker name
// and the substrings (Match) that select it.
type Rule struct {
	Speaker string   `yaml:"speaker"`
	Match   []string `yaml:"match"`
}

// fileSchema mirrors the top-level layout of a speakers.yml document: a single
// "match" key holding the ordered list of rules.
type fileSchema struct {
	Match []Rule `yaml:"match"`
}

// LoadMap parses a speakers.yml file and returns the validated rule set.
//
// The document is expected to look like:
//
//	match:
//	  - speaker: Alice
//	    match: ["alice", "al_"]
//
// A failure to read the file is returned unchanged; a YAML decoding failure is
// wrapped together with path. Validation then requires that:
//   - the file contains at least one rule;
//   - every rule names a speaker that is non-empty once surrounding whitespace
//     is trimmed, and no two rules share the same trimmed speaker;
//   - every rule lists at least one match string, none of which is blank.
//
// Speakers and match strings are stored trimmed, and rules keep their file
// order. Rule and match positions quoted in error messages are zero-based.
func LoadMap(path string) (Map, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return Map{}, err
	}

	var schema fileSchema
	if err = yaml.Unmarshal(raw, &schema); err != nil {
		return Map{}, fmt.Errorf("parse speaker map %q: %w", path, err)
	}

	count := len(schema.Match)
	if count == 0 {
		return Map{}, fmt.Errorf("speaker map %q must contain at least one match rule", path)
	}

	known := make(map[string]struct{}, count)
	ordered := make([]Rule, 0, count)
	for i := range schema.Match {
		entry := schema.Match[i]
		name := strings.TrimSpace(entry.Speaker)

		// Rule-level checks, reported in a fixed order: missing speaker,
		// repeated speaker, then missing match list.
		_, repeated := known[name]
		switch {
		case name == "":
			return Map{}, fmt.Errorf("speaker map rule %d must include speaker", i)
		case repeated:
			return Map{}, fmt.Errorf("speaker map contains duplicate speaker %q", name)
		case len(entry.Match) == 0:
			return Map{}, fmt.Errorf("speaker map rule %d for speaker %q must include at least one match string", i, name)
		}
		known[name] = struct{}{}

		// Trim each match string in place so lookups never see padding.
		for j, pattern := range entry.Match {
			trimmed := strings.TrimSpace(pattern)
			if trimmed == "" {
				return Map{}, fmt.Errorf("speaker map rule %d for speaker %q contains empty match string at index %d", i, name, j)
			}
			entry.Match[j] = trimmed
		}

		entry.Speaker = name
		ordered = append(ordered, entry)
	}

	return Map{rules: ordered}, nil
}

// SpeakerForSource returns the canonical speaker for a transcript source path.
// Only the basename of source is examined. It is compared case-insensitively
// against each rule's match strings in rule order, and the first rule with a
// match string contained in the basename wins. An error is returned when no
// rule matches.
func (m Map) SpeakerForSource(source string) (string, error) { basename := filepath.Base(source) normalized := strings.ToLower(basename) for _, rule := range m.rules { for _, match := range rule.Match { if strings.Contains(normalized, strings.ToLower(match)) { return rule.Speaker, nil } } } return "", fmt.Errorf("speaker map has no match for input basename %q", basename) }