// Package autocorrect loads ordered replacement rules from a YAML file and
// applies them to text as whole-token substitutions.
package autocorrect

import (
	"fmt"
	"os"
	"strings"
	"unicode"
	"unicode/utf8"

	"gopkg.in/yaml.v3"
)

// Rules stores ordered autocorrect replacement rules.
//
// The zero value holds no rules; Apply on it returns the input unchanged.
type Rules struct {
	rules []Rule
}

// Rule replaces ordered match strings with a canonical target.
type Rule struct {
	// Target is the replacement text written in place of every match.
	Target string `yaml:"target"`
	// Match lists the strings that are replaced by Target, in order.
	Match []string `yaml:"match"`
}

// fileSchema mirrors the top-level layout of the YAML file: a single
// "autocorrect" key holding the list of rules.
type fileSchema struct {
	Autocorrect []Rule `yaml:"autocorrect"`
}

// Load parses and validates an autocorrect.yml file.
//
// Targets and match strings are trimmed of surrounding whitespace before
// validation. Load returns an error when the file cannot be read or parsed,
// when it contains no rules, when a rule has an empty target or no match
// strings, when a match string is empty, or when a match string occurs more
// than once (within one rule or across rules). Rule and match indices in
// error messages are zero-based.
func Load(path string) (Rules, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		// Returned unwrapped: the *os.PathError from os.ReadFile already
		// names the operation and the path.
		return Rules{}, err
	}

	var parsed fileSchema
	if err := yaml.Unmarshal(data, &parsed); err != nil {
		return Rules{}, fmt.Errorf("parse autocorrect file %q: %w", path, err)
	}
	if len(parsed.Autocorrect) == 0 {
		return Rules{}, fmt.Errorf("autocorrect file %q must contain at least one autocorrect rule", path)
	}

	// seenMatches maps every accepted match string to the index of the rule
	// that owns it, so a clash can be reported with both rule indices.
	seenMatches := make(map[string]int)
	rules := make([]Rule, 0, len(parsed.Autocorrect))
	for ruleIndex, rule := range parsed.Autocorrect {
		rule.Target = strings.TrimSpace(rule.Target)
		if rule.Target == "" {
			return Rules{}, fmt.Errorf("autocorrect rule %d must include target", ruleIndex)
		}
		if len(rule.Match) == 0 {
			return Rules{}, fmt.Errorf("autocorrect rule %d for target %q must include at least one match string", ruleIndex, rule.Target)
		}

		// localMatches detects duplicates inside the current rule; it is
		// checked first so the error names one rule rather than "rule N and
		// rule N".
		localMatches := make(map[string]struct{}, len(rule.Match))
		for matchIndex, match := range rule.Match {
			match = strings.TrimSpace(match)
			if match == "" {
				return Rules{}, fmt.Errorf("autocorrect rule %d for target %q contains empty match string at index %d", ruleIndex, rule.Target, matchIndex)
			}
			if _, exists := localMatches[match]; exists {
				return Rules{}, fmt.Errorf("autocorrect rule %d for target %q contains duplicate match string %q", ruleIndex, rule.Target, match)
			}
			localMatches[match] = struct{}{}

			if previousRuleIndex, exists := seenMatches[match]; exists {
				return Rules{}, fmt.Errorf("autocorrect match string %q appears in both rule %d and rule %d", match, previousRuleIndex, ruleIndex)
			}
			seenMatches[match] = ruleIndex

			// Store the trimmed form so Apply searches for exactly what was
			// validated.
			rule.Match[matchIndex] = match
		}
		rules = append(rules, rule)
	}

	return Rules{rules: rules}, nil
}

// Apply replaces configured whole-token matches and returns the updated text
// and replacement count.
//
// Rules are applied in their stored order and, within a rule, match strings
// are applied in their listed order. Each pass operates on the output of the
// previous one, so text produced by an earlier replacement is visible to
// later match strings.
func (r Rules) Apply(text string) (string, int) {
	total := 0
	for _, rule := range r.rules {
		for _, match := range rule.Match {
			var count int
			text, count = replaceWholeToken(text, match, rule.Target)
			total += count
		}
	}
	return text, total
}

// replaceWholeToken replaces every occurrence of match in text that is not
// directly adjacent to a word character (see isTokenBoundary) with target.
// It returns the resulting text and the number of replacements made. When
// nothing is replaced, or when text or match is empty, text is returned as-is.
func replaceWholeToken(text string, match string, target string) (string, int) {
	if text == "" || match == "" {
		return text, 0
	}

	var builder strings.Builder
	replacements := 0
	searchStart := 0 // byte offset where the next search begins
	writeStart := 0  // byte offset of the first input byte not yet copied to builder
	for {
		index := strings.Index(text[searchStart:], match)
		if index == -1 {
			break
		}
		index += searchStart
		end := index + len(match)

		if isTokenBoundary(text, index-1) && isTokenBoundary(text, end) {
			builder.WriteString(text[writeStart:index])
			builder.WriteString(target)
			replacements++
			writeStart = end
			searchStart = end
			continue
		}

		// Embedded in a larger token: step one byte forward and keep looking.
		searchStart = index + 1
	}

	if replacements == 0 {
		return text, 0
	}
	builder.WriteString(text[writeStart:])
	return builder.String(), replacements
}

// isTokenBoundary reports whether byte offset index in text lies outside the
// string or belongs to a character that cannot be part of a token.
//
// The rune containing the byte at index is decoded as UTF-8, so non-ASCII
// letters and digits (for example the "é" in "café") count as token
// characters instead of being mistaken for boundaries. Bytes that are not
// part of a valid UTF-8 sequence are treated as boundaries.
func isTokenBoundary(text string, index int) bool {
	if index < 0 || index >= len(text) {
		return true
	}

	// index may point at any byte of a multi-byte rune (callers probe both the
	// byte before a match and the byte after it), so back up to the rune's
	// first byte. A rune is at most utf8.UTFMax bytes long.
	start := index
	for start > 0 && index-start < utf8.UTFMax-1 && !utf8.RuneStart(text[start]) {
		start--
	}

	r, size := utf8.DecodeRuneInString(text[start:])
	if start+size <= index {
		// The rune decoded at start ends before index, so the byte at index
		// is a stray continuation byte.
		return true
	}
	return !isWordRune(r)
}

// isWordRune reports whether r can be part of a token: a letter, a digit, a
// combining mark (so decomposed accents stay attached to their base letter),
// or an underscore.
func isWordRune(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r)
}