Files
seriatim/internal/autocorrect/autocorrect.go

133 lines
3.5 KiB
Go

package autocorrect
import (
	"fmt"
	"os"
	"strings"
	"unicode"
	"unicode/utf8"

	"gopkg.in/yaml.v3"
)
// Rules stores ordered autocorrect replacement rules.
//
// Load builds a Rules value from a YAML file, and Apply runs the stored rules
// against a piece of text in the order they are held here.
type Rules struct {
// rules holds the rules in the order Load appended them.
rules []Rule
}
// Rule replaces ordered match strings with a canonical target.
//
// Both fields are decoded from YAML under the keys "target" and "match".
type Rule struct {
// Target is the text written in place of every matched token.
Target string `yaml:"target"`
// Match lists the strings to search for; Apply tries them in slice order.
Match []string `yaml:"match"`
}
// fileSchema is the top-level document shape that Load decodes the YAML file
// into.
type fileSchema struct {
// Autocorrect is the rule list found under the top-level "autocorrect" key.
Autocorrect []Rule `yaml:"autocorrect"`
}
// Load reads the YAML file at path, validates every autocorrect rule in it,
// and returns the rules in file order.
//
// Targets and match strings are trimmed of surrounding whitespace before they
// are checked and stored. Load fails when:
//   - the file cannot be read (the os error is returned as-is) or parsed,
//   - the file contains no rules,
//   - a rule has an empty target or no match strings,
//   - a match string is empty, repeated within its rule, or already used by
//     an earlier rule.
//
// Rule and match positions reported in error messages are zero-based.
func Load(path string) (Rules, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return Rules{}, err
	}

	var schema fileSchema
	if err = yaml.Unmarshal(raw, &schema); err != nil {
		return Rules{}, fmt.Errorf("parse autocorrect file %q: %w", path, err)
	}
	if len(schema.Autocorrect) == 0 {
		return Rules{}, fmt.Errorf("autocorrect file %q must contain at least one autocorrect rule", path)
	}

	// owners maps each accepted match string to the index of the rule that
	// claimed it, so a clash across rules can name both sides.
	owners := make(map[string]int)
	validated := make([]Rule, 0, len(schema.Autocorrect))

	for ri, rule := range schema.Autocorrect {
		rule.Target = strings.TrimSpace(rule.Target)
		switch {
		case rule.Target == "":
			return Rules{}, fmt.Errorf("autocorrect rule %d must include target", ri)
		case len(rule.Match) == 0:
			return Rules{}, fmt.Errorf("autocorrect rule %d for target %q must include at least one match string", ri, rule.Target)
		}

		// seenInRule catches repeats inside this rule; it is consulted before
		// owners so that an in-rule repeat gets the more specific message.
		seenInRule := make(map[string]struct{}, len(rule.Match))
		for mi := range rule.Match {
			trimmed := strings.TrimSpace(rule.Match[mi])
			if trimmed == "" {
				return Rules{}, fmt.Errorf("autocorrect rule %d for target %q contains empty match string at index %d", ri, rule.Target, mi)
			}
			if _, repeated := seenInRule[trimmed]; repeated {
				return Rules{}, fmt.Errorf("autocorrect rule %d for target %q contains duplicate match string %q", ri, rule.Target, trimmed)
			}
			seenInRule[trimmed] = struct{}{}

			if earlier, taken := owners[trimmed]; taken {
				return Rules{}, fmt.Errorf("autocorrect match string %q appears in both rule %d and rule %d", trimmed, earlier, ri)
			}
			owners[trimmed] = ri

			// Store the trimmed form so Apply searches for exactly what was validated.
			rule.Match[mi] = trimmed
		}

		validated = append(validated, rule)
	}

	return Rules{rules: validated}, nil
}
// Apply runs every rule against text and returns the rewritten text together
// with the total number of replacements made.
//
// Rules are processed in stored order and, within a rule, match strings are
// processed in slice order. Each pass operates on the output of the previous
// one, so a later match string sees text already rewritten by earlier ones.
func (r Rules) Apply(text string) (string, int) {
	replaced := 0
	for i := range r.rules {
		target := r.rules[i].Target
		for _, candidate := range r.rules[i].Match {
			updated, n := replaceWholeToken(text, candidate, target)
			text = updated
			replaced += n
		}
	}
	return text, replaced
}
// replaceWholeToken replaces each occurrence of match in text that is
// delimited by token boundaries on both sides with target. It returns the
// resulting text and the number of replacements.
//
// Occurrences are found left to right and never overlap: after a replacement
// the scan resumes right after the replaced span. An occurrence embedded in a
// larger token is skipped and the scan resumes one byte past its start. When
// nothing is replaced, or when text or match is empty, text is returned
// unchanged with a count of 0.
func replaceWholeToken(text string, match string, target string) (string, int) {
	if len(text) == 0 || len(match) == 0 {
		return text, 0
	}

	var out strings.Builder
	count := 0
	flushed := 0 // start of the part of text not yet copied into out
	cursor := 0  // where the next search begins

	for {
		offset := strings.Index(text[cursor:], match)
		if offset < 0 {
			break
		}
		start := cursor + offset
		stop := start + len(match)

		if !isTokenBoundary(text, start-1) || !isTokenBoundary(text, stop) {
			// Embedded in a longer token: step one byte forward and retry.
			cursor = start + 1
			continue
		}

		out.WriteString(text[flushed:start])
		out.WriteString(target)
		count++
		flushed, cursor = stop, stop
	}

	if count == 0 {
		return text, 0
	}
	out.WriteString(text[flushed:])
	return out.String(), count
}
// isTokenBoundary reports whether the byte at index in text is NOT part of a
// token, i.e. whether a match ending just before it or starting just after it
// is delimited on that side.
//
// Positions outside the string (index < 0 or index >= len(text)) are
// boundaries. Otherwise the character covering that byte is examined: letters,
// combining marks, decimal digits and '_' are token characters; everything
// else is a boundary.
//
// index is a byte offset and may point into the middle of a multi-byte UTF-8
// sequence (callers pass the byte immediately before a match). The enclosing
// rune is decoded so that non-ASCII letters such as 'é' are recognised as
// token characters rather than being judged byte by byte. Bytes that are not
// part of a valid UTF-8 sequence are treated as boundaries.
func isTokenBoundary(text string, index int) bool {
	if index < 0 || index >= len(text) {
		return true
	}

	// ASCII fast path.
	if c := text[index]; c < utf8.RuneSelf {
		switch {
		case c == '_', '0' <= c && c <= '9', 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
			return false
		}
		return true
	}

	// Back up to the first byte of the rune covering index. A rune has at most
	// utf8.UTFMax-1 continuation bytes, which bounds the walk on malformed input.
	start := index
	for start > 0 && index-start < utf8.UTFMax-1 && !utf8.RuneStart(text[start]) {
		start--
	}
	r, size := utf8.DecodeRuneInString(text[start:])
	if start+size <= index {
		// The decoded rune ends before index, so index is a stray byte of a
		// malformed sequence.
		return true
	}
	return !(unicode.IsLetter(r) || unicode.IsMark(r) || unicode.IsDigit(r))
}