Implemented the initial Go framework

This commit is contained in:
2026-04-26 13:03:07 -05:00
parent f8a40ea47c
commit 4e027263a4
19 changed files with 1353 additions and 0 deletions

206
internal/config/config.go Normal file
View File

@@ -0,0 +1,206 @@
package config
import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
)
// Default values for the reader and module-selection options.
//
// The three module defaults are comma-separated lists, which is the format
// parseModuleList accepts.
//
// NOTE(review): nothing in the visible part of this file reads these
// constants; presumably the CLI layer passes them as flag defaults — confirm.
const (
// DefaultInputReader is the default input reader name.
DefaultInputReader = "json-files"
// DefaultOutputModules is the default output module list.
DefaultOutputModules = "json"
// DefaultPreprocessingModules is the default preprocessing module list.
// It includes "normalize-speakers", so NewMergeConfig requires a speakers
// file whenever this list is supplied as MergeOptions.PreprocessingModules.
DefaultPreprocessingModules = "validate-raw,normalize-speakers,trim-text"
// DefaultPostprocessingModules is the default postprocessing module list.
DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,assign-ids,validate-output"
)
// MergeOptions captures raw CLI option values before validation.
//
// The values are passed to NewMergeConfig, which trims, parses and validates
// them. Module options are still single comma-separated strings at this stage.
type MergeOptions struct {
// InputFiles lists the input paths; at least one is required and each must
// exist and not be a directory.
InputFiles []string
// OutputFile is the required output path; its parent directory must exist.
OutputFile string
// ReportFile is an optional output path; leave it empty to skip its
// validation.
ReportFile string
// SpeakersFile is required when PreprocessingModules includes
// "normalize-speakers". When supplied it must exist and not be a directory.
SpeakersFile string
// AutocorrectFile is required when either PreprocessingModules or
// PostprocessingModules includes "autocorrect". When supplied it must exist
// and not be a directory.
AutocorrectFile string
// InputReader is the required reader name; it must be non-blank.
InputReader string
// OutputModules is a comma-separated list that must name at least one module.
OutputModules string
// PreprocessingModules is a comma-separated list; it may be empty.
PreprocessingModules string
// PostprocessingModules is a comma-separated list; it may be empty.
PostprocessingModules string
}
// Config is the validated runtime configuration for a merge invocation.
//
// Values of this type are produced by NewMergeConfig, which only returns a
// populated Config when every check has passed.
type Config struct {
// InputFiles holds the input paths after filepath.Clean, sorted with
// sort.Strings.
InputFiles []string
// OutputFile is the cleaned output path.
OutputFile string
// ReportFile is the cleaned report path, or empty when none was requested.
ReportFile string
// SpeakersFile is the cleaned speakers path, or empty when none was given.
SpeakersFile string
// AutocorrectFile is the cleaned autocorrect path, or empty when none was
// given.
AutocorrectFile string
// InputReader is the reader name with surrounding whitespace removed.
InputReader string
// OutputModules holds the parsed output module names; never empty.
OutputModules []string
// PreprocessingModules holds the parsed preprocessing module names; nil when
// the option was blank.
PreprocessingModules []string
// PostprocessingModules holds the parsed postprocessing module names; nil
// when the option was blank.
PostprocessingModules []string
}
// NewMergeConfig validates raw merge options and returns normalized config.
//
// Checks run in a fixed order and stop at the first failure:
//
//  1. InputReader must be non-blank.
//  2. The three module options are parsed as comma-separated lists, and
//     OutputModules must name at least one module.
//  3. Every input file must exist and must not be a directory; the resulting
//     list is cleaned and sorted.
//  4. OutputFile is required and its parent directory must exist. ReportFile
//     is optional and is checked the same way only when supplied.
//  5. SpeakersFile is required when "normalize-speakers" is a preprocessing
//     module, and AutocorrectFile is required when "autocorrect" is a
//     preprocessing or postprocessing module. Either file, when supplied,
//     must exist and must not be a directory, even if its module is disabled.
//
// Optional paths that are blank after trimming are treated as unset.
//
// On failure the zero Config is returned together with a non-nil error.
func NewMergeConfig(opts MergeOptions) (Config, error) {
	cfg := Config{InputReader: strings.TrimSpace(opts.InputReader)}
	if cfg.InputReader == "" {
		return Config{}, errors.New("--input-reader is required")
	}

	var err error
	if cfg.OutputModules, err = parseModuleList(opts.OutputModules); err != nil {
		return Config{}, fmt.Errorf("--output-modules: %w", err)
	}
	if cfg.PreprocessingModules, err = parseModuleList(opts.PreprocessingModules); err != nil {
		return Config{}, fmt.Errorf("--preprocessing-modules: %w", err)
	}
	if cfg.PostprocessingModules, err = parseModuleList(opts.PostprocessingModules); err != nil {
		return Config{}, fmt.Errorf("--postprocessing-modules: %w", err)
	}
	if len(cfg.OutputModules) == 0 {
		return Config{}, errors.New("--output-modules must include at least one module")
	}

	if cfg.InputFiles, err = normalizeInputFiles(opts.InputFiles); err != nil {
		return Config{}, err
	}
	if cfg.OutputFile, err = normalizeOutputPath(opts.OutputFile, "--output-file"); err != nil {
		return Config{}, err
	}

	// A whitespace-only report path counts as "not supplied"; passing it on
	// would fail with a "required" error for a flag that is optional.
	if strings.TrimSpace(opts.ReportFile) != "" {
		if cfg.ReportFile, err = normalizeOutputPath(opts.ReportFile, "--report-file"); err != nil {
			return Config{}, err
		}
	}

	// filepath.Clean turns "" into ".", so blank values must be filtered out
	// before cleaning; otherwise they would be validated as the current
	// directory and rejected with a misleading "is a directory" error.
	optionalPath := func(raw string) string {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			return ""
		}
		return filepath.Clean(trimmed)
	}

	cfg.SpeakersFile = optionalPath(opts.SpeakersFile)
	if cfg.SpeakersFile == "" {
		if contains(cfg.PreprocessingModules, "normalize-speakers") {
			return Config{}, errors.New("--speakers is required when normalize-speakers is enabled")
		}
	} else if err := requireFile(cfg.SpeakersFile, "--speakers"); err != nil {
		// A supplied file is checked even when the module is disabled.
		return Config{}, err
	}

	cfg.AutocorrectFile = optionalPath(opts.AutocorrectFile)
	if cfg.AutocorrectFile == "" {
		if contains(cfg.PreprocessingModules, "autocorrect") || contains(cfg.PostprocessingModules, "autocorrect") {
			return Config{}, errors.New("--autocorrect is required when autocorrect is enabled")
		}
	} else if err := requireFile(cfg.AutocorrectFile, "--autocorrect"); err != nil {
		// A supplied file is checked even when the module is disabled.
		return Config{}, err
	}

	return cfg, nil
}
// parseModuleList splits a comma-separated option value into module names.
//
// Whitespace around the whole value and around each name is removed. A blank
// value yields a nil slice and no error. A blank entry between commas, such
// as in "a,,b" or after a trailing comma, is rejected with an error.
func parseModuleList(value string) ([]string, error) {
	trimmed := strings.TrimSpace(value)
	if len(trimmed) == 0 {
		return nil, nil
	}

	// Trim the split fields in place; the slice is ours, so no copy is needed.
	fields := strings.Split(trimmed, ",")
	for i := range fields {
		fields[i] = strings.TrimSpace(fields[i])
		if fields[i] == "" {
			return nil, errors.New("module names cannot be empty")
		}
	}
	return fields, nil
}
// normalizeInputFiles validates the --input-file values and returns them
// cleaned and sorted.
//
// Each entry is trimmed, passed through filepath.Clean and checked with
// requireFile. An empty list, a blank entry, or an entry that fails the file
// check produces an error and a nil slice. The result is ordered with
// sort.Strings, so it does not preserve the order of paths.
func normalizeInputFiles(paths []string) ([]string, error) {
	if len(paths) == 0 {
		return nil, errors.New("at least one --input-file is required")
	}

	cleaned := make([]string, len(paths))
	for i, raw := range paths {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			return nil, errors.New("--input-file cannot be empty")
		}
		cleaned[i] = filepath.Clean(trimmed)
		if err := requireFile(cleaned[i], "--input-file"); err != nil {
			return nil, err
		}
	}

	sort.Strings(cleaned)
	return cleaned, nil
}
// normalizeOutputPath validates an output path option and returns it cleaned.
//
// path is the raw option value and flag is the option name used in error
// messages. The value is trimmed and passed through filepath.Clean. An error
// is returned when the value is blank, when the parent directory cannot be
// stat'ed or is not a directory, or when the path itself already exists as a
// directory. The target file is not required to exist.
func normalizeOutputPath(path string, flag string) (string, error) {
	path = strings.TrimSpace(path)
	if path == "" {
		return "", fmt.Errorf("%s is required", flag)
	}

	clean := filepath.Clean(path)
	parent := filepath.Dir(clean)
	parentInfo, err := os.Stat(parent)
	if err != nil {
		return "", fmt.Errorf("%s parent directory %q: %w", flag, parent, err)
	}
	if !parentInfo.IsDir() {
		return "", fmt.Errorf("%s parent path %q is not a directory", flag, parent)
	}

	// The target usually does not exist yet, so a failed Stat is not an error
	// here. An existing directory, however, can never be written as a file;
	// this also catches ".", whose parent is itself and so passes the checks
	// above.
	if info, statErr := os.Stat(clean); statErr == nil && info.IsDir() {
		return "", fmt.Errorf("%s %q is a directory, not a file", flag, clean)
	}
	return clean, nil
}
// requireFile checks that path can be stat'ed and is not a directory.
//
// flag is the option name used to prefix the error message. A Stat failure is
// wrapped with %w so callers can still inspect the underlying error. The
// function returns nil when the path exists and is anything but a directory.
func requireFile(path string, flag string) error {
	info, statErr := os.Stat(path)
	switch {
	case statErr != nil:
		return fmt.Errorf("%s %q: %w", flag, path, statErr)
	case info.IsDir():
		return fmt.Errorf("%s %q is a directory, not a file", flag, path)
	default:
		return nil
	}
}
// contains reports whether target is an element of values.
// A nil or empty slice never contains anything.
func contains(values []string, target string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == target {
			return true
		}
	}
	return false
}