310 lines
8.4 KiB
Go
310 lines
8.4 KiB
Go
package config
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// Default selections for the merge pipeline. Within this file only
// DefaultOutputSchema and the numeric defaults are applied directly; the
// remaining values are presumably consumed as flag defaults by the CLI
// layer — confirm against callers.
const (
	// DefaultInputReader names the default input reader.
	DefaultInputReader = "json-files"
	// DefaultOutputModules is the default comma-separated output module list.
	DefaultOutputModules = "json"
	// DefaultOutputSchema is used when no output schema is supplied.
	DefaultOutputSchema = OutputSchemaSeriatim
	// DefaultPreprocessingModules is the default comma-separated
	// preprocessing module list.
	DefaultPreprocessingModules = "validate-raw,normalize-speakers,trim-text"
	// DefaultPostprocessingModules is the default comma-separated
	// postprocessing module list. "detect-overlaps" appears twice in it.
	DefaultPostprocessingModules = "detect-overlaps,resolve-overlaps,backchannel,filler,coalesce,detect-overlaps,autocorrect,assign-ids,validate-output"
)

// Default numeric tuning values, in seconds.
const (
	// DefaultOverlapWordRunGap is used when OverlapWordRunGapEnv is unset.
	DefaultOverlapWordRunGap = 0.75
	// DefaultWordRunReorderWindow is used when WordRunReorderWindowEnv is unset.
	DefaultWordRunReorderWindow = 1.0
	// DefaultCoalesceGap is used when no coalesce gap option is supplied.
	DefaultCoalesceGap = 3.0
	// DefaultCoalesceGapValue is the textual form of DefaultCoalesceGap; the
	// two must be kept in sync by hand.
	DefaultCoalesceGapValue = "3.0"
	// DefaultBackchannelMaxDuration is used when BackchannelMaxDurationEnv is unset.
	DefaultBackchannelMaxDuration = 2.0
	// DefaultFillerMaxDuration is used when FillerMaxDurationEnv is unset.
	DefaultFillerMaxDuration = 1.25
)

// Names of the environment variables that override the numeric defaults.
const (
	OverlapWordRunGapEnv      = "SERIATIM_OVERLAP_WORD_RUN_GAP"
	WordRunReorderWindowEnv   = "SERIATIM_OVERLAP_WORD_RUN_REORDER_WINDOW"
	BackchannelMaxDurationEnv = "SERIATIM_BACKCHANNEL_MAX_DURATION"
	FillerMaxDurationEnv      = "SERIATIM_FILLER_MAX_DURATION"
)

// Accepted output schema names.
const (
	OutputSchemaSeriatim = "seriatim"
	OutputSchemaMinimal  = "minimal"
)
|
|
|
|
// MergeOptions captures raw CLI option values before validation.
//
// Every field holds the text exactly as supplied; NewMergeConfig trims,
// parses, and validates the values.
type MergeOptions struct {
	// InputFiles lists the input paths. At least one is required.
	InputFiles []string

	// OutputFile is the required output path.
	OutputFile string
	// ReportFile is an optional report path; an empty value skips it.
	ReportFile string

	// SpeakersFile is an optional path reported as "--speakers" in errors.
	SpeakersFile string
	// AutocorrectFile is an optional path reported as "--autocorrect" in errors.
	AutocorrectFile string

	// InputReader is the required input reader name.
	InputReader string
	// OutputModules is a comma-separated list of output module names.
	OutputModules string
	// OutputSchema is the output schema name; an empty value selects
	// DefaultOutputSchema.
	OutputSchema string
	// PreprocessingModules is a comma-separated list of module names.
	PreprocessingModules string
	// PostprocessingModules is a comma-separated list of module names.
	PostprocessingModules string

	// CoalesceGap is the coalesce gap in seconds, as text; an empty value
	// selects DefaultCoalesceGap.
	CoalesceGap string
}
|
|
|
|
// Config is the validated runtime configuration for a merge invocation.
//
// Values are produced by NewMergeConfig from MergeOptions and from the
// tuning environment variables.
type Config struct {
	// InputFiles holds the cleaned input paths in sorted order.
	InputFiles []string

	// OutputFile is the cleaned output path.
	OutputFile string
	// ReportFile is the cleaned report path, or empty when none was given.
	ReportFile string

	// SpeakersFile is the cleaned speakers path, or empty when none was given.
	SpeakersFile string
	// AutocorrectFile is the cleaned autocorrect path, or empty when none
	// was given.
	AutocorrectFile string

	// InputReader is the trimmed input reader name.
	InputReader string
	// OutputModules lists the output module names in the order supplied.
	OutputModules []string
	// OutputSchema is OutputSchemaSeriatim or OutputSchemaMinimal.
	OutputSchema string
	// PreprocessingModules lists module names in the order supplied.
	PreprocessingModules []string
	// PostprocessingModules lists module names in the order supplied.
	PostprocessingModules []string

	// OverlapWordRunGap is read from OverlapWordRunGapEnv, in seconds.
	OverlapWordRunGap float64
	// WordRunReorderWindow is read from WordRunReorderWindowEnv, in seconds.
	WordRunReorderWindow float64
	// CoalesceGap is parsed from MergeOptions.CoalesceGap, in seconds.
	CoalesceGap float64
	// BackchannelMaxDuration is read from BackchannelMaxDurationEnv, in seconds.
	BackchannelMaxDuration float64
	// FillerMaxDuration is read from FillerMaxDurationEnv, in seconds.
	FillerMaxDuration float64
}
|
|
|
|
// NewMergeConfig validates raw merge options and returns normalized config.
|
|
func NewMergeConfig(opts MergeOptions) (Config, error) {
|
|
cfg := Config{
|
|
InputReader: strings.TrimSpace(opts.InputReader),
|
|
OutputModules: nil,
|
|
OutputSchema: strings.TrimSpace(opts.OutputSchema),
|
|
PreprocessingModules: nil,
|
|
PostprocessingModules: nil,
|
|
OverlapWordRunGap: DefaultOverlapWordRunGap,
|
|
WordRunReorderWindow: DefaultWordRunReorderWindow,
|
|
CoalesceGap: DefaultCoalesceGap,
|
|
BackchannelMaxDuration: DefaultBackchannelMaxDuration,
|
|
FillerMaxDuration: DefaultFillerMaxDuration,
|
|
}
|
|
|
|
if cfg.InputReader == "" {
|
|
return Config{}, errors.New("--input-reader is required")
|
|
}
|
|
if cfg.OutputSchema == "" {
|
|
cfg.OutputSchema = DefaultOutputSchema
|
|
}
|
|
if err := validateOutputSchema(cfg.OutputSchema); err != nil {
|
|
return Config{}, err
|
|
}
|
|
|
|
var err error
|
|
cfg.OutputModules, err = parseModuleList(opts.OutputModules)
|
|
if err != nil {
|
|
return Config{}, fmt.Errorf("--output-modules: %w", err)
|
|
}
|
|
cfg.PreprocessingModules, err = parseModuleList(opts.PreprocessingModules)
|
|
if err != nil {
|
|
return Config{}, fmt.Errorf("--preprocessing-modules: %w", err)
|
|
}
|
|
cfg.PostprocessingModules, err = parseModuleList(opts.PostprocessingModules)
|
|
if err != nil {
|
|
return Config{}, fmt.Errorf("--postprocessing-modules: %w", err)
|
|
}
|
|
if len(cfg.OutputModules) == 0 {
|
|
return Config{}, errors.New("--output-modules must include at least one module")
|
|
}
|
|
|
|
cfg.InputFiles, err = normalizeInputFiles(opts.InputFiles)
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
|
|
cfg.OutputFile, err = normalizeOutputPath(opts.OutputFile, "--output-file")
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
|
|
if opts.ReportFile != "" {
|
|
cfg.ReportFile, err = normalizeOutputPath(opts.ReportFile, "--report-file")
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
}
|
|
|
|
cfg.SpeakersFile = filepath.Clean(strings.TrimSpace(opts.SpeakersFile))
|
|
if opts.SpeakersFile == "" {
|
|
cfg.SpeakersFile = ""
|
|
}
|
|
cfg.AutocorrectFile = filepath.Clean(strings.TrimSpace(opts.AutocorrectFile))
|
|
if opts.AutocorrectFile == "" {
|
|
cfg.AutocorrectFile = ""
|
|
}
|
|
|
|
if cfg.SpeakersFile != "" {
|
|
if err := requireFile(cfg.SpeakersFile, "--speakers"); err != nil {
|
|
return Config{}, err
|
|
}
|
|
}
|
|
|
|
if cfg.AutocorrectFile != "" {
|
|
if err := requireFile(cfg.AutocorrectFile, "--autocorrect"); err != nil {
|
|
return Config{}, err
|
|
}
|
|
}
|
|
|
|
cfg.OverlapWordRunGap, err = parseOverlapWordRunGap()
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
cfg.WordRunReorderWindow, err = parseWordRunReorderWindow()
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
cfg.CoalesceGap, err = parseCoalesceGap(opts.CoalesceGap)
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
cfg.BackchannelMaxDuration, err = parseBackchannelMaxDuration()
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
cfg.FillerMaxDuration, err = parseFillerMaxDuration()
|
|
if err != nil {
|
|
return Config{}, err
|
|
}
|
|
|
|
return cfg, nil
|
|
}
|
|
|
|
// parseModuleList splits a comma-separated module list into trimmed names.
//
// A blank value yields a nil slice and no error. Any element that is empty
// after trimming (for example from "a,,b" or a trailing comma) is an error.
// Order and duplicates are preserved.
func parseModuleList(value string) ([]string, error) {
	list := strings.TrimSpace(value)
	if list == "" {
		return nil, nil
	}

	modules := strings.Split(list, ",")
	for i := range modules {
		modules[i] = strings.TrimSpace(modules[i])
		if modules[i] == "" {
			return nil, errors.New("module names cannot be empty")
		}
	}
	return modules, nil
}
|
|
|
|
func validateOutputSchema(value string) error {
|
|
switch value {
|
|
case OutputSchemaSeriatim, OutputSchemaMinimal:
|
|
return nil
|
|
default:
|
|
return fmt.Errorf("--output-schema must be one of %q or %q", OutputSchemaSeriatim, OutputSchemaMinimal)
|
|
}
|
|
}
|
|
|
|
func normalizeInputFiles(paths []string) ([]string, error) {
|
|
if len(paths) == 0 {
|
|
return nil, errors.New("at least one --input-file is required")
|
|
}
|
|
|
|
normalized := make([]string, 0, len(paths))
|
|
seen := make(map[string]struct{}, len(paths))
|
|
for _, path := range paths {
|
|
path = strings.TrimSpace(path)
|
|
if path == "" {
|
|
return nil, errors.New("--input-file cannot be empty")
|
|
}
|
|
|
|
clean := filepath.Clean(path)
|
|
if err := requireFile(clean, "--input-file"); err != nil {
|
|
return nil, err
|
|
}
|
|
if _, exists := seen[clean]; exists {
|
|
return nil, fmt.Errorf("duplicate --input-file %q", clean)
|
|
}
|
|
seen[clean] = struct{}{}
|
|
normalized = append(normalized, clean)
|
|
}
|
|
sort.Strings(normalized)
|
|
return normalized, nil
|
|
}
|
|
|
|
// normalizeOutputPath trims and cleans an output path and checks that its
// parent exists and is a directory. The output path itself is not required
// to exist. flag is the option name used in error messages.
func normalizeOutputPath(path string, flag string) (string, error) {
	trimmed := strings.TrimSpace(path)
	if trimmed == "" {
		return "", fmt.Errorf("%s is required", flag)
	}

	target := filepath.Clean(trimmed)
	dir := filepath.Dir(target)
	if info, err := os.Stat(dir); err != nil {
		return "", fmt.Errorf("%s parent directory %q: %w", flag, dir, err)
	} else if !info.IsDir() {
		return "", fmt.Errorf("%s parent path %q is not a directory", flag, dir)
	}
	return target, nil
}
|
|
|
|
// requireFile reports an error unless path can be stat'ed and is not a
// directory. flag is the option name used in error messages; a stat failure
// is wrapped so callers can still inspect the underlying error.
func requireFile(path string, flag string) error {
	info, statErr := os.Stat(path)
	switch {
	case statErr != nil:
		return fmt.Errorf("%s %q: %w", flag, path, statErr)
	case info.IsDir():
		return fmt.Errorf("%s %q is a directory, not a file", flag, path)
	default:
		return nil
	}
}
|
|
|
|
func parseOverlapWordRunGap() (float64, error) {
|
|
return parsePositiveFloatEnv(OverlapWordRunGapEnv, DefaultOverlapWordRunGap)
|
|
}
|
|
|
|
func parseWordRunReorderWindow() (float64, error) {
|
|
return parsePositiveFloatEnv(WordRunReorderWindowEnv, DefaultWordRunReorderWindow)
|
|
}
|
|
|
|
func parseBackchannelMaxDuration() (float64, error) {
|
|
return parsePositiveFloatEnv(BackchannelMaxDurationEnv, DefaultBackchannelMaxDuration)
|
|
}
|
|
|
|
func parseFillerMaxDuration() (float64, error) {
|
|
return parsePositiveFloatEnv(FillerMaxDurationEnv, DefaultFillerMaxDuration)
|
|
}
|
|
|
|
func parseCoalesceGap(value string) (float64, error) {
|
|
value = strings.TrimSpace(value)
|
|
if value == "" {
|
|
return DefaultCoalesceGap, nil
|
|
}
|
|
|
|
gap, err := strconv.ParseFloat(value, 64)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("--coalesce-gap must be a non-negative number of seconds: %w", err)
|
|
}
|
|
if gap < 0 {
|
|
return 0, fmt.Errorf("--coalesce-gap must be non-negative")
|
|
}
|
|
return gap, nil
|
|
}
|
|
|
|
// parsePositiveFloatEnv reads the environment variable name as a strictly
// positive number of seconds.
//
// An unset or blank variable yields defaultValue. Text that
// strconv.ParseFloat cannot parse, values that are zero or negative, and NaN
// are rejected with an error that names the variable and a zero result.
func parsePositiveFloatEnv(name string, defaultValue float64) (float64, error) {
	value := strings.TrimSpace(os.Getenv(name))
	if value == "" {
		return defaultValue, nil
	}

	parsed, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return 0, fmt.Errorf("%s must be a positive number of seconds: %w", name, err)
	}
	// ParseFloat accepts "NaN", and NaN compares false against every value,
	// so "parsed <= 0" alone would let it through. parsed != parsed holds
	// only for NaN.
	if parsed <= 0 || parsed != parsed {
		return 0, fmt.Errorf("%s must be positive", name)
	}
	return parsed, nil
}
|
|
|
|
// contains reports whether target is equal to any element of values. A nil
// or empty slice never contains anything.
func contains(values []string, target string) bool {
	for i := range values {
		if values[i] == target {
			return true
		}
	}
	return false
}
|