package normalize

import (
	"fmt"
	"path/filepath"
	"sort"
	"strings"

	"gitea.maximumdirect.net/eric/seriatim/internal/artifact"
	"gitea.maximumdirect.net/eric/seriatim/internal/buildinfo"
	"gitea.maximumdirect.net/eric/seriatim/internal/config"
	"gitea.maximumdirect.net/eric/seriatim/schema"
)

// Build orders the parsed segments and renders them in the schema selected by
// cfg.OutputSchema (minimal, intermediate, or full).
//
// The rendered transcript is passed through the matching schema validator
// before it is returned. A validation failure, or an output schema that is not
// one of the three handled here, yields a nil result and a non-nil error.
func Build(parsed ParsedTranscript, cfg config.NormalizeConfig) (any, error) {
	const validateErrFormat = "validate normalize output: %w"

	segments := sortedSegments(parsed.Segments)

	switch cfg.OutputSchema {
	case config.OutputSchemaMinimal:
		transcript := buildMinimal(segments)
		err := schema.ValidateMinimalTranscript(transcript)
		if err != nil {
			return nil, fmt.Errorf(validateErrFormat, err)
		}
		return transcript, nil

	case config.OutputSchemaIntermediate:
		transcript := buildIntermediate(segments)
		err := schema.ValidateIntermediateTranscript(transcript)
		if err != nil {
			return nil, fmt.Errorf(validateErrFormat, err)
		}
		return transcript, nil

	case config.OutputSchemaFull:
		transcript := buildFull(segments, cfg)
		err := schema.ValidateTranscript(transcript)
		if err != nil {
			return nil, fmt.Errorf(validateErrFormat, err)
		}
		return transcript, nil
	}

	return nil, fmt.Errorf("unsupported output schema %q", cfg.OutputSchema)
}

// sortedSegments returns a sorted copy of input; the input slice itself is not
// reordered.
//
// Segments are ordered ascending by Start, then End, then InputIndex, then
// Speaker. The sort is stable, so segments equal on all four keys keep their
// relative input order.
func sortedSegments(input []InputSegment) []InputSegment {
	result := make([]InputSegment, len(input))
	copy(result, input)

	sort.SliceStable(result, func(i, j int) bool {
		a, b := result[i], result[j]
		switch {
		case a.Start != b.Start:
			return a.Start < b.Start
		case a.End != b.End:
			return a.End < b.End
		case a.InputIndex != b.InputIndex:
			return a.InputIndex < b.InputIndex
		default:
			return a.Speaker < b.Speaker
		}
	})

	return result
}

// buildMinimal renders segments as a minimal transcript. Segment IDs are the
// 1-based positions of the segments in the order given.
func buildMinimal(segments []InputSegment) schema.MinimalTranscript {
	out := make([]schema.MinimalSegment, 0, len(segments))
	for i := range segments {
		in := segments[i]
		out = append(out, schema.MinimalSegment{
			ID:      i + 1,
			Start:   in.Start,
			End:     in.End,
			Speaker: in.Speaker,
			Text:    in.Text,
		})
	}

	meta := schema.MinimalMetadata{
		Application:  artifact.ApplicationName,
		Version:      buildinfo.Version,
		OutputSchema: config.OutputSchemaMinimal,
	}

	return schema.MinimalTranscript{Metadata: meta, Segments: out}
}

// buildIntermediate renders segments as an intermediate transcript: the
// minimal fields plus a copy of each segment's categories. Segment IDs are the
// 1-based positions of the segments in the order given.
func buildIntermediate(segments []InputSegment) schema.IntermediateTranscript {
	out := make([]schema.IntermediateSegment, 0, len(segments))
	for i := range segments {
		in := segments[i]
		out = append(out, schema.IntermediateSegment{
			ID:      i + 1,
			Start:   in.Start,
			End:     in.End,
			Speaker: in.Speaker,
			Text:    in.Text,
			// Copied so the output does not share a backing array with the
			// input; the copy is nil when the segment has no categories.
			Categories: append([]string(nil), in.Categories...),
		})
	}

	meta := schema.IntermediateMetadata{
		Application:  artifact.ApplicationName,
		Version:      buildinfo.Version,
		OutputSchema: config.OutputSchemaIntermediate,
	}

	return schema.IntermediateTranscript{Metadata: meta, Segments: out}
}

// buildFull renders segments as a full transcript.
//
// Per segment:
//   - Source is the segment's own source with surrounding whitespace trimmed,
//     or the base name of cfg.InputFile when that is empty.
//   - SourceSegmentIndex is a copy of the segment's value, or a pointer to its
//     InputIndex when the segment carries none.
//   - DerivedFrom and Categories are copied rather than shared with the input.
//
// Metadata records cfg.InputFile as the only input file and a copy of
// cfg.OutputModules; OverlapGroups is always an empty, non-nil slice.
func buildFull(segments []InputSegment, cfg config.NormalizeConfig) schema.Transcript {
	// cloneStrings detaches a string slice from its source. The result is nil
	// when the source has no elements.
	cloneStrings := func(in []string) []string {
		return append([]string(nil), in...)
	}

	fallbackSource := filepath.Base(cfg.InputFile)

	out := make([]schema.Segment, 0, len(segments))
	for i := range segments {
		in := segments[i]

		source := fallbackSource
		if trimmed := strings.TrimSpace(in.Source); trimmed != "" {
			source = trimmed
		}

		srcIndex := copyIntPtr(in.SourceSegmentIndex)
		if srcIndex == nil {
			// Take the address of a per-iteration local, not of the segment
			// field, so the output never aliases the input.
			inputIndex := in.InputIndex
			srcIndex = &inputIndex
		}

		out = append(out, schema.Segment{
			ID:                 i + 1,
			Source:             source,
			SourceSegmentIndex: srcIndex,
			SourceRef:          in.SourceRef,
			DerivedFrom:        cloneStrings(in.DerivedFrom),
			Speaker:            in.Speaker,
			Start:              in.Start,
			End:                in.End,
			Text:               in.Text,
			Categories:         cloneStrings(in.Categories),
		})
	}

	meta := schema.Metadata{
		Application:           artifact.ApplicationName,
		Version:               buildinfo.Version,
		InputReader:           "normalize-input",
		InputFiles:            []string{cfg.InputFile},
		PreprocessingModules:  []string{},
		PostprocessingModules: []string{},
		// NOTE(review): unlike the two explicit empty slices above, this is
		// nil when cfg.OutputModules is empty — confirm that is intended.
		OutputModules: cloneStrings(cfg.OutputModules),
	}

	return schema.Transcript{
		Metadata:      meta,
		Segments:      out,
		OverlapGroups: []schema.OverlapGroup{},
	}
}

// copyIntPtr returns a pointer to a fresh copy of *value, or nil when value is
// nil. The result never aliases value.
func copyIntPtr(value *int) *int {
	if value == nil {
		return nil
	}
	dup := new(int)
	*dup = *value
	return dup
}