package builtin

import (
	"context"
	"fmt"

	"gitea.maximumdirect.net/eric/seriatim/internal/config"
	"gitea.maximumdirect.net/eric/seriatim/internal/model"
	"gitea.maximumdirect.net/eric/seriatim/internal/pipeline"
	"gitea.maximumdirect.net/eric/seriatim/internal/report"
)

// noopPreprocessor is a preprocessing module that performs no work on the
// transcripts themselves: it only moves the pipeline from the `requires`
// model state to the `produces` model state under a configurable name.
type noopPreprocessor struct {
	name     string
	requires pipeline.ModelState
	produces pipeline.ModelState
}

// Name returns the configured module name.
func (n noopPreprocessor) Name() string {
	return n.name
}

// Requires returns the model state this module expects as input.
func (n noopPreprocessor) Requires() pipeline.ModelState {
	return n.requires
}

// Produces returns the model state this module reports after running.
func (n noopPreprocessor) Produces() pipeline.ModelState {
	return n.produces
}

// Process validates the incoming state and returns it with State replaced by
// the module's produced state, together with a single informational event.
//
// It returns ctx.Err() if the context is already done, and a descriptive
// error if in.State differs from the required state. On any error the
// returned PreprocessState is the zero value and no events are returned.
func (n noopPreprocessor) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
	var none pipeline.PreprocessState

	if ctxErr := ctx.Err(); ctxErr != nil {
		return none, nil, ctxErr
	}
	if got := in.State; got != n.requires {
		return none, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", n.name, n.requires, got)
	}

	// Everything except the state marker is passed through untouched.
	out := in
	out.State = n.produces

	events := []report.Event{
		report.Info("preprocessing", n.name, "completed no-op preprocessing module"),
	}
	return out, events, nil
}

// normalizeSpeakers is the "normalize-speakers" preprocessing module. In its
// current form it emits one placeholder canonical transcript (source only,
// no segments) for each raw transcript it receives.
type normalizeSpeakers struct{}

// Name returns the module name, "normalize-speakers".
func (normalizeSpeakers) Name() string {
	return "normalize-speakers"
}

// Requires reports that the module consumes the raw model state.
func (normalizeSpeakers) Requires() pipeline.ModelState {
	return pipeline.StateRaw
}

// Produces reports that the module yields the canonical model state.
func (normalizeSpeakers) Produces() pipeline.ModelState {
	return pipeline.StateCanonical
}

// Process builds a new PreprocessState in the canonical state. The result
// holds a copy of the input's raw transcripts plus one placeholder
// CanonicalTranscript per raw transcript, carrying over only its Source.
//
// It returns ctx.Err() if the context is already done, and a descriptive
// error if in.State is not pipeline.StateRaw. On any error the returned
// PreprocessState is the zero value and no events are returned.
func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
	const module = "normalize-speakers"

	var none pipeline.PreprocessState

	if ctxErr := ctx.Err(); ctxErr != nil {
		return none, nil, ctxErr
	}
	if got := in.State; got != pipeline.StateRaw {
		return none, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", module, pipeline.StateRaw, got)
	}

	// One placeholder per raw transcript, in the same order as the input.
	placeholders := make([]model.CanonicalTranscript, len(in.Raw))
	for i := range in.Raw {
		placeholders[i] = model.CanonicalTranscript{
			Source:   in.Raw[i].Source,
			Segments: nil, // placeholder: no segments are produced yet
		}
	}

	// Copy the raw slice so the result does not alias the caller's backing
	// array; appending to a nil slice leaves it nil when the input is empty.
	var rawCopy []model.RawTranscript
	rawCopy = append(rawCopy, in.Raw...)

	out := pipeline.PreprocessState{
		State:     pipeline.StateCanonical,
		Raw:       rawCopy,
		Canonical: placeholders,
	}
	events := []report.Event{
		report.Info("preprocessing", module, "created placeholder canonical transcript(s)"),
	}
	return out, events, nil
}