83 lines
2.3 KiB
Go
83 lines
2.3 KiB
Go
package builtin
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/pipeline"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
|
)
|
|
|
|
type noopPreprocessor struct {
|
|
name string
|
|
requires pipeline.ModelState
|
|
produces pipeline.ModelState
|
|
}
|
|
|
|
func (p noopPreprocessor) Name() string {
|
|
return p.name
|
|
}
|
|
|
|
func (p noopPreprocessor) Requires() pipeline.ModelState {
|
|
return p.requires
|
|
}
|
|
|
|
func (p noopPreprocessor) Produces() pipeline.ModelState {
|
|
return p.produces
|
|
}
|
|
|
|
func (p noopPreprocessor) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return pipeline.PreprocessState{}, nil, err
|
|
}
|
|
if in.State != p.requires {
|
|
return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", p.name, p.requires, in.State)
|
|
}
|
|
|
|
in.State = p.produces
|
|
return in, []report.Event{
|
|
report.Info("preprocessing", p.name, "completed no-op preprocessing module"),
|
|
}, nil
|
|
}
|
|
|
|
// normalizeSpeakers is a field-less preprocessing module. Its methods report
// the name "normalize-speakers" and a transition from pipeline.StateRaw to
// pipeline.StateCanonical.
type normalizeSpeakers struct{}
|
|
|
|
func (normalizeSpeakers) Name() string {
|
|
return "normalize-speakers"
|
|
}
|
|
|
|
func (normalizeSpeakers) Requires() pipeline.ModelState {
|
|
return pipeline.StateRaw
|
|
}
|
|
|
|
func (normalizeSpeakers) Produces() pipeline.ModelState {
|
|
return pipeline.StateCanonical
|
|
}
|
|
|
|
func (normalizeSpeakers) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
|
|
if err := ctx.Err(); err != nil {
|
|
return pipeline.PreprocessState{}, nil, err
|
|
}
|
|
if in.State != pipeline.StateRaw {
|
|
return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "normalize-speakers", pipeline.StateRaw, in.State)
|
|
}
|
|
|
|
canonical := make([]model.CanonicalTranscript, 0, len(in.Raw))
|
|
for _, raw := range in.Raw {
|
|
canonical = append(canonical, model.CanonicalTranscript{
|
|
Source: raw.Source,
|
|
Segments: nil,
|
|
})
|
|
}
|
|
|
|
return pipeline.PreprocessState{
|
|
State: pipeline.StateCanonical,
|
|
Raw: append([]model.RawTranscript(nil), in.Raw...),
|
|
Canonical: canonical,
|
|
}, []report.Event{
|
|
report.Info("preprocessing", "normalize-speakers", "created placeholder canonical transcript(s)"),
|
|
}, nil
|
|
}
|