57 lines
1.7 KiB
Go
57 lines
1.7 KiB
Go
package pipeline
|
|
|
|
import (
|
|
"context"
|
|
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/model"
|
|
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
|
)
|
|
|
|
// ModelState identifies which representation a preprocessing module consumes.
//
// It is a string-backed type; the values declared in this package are
// StateRaw ("raw") and StateCanonical ("canonical").
type ModelState string
|
|
|
|
const (
|
|
StateRaw ModelState = "raw"
|
|
StateCanonical ModelState = "canonical"
|
|
)
|
|
|
|
// PreprocessState carries transcript data as it moves from raw to canonical.
|
|
type PreprocessState struct {
|
|
State ModelState
|
|
Raw []model.RawTranscript
|
|
Canonical []model.CanonicalTranscript
|
|
}
|
|
|
|
// InputReader loads external input specs into raw transcript documents.
|
|
type InputReader interface {
|
|
Name() string
|
|
Read(ctx context.Context, cfg config.Config) ([]model.RawTranscript, []report.Event, error)
|
|
}
|
|
|
|
// Preprocessor transforms preprocessing state.
|
|
type Preprocessor interface {
|
|
Name() string
|
|
Requires() ModelState
|
|
Produces() ModelState
|
|
Process(ctx context.Context, in PreprocessState, cfg config.Config) (PreprocessState, []report.Event, error)
|
|
}
|
|
|
|
// Merger combines canonical transcripts into one merged transcript.
|
|
type Merger interface {
|
|
Name() string
|
|
Merge(ctx context.Context, in []model.CanonicalTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error)
|
|
}
|
|
|
|
// Postprocessor transforms a merged transcript.
|
|
type Postprocessor interface {
|
|
Name() string
|
|
Process(ctx context.Context, in model.MergedTranscript, cfg config.Config) (model.MergedTranscript, []report.Event, error)
|
|
}
|
|
|
|
// OutputWriter emits final artifacts.
|
|
type OutputWriter interface {
|
|
Name() string
|
|
Write(ctx context.Context, out model.FinalTranscript, rpt report.Report, cfg config.Config) ([]report.Event, error)
|
|
}
|