Cleaned up documentation and development artifacts in advance of release
This commit is contained in:
@@ -13,35 +13,58 @@ import (
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/speaker"
|
||||
)
|
||||
|
||||
type noopPreprocessor struct {
|
||||
name string
|
||||
requires pipeline.ModelState
|
||||
produces pipeline.ModelState
|
||||
type validateRaw struct{}
|
||||
|
||||
func (validateRaw) Name() string {
|
||||
return "validate-raw"
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Name() string {
|
||||
return p.name
|
||||
func (validateRaw) Requires() pipeline.ModelState {
|
||||
return pipeline.StateRaw
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Requires() pipeline.ModelState {
|
||||
return p.requires
|
||||
func (validateRaw) Produces() pipeline.ModelState {
|
||||
return pipeline.StateRaw
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Produces() pipeline.ModelState {
|
||||
return p.produces
|
||||
}
|
||||
|
||||
func (p noopPreprocessor) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
|
||||
func (validateRaw) Process(ctx context.Context, in pipeline.PreprocessState, cfg config.Config) (pipeline.PreprocessState, []report.Event, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return pipeline.PreprocessState{}, nil, err
|
||||
}
|
||||
if in.State != p.requires {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", p.name, p.requires, in.State)
|
||||
if in.State != pipeline.StateRaw {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("preprocessing module %q requires state %q but received %q", "validate-raw", pipeline.StateRaw, in.State)
|
||||
}
|
||||
if len(in.Raw) == 0 {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: no raw transcript(s) to validate")
|
||||
}
|
||||
|
||||
for transcriptIndex, transcript := range in.Raw {
|
||||
if strings.TrimSpace(transcript.Source) == "" {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %d has empty source", transcriptIndex)
|
||||
}
|
||||
for segmentIndex, segment := range transcript.Segments {
|
||||
if segment.Start < 0 {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d has negative start", transcript.Source, segmentIndex)
|
||||
}
|
||||
if segment.End < segment.Start {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d has end before start", transcript.Source, segmentIndex)
|
||||
}
|
||||
for wordIndex, word := range segment.Words {
|
||||
if !word.Timed {
|
||||
continue
|
||||
}
|
||||
if word.Start < 0 {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has negative start", transcript.Source, segmentIndex, wordIndex)
|
||||
}
|
||||
if word.End < word.Start {
|
||||
return pipeline.PreprocessState{}, nil, fmt.Errorf("validate-raw: raw transcript %q segment %d word %d has end before start", transcript.Source, segmentIndex, wordIndex)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
in.State = p.produces
|
||||
return in, []report.Event{
|
||||
report.Info("preprocessing", p.name, "completed no-op preprocessing module"),
|
||||
report.Info("preprocessing", "validate-raw", fmt.Sprintf("validated %d raw transcript(s)", len(in.Raw))),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user