// Source file: seriatim/internal/model/model.go

package model
// RawTranscript holds one input document exactly as loaded, prior to
// canonical normalization.
type RawTranscript struct {
	// Source is serialized under the "source" key.
	// NOTE(review): presumably identifies the input the document came from — confirm with the reader.
	Source string `json:"source"`
	// Segments holds the document's raw segments under the "segments" key.
	Segments []RawSegment `json:"segments"`
}
// RawSegment captures the subset of a WhisperX segment that is supported on
// input: its two time bounds and its text.
type RawSegment struct {
	// Start is read from / written to the "start" key.
	// NOTE(review): presumably seconds from the start of the audio — confirm.
	Start float64 `json:"start"`
	// End is read from / written to the "end" key (same unit as Start).
	End float64 `json:"end"`
	// Text is the segment text under the "text" key.
	Text string `json:"text"`
}
// CanonicalTranscript is a single speaker's transcript expressed in
// seriatim's internal model.
type CanonicalTranscript struct {
	// Source is serialized under the "source" key.
	// NOTE(review): presumably names the input this transcript was built from — confirm.
	Source string `json:"source"`
	// Segments holds the canonical segments under the "segments" key.
	Segments []Segment `json:"segments"`
}
// MergedTranscript is the in-memory transcript produced by the global merge.
type MergedTranscript struct {
	// Segments holds the merged segments under the "segments" key.
	Segments []Segment `json:"segments"`
	// OverlapGroups holds the overlap groups under the "overlap_groups" key.
	// The tag has no omitempty, so a nil slice is encoded as JSON null.
	OverlapGroups []OverlapGroup `json:"overlap_groups"`
}
// FinalTranscript is the transcript artifact in its serialized form.
type FinalTranscript struct {
	// Metadata is emitted under the "metadata" key.
	Metadata OutputMetadata `json:"metadata"`
	// Segments holds the transcript segments under the "segments" key.
	Segments []Segment `json:"segments"`
	// OverlapGroups holds the overlap groups under the "overlap_groups" key.
	// The tag has no omitempty, so a nil slice is encoded as JSON null.
	OverlapGroups []OverlapGroup `json:"overlap_groups"`
}
// OutputMetadata describes the pipeline configuration behind a produced
// artifact. Every field is always emitted: none of the tags use omitempty, so
// nil slices are encoded as JSON null.
type OutputMetadata struct {
	// Application is serialized under the "application" key.
	Application string `json:"application"`
	// Version is serialized under the "version" key.
	Version string `json:"version"`
	// InputReader is serialized under the "input_reader" key.
	InputReader string `json:"input_reader"`
	// InputFiles is serialized under the "input_files" key.
	InputFiles []string `json:"input_files"`
	// PreprocessingModules is serialized under the "preprocessing_modules" key.
	PreprocessingModules []string `json:"preprocessing_modules"`
	// PostprocessingModules is serialized under the "postprocessing_modules" key.
	PostprocessingModules []string `json:"postprocessing_modules"`
	// OutputModules is serialized under the "output_modules" key.
	OutputModules []string `json:"output_modules"`
}
// Segment is the canonical segment record used by the framework's transcript
// types. Fields tagged omitempty are left out of the JSON output when they
// hold their zero value.
type Segment struct {
	// ID is serialized under "id" and omitted when 0.
	ID int `json:"id,omitempty"`
	// InternalRef is serialized under "internal_ref" and omitted when empty.
	InternalRef string `json:"internal_ref,omitempty"`
	// Source is serialized under the "source" key.
	Source string `json:"source"`
	// SourceSegmentIndex is serialized under "source_segment_index"; it is
	// always emitted, including when it is 0.
	// NOTE(review): presumably the segment's position in its source input — confirm.
	SourceSegmentIndex int `json:"source_segment_index"`
	// Speaker is serialized under the "speaker" key.
	Speaker string `json:"speaker"`
	// Start is serialized under the "start" key.
	// NOTE(review): presumably seconds — confirm against the input reader.
	Start float64 `json:"start"`
	// End is serialized under the "end" key (same unit as Start).
	End float64 `json:"end"`
	// Text is the segment text under the "text" key.
	Text string `json:"text"`
	// Words is serialized under "words" and omitted when the slice is empty.
	Words []Word `json:"words,omitempty"`
	// OverlapGroupID is serialized under "overlap_group_id" and omitted when 0.
	// NOTE(review): presumably matches OverlapGroup.ID, with 0 meaning "no group" — confirm.
	OverlapGroupID int `json:"overlap_group_id,omitempty"`
}
// Word carries optional word-level timing data: a piece of text, its two
// time bounds, and optional score and speaker values.
type Word struct {
	// Text is serialized under the "text" key.
	Text string `json:"text"`
	// Start is serialized under the "start" key.
	// NOTE(review): presumably seconds — confirm against the input reader.
	Start float64 `json:"start"`
	// End is serialized under the "end" key (same unit as Start).
	End float64 `json:"end"`
	// Score is serialized under "score" and omitted when 0.
	Score float64 `json:"score,omitempty"`
	// Speaker is serialized under "speaker" and omitted when empty.
	Speaker string `json:"speaker,omitempty"`
}
// OverlapGroup records one detected region of overlapping speech. Every field
// is always emitted: none of the tags use omitempty.
type OverlapGroup struct {
	// ID is serialized under the "id" key.
	ID int `json:"id"`
	// Start is serialized under the "start" key.
	// NOTE(review): presumably seconds — confirm against the overlap detector.
	Start float64 `json:"start"`
	// End is serialized under the "end" key (same unit as Start).
	End float64 `json:"end"`
	// Segments is serialized under the "segments" key.
	// NOTE(review): presumably references to member segments (e.g. Segment.InternalRef) — confirm.
	Segments []string `json:"segments"`
	// Speakers is serialized under the "speakers" key.
	Speakers []string `json:"speakers"`
	// Class is serialized under the "class" key.
	Class string `json:"class"`
	// Resolution is serialized under the "resolution" key.
	Resolution string `json:"resolution"`
}