package model // RawTranscript is a loaded input document before canonical normalization. type RawTranscript struct { Source string `json:"source"` Segments []RawSegment `json:"segments"` } // RawSegment is the supported WhisperX segment subset. type RawSegment struct { Start float64 `json:"start"` End float64 `json:"end"` Text string `json:"text"` Words []Word `json:"words,omitempty"` } // CanonicalTranscript is a per-speaker transcript in seriatim's internal model. type CanonicalTranscript struct { Source string `json:"source"` Segments []Segment `json:"segments"` } // MergedTranscript is the globally merged in-memory transcript. type MergedTranscript struct { Segments []Segment `json:"segments"` OverlapGroups []OverlapGroup `json:"overlap_groups"` } // FinalTranscript is the serialized transcript artifact. type FinalTranscript struct { Metadata OutputMetadata `json:"metadata"` Segments []Segment `json:"segments"` OverlapGroups []OverlapGroup `json:"overlap_groups"` } // OutputMetadata records the pipeline configuration that produced an artifact. type OutputMetadata struct { Application string `json:"application"` Version string `json:"version"` InputReader string `json:"input_reader"` InputFiles []string `json:"input_files"` PreprocessingModules []string `json:"preprocessing_modules"` PostprocessingModules []string `json:"postprocessing_modules"` OutputModules []string `json:"output_modules"` } // Segment is the canonical transcript segment shape used by the framework. 
// Segment carries provenance (Source, SourceSegmentIndex, SourceRef,
// DerivedFrom), the Speaker label, Start/End bounds, Text, and optional
// categories, words, and overlap-group membership. Fields tagged omitempty
// are dropped from JSON when they hold their zero value; SourceSegmentIndex
// is a pointer, so it is dropped only when nil.
type Segment struct {
	ID                 int      `json:"id,omitempty"`
	InternalRef        string   `json:"internal_ref,omitempty"`
	Source             string   `json:"source"`
	SourceSegmentIndex *int     `json:"source_segment_index,omitempty"`
	SourceRef          string   `json:"source_ref,omitempty"`
	DerivedFrom        []string `json:"derived_from,omitempty"`
	Speaker            string   `json:"speaker"`
	Start              float64  `json:"start"`
	End                float64  `json:"end"`
	Text               string   `json:"text"`
	Categories         []string `json:"categories,omitempty"`
	Words              []Word   `json:"words,omitempty"`
	OverlapGroupID     int      `json:"overlap_group_id,omitempty"`
}

// Word preserves optional word-level timing data. Timed is never serialized
// (its JSON tag is "-"), so it exists only on in-memory values.
type Word struct {
	Text    string  `json:"text"`
	Start   float64 `json:"start"`
	End     float64 `json:"end"`
	Score   float64 `json:"score,omitempty"`
	Speaker string  `json:"speaker,omitempty"`
	Timed   bool    `json:"-"`
}

// OverlapGroup describes a detected overlapping speech region.
type OverlapGroup struct {
	ID         int      `json:"id"`
	Start      float64  `json:"start"`
	End        float64  `json:"end"`
	Segments   []string `json:"segments"`
	Speakers   []string `json:"speakers"`
	Class      string   `json:"class"`
	Resolution string   `json:"resolution"`
}

// SegmentLess defines the deterministic chronological ordering used by merge
// and postprocessing modules.
//
// It reports whether left sorts strictly before right. Keys are compared in
// order, and the first one that differs decides the result:
//
//  1. Start (ascending)
//  2. End (ascending)
//  3. Source (lexicographic)
//  4. SourceSegmentIndex (ascending; a segment with an index sorts before
//     one without)
//  5. SourceRef (lexicographic)
//  6. Speaker (lexicographic)
//
// Segments that agree on every key are not less than each other, so the
// function returns false for them in both directions.
func SegmentLess(left Segment, right Segment) bool {
	if left.Start != right.Start {
		return left.Start < right.Start
	}
	if left.End != right.End {
		return left.End < right.End
	}
	if left.Source != right.Source {
		return left.Source < right.Source
	}

	leftIdx, rightIdx := left.SourceSegmentIndex, right.SourceSegmentIndex
	switch {
	case leftIdx != nil && rightIdx != nil:
		if *leftIdx != *rightIdx {
			return *leftIdx < *rightIdx
		}
		// Equal indexes: fall through to the remaining tie-breakers.
	case leftIdx != nil:
		// Only left carries an index; indexed segments sort first.
		return true
	case rightIdx != nil:
		// Only right carries an index, so left sorts after it.
		return false
	}

	if left.SourceRef != right.SourceRef {
		return left.SourceRef < right.SourceRef
	}
	return left.Speaker < right.Speaker
}