// seriatim/internal/model/model.go

package model

// RawTranscript is a loaded input document before canonical normalization.
//
// It pairs a Source string with a slice of RawSegment values. Neither field
// carries omitempty, so both are always emitted when the value is marshaled to
// JSON, under the keys "source" and "segments".
type RawTranscript struct {
	Source   string       `json:"source"`
	Segments []RawSegment `json:"segments"`
}

// RawSegment is the supported WhisperX segment subset.
//
// Start, End and Text map to the JSON keys "start", "end" and "text". The unit
// of Start and End is not stated in this file (presumably seconds; confirm
// against the input reader). Words is optional: it is read from the "words"
// key and left out of JSON output when empty.
type RawSegment struct {
	Start float64 `json:"start"`
	End   float64 `json:"end"`
	Text  string  `json:"text"`
	Words []Word  `json:"words,omitempty"`
}

// CanonicalTranscript is a per-speaker transcript in seriatim's internal model.
//
// It has the same two-field layout as RawTranscript, but its Segments use the
// canonical Segment type rather than RawSegment. Both fields are always
// emitted in JSON, under the keys "source" and "segments".
type CanonicalTranscript struct {
	Source   string    `json:"source"`
	Segments []Segment `json:"segments"`
}

// MergedTranscript is the globally merged in-memory transcript.
//
// It holds the merged Segments together with the OverlapGroups detected among
// them. Unlike the per-source transcript types it has no Source field of its
// own; each Segment carries its own Source instead. Both fields have JSON
// tags without omitempty, so a nil slice marshals as null rather than being
// dropped.
type MergedTranscript struct {
	Segments      []Segment      `json:"segments"`
	OverlapGroups []OverlapGroup `json:"overlap_groups"`
}

// FinalTranscript is the serialized transcript artifact.
//
// It carries the same Segments and OverlapGroups fields as MergedTranscript
// and adds a Metadata block. All three fields are always emitted in JSON,
// under the keys "metadata", "segments" and "overlap_groups".
type FinalTranscript struct {
	Metadata      OutputMetadata `json:"metadata"`
	Segments      []Segment      `json:"segments"`
	OverlapGroups []OverlapGroup `json:"overlap_groups"`
}

// OutputMetadata records the pipeline configuration that produced an artifact.
//
// Application, Version and InputReader are single strings; InputFiles and the
// three *Modules fields are string slices. None of the fields use omitempty,
// so every key is present in the JSON output and a nil slice marshals as
// null. The accepted values and the ordering of the slices are decided by the
// code that fills this struct, not by this file.
type OutputMetadata struct {
	Application           string   `json:"application"`
	Version               string   `json:"version"`
	InputReader           string   `json:"input_reader"`
	InputFiles            []string `json:"input_files"`
	PreprocessingModules  []string `json:"preprocessing_modules"`
	PostprocessingModules []string `json:"postprocessing_modules"`
	OutputModules         []string `json:"output_modules"`
}

// Segment is the canonical transcript segment shape used by the framework.
//
// Source, Speaker, Start, End and Text are always emitted in JSON. The
// remaining fields use omitempty and are dropped when they hold their zero
// value, which has two consequences worth knowing:
//
//   - ID and OverlapGroupID are plain ints, so a value of 0 cannot be told
//     apart from "not set" in the serialized form.
//   - SourceSegmentIndex is a pointer, so index 0 is preserved and only a nil
//     pointer is omitted.
//
// SegmentLess orders segments by Start, End, Source, SourceSegmentIndex,
// SourceRef and Speaker; the other fields do not take part in the ordering.
type Segment struct {
	ID                 int      `json:"id,omitempty"`
	InternalRef        string   `json:"internal_ref,omitempty"`
	Source             string   `json:"source"`
	SourceSegmentIndex *int     `json:"source_segment_index,omitempty"`
	SourceRef          string   `json:"source_ref,omitempty"`
	DerivedFrom        []string `json:"derived_from,omitempty"`
	Speaker            string   `json:"speaker"`
	Start              float64  `json:"start"`
	End                float64  `json:"end"`
	Text               string   `json:"text"`
	Categories         []string `json:"categories,omitempty"`
	Words              []Word   `json:"words,omitempty"`
	OverlapGroupID     int      `json:"overlap_group_id,omitempty"`
}

// Word preserves optional word-level timing data.
//
// Text, Start and End are always emitted in JSON. Score and Speaker use
// omitempty, so a Score of exactly 0 or an empty Speaker is left out of the
// output. Timed is tagged `json:"-"`: it is never written to or read from
// JSON and exists only in memory (presumably to record whether Start and End
// were actually supplied for this word; confirm against the code that sets
// it).
type Word struct {
	Text    string  `json:"text"`
	Start   float64 `json:"start"`
	End     float64 `json:"end"`
	Score   float64 `json:"score,omitempty"`
	Speaker string  `json:"speaker,omitempty"`
	Timed   bool    `json:"-"`
}

// OverlapGroup describes a detected overlapping speech region.
//
// Every field is always emitted in JSON. Segments and Speakers are string
// slices; Segments presumably holds references to the member segments (for
// example their InternalRef values), which should be confirmed against the
// code that builds groups. The set of valid Class and Resolution values is
// not defined in this file.
type OverlapGroup struct {
	ID         int      `json:"id"`
	Start      float64  `json:"start"`
	End        float64  `json:"end"`
	Segments   []string `json:"segments"`
	Speakers   []string `json:"speakers"`
	Class      string   `json:"class"`
	Resolution string   `json:"resolution"`
}

// SegmentLess reports whether left sorts strictly before right in the
// deterministic chronological order shared by merge and postprocessing
// modules.
//
// Keys are compared in sequence and the first one that differs decides the
// result: Start, End, Source, SourceSegmentIndex, SourceRef and finally
// Speaker. When only one of the two segments has a SourceSegmentIndex, the
// segment that has one sorts first. Two segments that agree on every key
// yield false in both directions.
func SegmentLess(left Segment, right Segment) bool {
	// Primary keys: time span first, then the originating source.
	switch {
	case left.Start != right.Start:
		return left.Start < right.Start
	case left.End != right.End:
		return left.End < right.End
	case left.Source != right.Source:
		return left.Source < right.Source
	}

	// Source index: optional on either side, so nil needs explicit handling.
	leftIdx, rightIdx := left.SourceSegmentIndex, right.SourceSegmentIndex
	switch {
	case leftIdx != nil && rightIdx != nil:
		if *leftIdx != *rightIdx {
			return *leftIdx < *rightIdx
		}
		// Equal indices: fall out of the switch to the remaining keys.
	case leftIdx != nil:
		// Only left is indexed; indexed segments come first.
		return true
	case rightIdx != nil:
		// Only right is indexed.
		return false
	}

	// Final tie-breakers.
	if left.SourceRef == right.SourceRef {
		return left.Speaker < right.Speaker
	}
	return left.SourceRef < right.SourceRef
}