Files
seriatim/internal/artifact/transcript.go

102 lines
3.2 KiB
Go

package artifact
import (
"gitea.maximumdirect.net/eric/seriatim/internal/buildinfo"
"gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/model"
"gitea.maximumdirect.net/eric/seriatim/schema"
)
// ApplicationName is the application identifier that FromMerged and
// MinimalFromMerged record in the Application field of the output metadata.
const ApplicationName = "seriatim"
// FromMerged builds the full public schema.Transcript from the internal merged
// transcript.
//
// Every segment and overlap group in merged is translated field by field, in
// order. String slices taken from merged and cfg are copied rather than
// shared, and SourceSegmentIndex is copied through copyIntPtr, so the result
// does not alias those slices or that pointer. The metadata records
// ApplicationName, buildinfo.Version, and the reader, input-file, and module
// settings found in cfg.
func FromMerged(cfg config.Config, merged model.MergedTranscript) schema.Transcript {
	// dup returns an independent copy of in; an empty or nil input yields nil.
	dup := func(in []string) []string {
		return append([]string(nil), in...)
	}

	segs := make([]schema.Segment, 0, len(merged.Segments))
	for _, seg := range merged.Segments {
		segs = append(segs, schema.Segment{
			ID:                 seg.ID,
			Source:             seg.Source,
			SourceSegmentIndex: copyIntPtr(seg.SourceSegmentIndex),
			SourceRef:          seg.SourceRef,
			DerivedFrom:        dup(seg.DerivedFrom),
			Speaker:            seg.Speaker,
			Start:              seg.Start,
			End:                seg.End,
			Text:               seg.Text,
			Categories:         dup(seg.Categories),
			OverlapGroupID:     seg.OverlapGroupID,
		})
	}

	groups := make([]schema.OverlapGroup, 0, len(merged.OverlapGroups))
	for _, grp := range merged.OverlapGroups {
		groups = append(groups, schema.OverlapGroup{
			ID:         grp.ID,
			Start:      grp.Start,
			End:        grp.End,
			Segments:   dup(grp.Segments),
			Speakers:   dup(grp.Speakers),
			Class:      grp.Class,
			Resolution: grp.Resolution,
		})
	}

	meta := schema.Metadata{
		Application:           ApplicationName,
		Version:               buildinfo.Version,
		InputReader:           cfg.InputReader,
		InputFiles:            dup(cfg.InputFiles),
		PreprocessingModules:  dup(cfg.PreprocessingModules),
		PostprocessingModules: dup(cfg.PostprocessingModules),
		OutputModules:         dup(cfg.OutputModules),
	}

	return schema.Transcript{
		Metadata:      meta,
		Segments:      segs,
		OverlapGroups: groups,
	}
}
// MinimalFromMerged builds the compact public schema.MinimalTranscript from
// the internal merged transcript.
//
// Only the ID, Start, End, Speaker, and Text of each segment are carried over,
// in order. The metadata records ApplicationName, buildinfo.Version, and
// config.OutputSchemaMinimal as the output schema. cfg is currently not read.
func MinimalFromMerged(cfg config.Config, merged model.MergedTranscript) schema.MinimalTranscript {
	out := schema.MinimalTranscript{
		Metadata: schema.MinimalMetadata{
			Application:  ApplicationName,
			Version:      buildinfo.Version,
			OutputSchema: config.OutputSchemaMinimal,
		},
		Segments: make([]schema.MinimalSegment, 0, len(merged.Segments)),
	}

	for _, seg := range merged.Segments {
		out.Segments = append(out.Segments, schema.MinimalSegment{
			ID:      seg.ID,
			Start:   seg.Start,
			End:     seg.End,
			Speaker: seg.Speaker,
			Text:    seg.Text,
		})
	}

	return out
}
// SelectedFromMerged builds whichever public output contract cfg.OutputSchema
// selects.
//
// It returns a schema.MinimalTranscript when cfg.OutputSchema equals
// config.OutputSchemaMinimal, and a full schema.Transcript for any other
// value.
func SelectedFromMerged(cfg config.Config, merged model.MergedTranscript) any {
	if cfg.OutputSchema == config.OutputSchemaMinimal {
		return MinimalFromMerged(cfg, merged)
	}
	// Any other schema value falls back to the full contract.
	return FromMerged(cfg, merged)
}
// copyIntPtr returns a pointer to a freshly allocated int holding the value
// that value points to, or nil when value is nil. The result never aliases
// the argument.
func copyIntPtr(value *int) *int {
	if value != nil {
		dup := new(int)
		*dup = *value
		return dup
	}
	return nil
}