Files
seriatim/internal/trim/artifact.go
Eric Rakestraw e6d3b4a46e
All checks were successful
ci/woodpecker/tag/release Pipeline was successful
Harden trim integration
2026-05-08 15:00:46 +00:00

397 lines
11 KiB
Go

package trim
import (
"encoding/json"
"fmt"
"gitea.maximumdirect.net/eric/seriatim/schema"
)
// Schema identifiers for the artifact formats handled in this file. They are
// stored in Artifact.Schema to select the active payload and are written into
// the OutputSchema metadata field when an artifact is converted.
const (
// SchemaMinimal identifies the minimal artifact schema.
SchemaMinimal = "seriatim-minimal"
// SchemaIntermediate identifies the intermediate artifact schema.
SchemaIntermediate = "seriatim-intermediate"
// SchemaFull identifies the full artifact schema.
SchemaFull = "seriatim-full"
)
// Artifact stores a parsed seriatim output artifact of one supported schema.
//
// Schema acts as a discriminator: the functions in this file read only the
// payload pointer that corresponds to it and treat a nil pointer for the
// declared schema as a missing payload.
type Artifact struct {
// Schema is one of SchemaFull, SchemaIntermediate, or SchemaMinimal; any
// other value is reported as unsupported by the functions that switch on it.
Schema string
// Full is the payload consulted when Schema is SchemaFull.
Full *schema.Transcript
// Intermediate is the payload consulted when Schema is SchemaIntermediate.
Intermediate *schema.IntermediateTranscript
// Minimal is the payload consulted when Schema is SchemaMinimal.
Minimal *schema.MinimalTranscript
}
// ApplyArtifactResult contains trimmed artifact output and ID mapping metadata.
type ApplyArtifactResult struct {
// Artifact is the trimmed artifact; ApplyArtifact gives it the same schema
// as its input.
Artifact Artifact
// OldToNewID is copied verbatim from the underlying trim result.
// NOTE(review): presumably maps pre-trim segment IDs to post-trim IDs —
// confirm against Apply.
OldToNewID map[int]int
// RemovedIDs is copied verbatim from the underlying trim result.
// NOTE(review): presumably the IDs of segments dropped by the trim —
// confirm against Apply.
RemovedIDs []int
// OverlapGroupsRecomputed is set to true by ApplyArtifact for full-schema
// input and to false for intermediate and minimal input.
OverlapGroupsRecomputed bool
}
// ParseArtifactJSON decodes data into the first supported artifact schema that
// accepts it.
//
// The input is first checked for JSON well-formedness; a syntax failure is
// reported as "input JSON is malformed" wrapping the decoder error. The
// schemas are then attempted in the order full, intermediate, minimal, and the
// first one that both unmarshals and passes its schema validator is returned.
// Decode and validation errors from individual attempts are discarded; when no
// schema matches, a single generic error is returned.
func ParseArtifactJSON(data []byte) (Artifact, error) {
	// Decode into an untyped value purely to separate syntax errors from
	// schema mismatches.
	var probe any
	if syntaxErr := json.Unmarshal(data, &probe); syntaxErr != nil {
		return Artifact{}, fmt.Errorf("input JSON is malformed: %w", syntaxErr)
	}

	full := new(schema.Transcript)
	if json.Unmarshal(data, full) == nil && schema.ValidateTranscript(*full) == nil {
		return Artifact{Schema: SchemaFull, Full: full}, nil
	}

	intermediate := new(schema.IntermediateTranscript)
	if json.Unmarshal(data, intermediate) == nil && schema.ValidateIntermediateTranscript(*intermediate) == nil {
		return Artifact{Schema: SchemaIntermediate, Intermediate: intermediate}, nil
	}

	minimal := new(schema.MinimalTranscript)
	if json.Unmarshal(data, minimal) == nil && schema.ValidateMinimalTranscript(*minimal) == nil {
		return Artifact{Schema: SchemaMinimal, Minimal: minimal}, nil
	}

	return Artifact{}, fmt.Errorf("input JSON is not a valid seriatim output artifact")
}
// ValidateArtifact runs the schema validator that matches artifact.Schema
// against the corresponding payload.
//
// It returns an error when the payload pointer for the declared schema is nil,
// when the schema identifier is not one of the supported constants, or when
// the schema validator itself rejects the payload.
func ValidateArtifact(artifact Artifact) error {
	if artifact.Schema == SchemaFull {
		if payload := artifact.Full; payload != nil {
			return schema.ValidateTranscript(*payload)
		}
		return fmt.Errorf("full artifact payload is missing")
	}

	if artifact.Schema == SchemaIntermediate {
		if payload := artifact.Intermediate; payload != nil {
			return schema.ValidateIntermediateTranscript(*payload)
		}
		return fmt.Errorf("intermediate artifact payload is missing")
	}

	if artifact.Schema == SchemaMinimal {
		if payload := artifact.Minimal; payload != nil {
			return schema.ValidateMinimalTranscript(*payload)
		}
		return fmt.Errorf("minimal artifact payload is missing")
	}

	return fmt.Errorf("unsupported artifact schema %q", artifact.Schema)
}
// Value returns the active payload, by value, for JSON serialization.
//
// When the payload pointer for the declared schema is nil, the zero value of
// that schema's transcript type is returned instead. An unrecognized schema
// yields nil.
func (artifact Artifact) Value() any {
	if artifact.Schema == SchemaFull {
		var payload schema.Transcript
		if artifact.Full != nil {
			payload = *artifact.Full
		}
		return payload
	}

	if artifact.Schema == SchemaIntermediate {
		var payload schema.IntermediateTranscript
		if artifact.Intermediate != nil {
			payload = *artifact.Intermediate
		}
		return payload
	}

	if artifact.Schema == SchemaMinimal {
		var payload schema.MinimalTranscript
		if artifact.Minimal != nil {
			payload = *artifact.Minimal
		}
		return payload
	}

	return nil
}
// SegmentCount reports how many segments the active payload holds.
//
// It returns 0 when the schema is unrecognized or when the payload pointer for
// the declared schema is nil.
func (artifact Artifact) SegmentCount() int {
	count := 0
	switch {
	case artifact.Schema == SchemaFull && artifact.Full != nil:
		count = len(artifact.Full.Segments)
	case artifact.Schema == SchemaIntermediate && artifact.Intermediate != nil:
		count = len(artifact.Intermediate.Segments)
	case artifact.Schema == SchemaMinimal && artifact.Minimal != nil:
		count = len(artifact.Minimal.Segments)
	}
	return count
}
// Application returns the application name recorded in the active payload's
// metadata.
//
// It returns the empty string when the schema is unrecognized or when the
// payload pointer for the declared schema is nil.
func (artifact Artifact) Application() string {
	name := ""
	switch {
	case artifact.Schema == SchemaFull && artifact.Full != nil:
		name = artifact.Full.Metadata.Application
	case artifact.Schema == SchemaIntermediate && artifact.Intermediate != nil:
		name = artifact.Intermediate.Metadata.Application
	case artifact.Schema == SchemaMinimal && artifact.Minimal != nil:
		name = artifact.Minimal.Metadata.Application
	}
	return name
}
// Version returns the version string recorded in the active payload's
// metadata.
//
// It returns the empty string when the schema is unrecognized or when the
// payload pointer for the declared schema is nil.
func (artifact Artifact) Version() string {
	if artifact.Schema == SchemaFull && artifact.Full != nil {
		return artifact.Full.Metadata.Version
	}
	if artifact.Schema == SchemaIntermediate && artifact.Intermediate != nil {
		return artifact.Intermediate.Metadata.Version
	}
	if artifact.Schema == SchemaMinimal && artifact.Minimal != nil {
		return artifact.Minimal.Metadata.Version
	}
	return ""
}
// ApplyArtifact trims input with opts and returns the result wrapped in an
// artifact of the same schema as the input.
//
// The work is delegated to Apply, ApplyIntermediate, or ApplyMinimal according
// to input.Schema; the ID mapping and removed-ID list are passed through from
// that call unchanged. OverlapGroupsRecomputed is reported as true only for
// full-schema input.
//
// An error is returned when the payload for the declared schema is nil, when
// the schema is not supported, or when the delegated trim fails.
func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
	if input.Schema == SchemaFull {
		if input.Full == nil {
			return ApplyArtifactResult{}, fmt.Errorf("full artifact payload is missing")
		}
		trimmed, err := Apply(*input.Full, opts)
		if err != nil {
			return ApplyArtifactResult{}, err
		}
		// Copy the transcript into its own variable so the artifact points
		// at a standalone value.
		transcript := trimmed.Transcript
		applied := ApplyArtifactResult{
			OldToNewID:              trimmed.OldToNewID,
			RemovedIDs:              trimmed.RemovedIDs,
			OverlapGroupsRecomputed: true,
		}
		applied.Artifact = Artifact{Schema: SchemaFull, Full: &transcript}
		return applied, nil
	}

	if input.Schema == SchemaIntermediate {
		if input.Intermediate == nil {
			return ApplyArtifactResult{}, fmt.Errorf("intermediate artifact payload is missing")
		}
		trimmed, err := ApplyIntermediate(*input.Intermediate, opts)
		if err != nil {
			return ApplyArtifactResult{}, err
		}
		transcript := trimmed.Transcript
		applied := ApplyArtifactResult{
			OldToNewID:              trimmed.OldToNewID,
			RemovedIDs:              trimmed.RemovedIDs,
			OverlapGroupsRecomputed: false,
		}
		applied.Artifact = Artifact{Schema: SchemaIntermediate, Intermediate: &transcript}
		return applied, nil
	}

	if input.Schema == SchemaMinimal {
		if input.Minimal == nil {
			return ApplyArtifactResult{}, fmt.Errorf("minimal artifact payload is missing")
		}
		trimmed, err := ApplyMinimal(*input.Minimal, opts)
		if err != nil {
			return ApplyArtifactResult{}, err
		}
		transcript := trimmed.Transcript
		applied := ApplyArtifactResult{
			OldToNewID:              trimmed.OldToNewID,
			RemovedIDs:              trimmed.RemovedIDs,
			OverlapGroupsRecomputed: false,
		}
		applied.Artifact = Artifact{Schema: SchemaMinimal, Minimal: &transcript}
		return applied, nil
	}

	return ApplyArtifactResult{}, fmt.Errorf("unsupported artifact schema %q", input.Schema)
}
// ConvertArtifact re-expresses input in the schema named by outputSchema.
//
// An empty outputSchema, or one equal to input.Schema, returns input
// unchanged. Supported conversions are full to intermediate, full to minimal,
// intermediate to minimal, and minimal to intermediate. Requests to produce a
// full artifact from an intermediate or minimal one are rejected with a
// "cannot emit" error.
//
// Checks happen in this order: input schema, presence of the input payload,
// then output schema; the first failing check determines the error.
func ConvertArtifact(input Artifact, outputSchema string) (Artifact, error) {
	if outputSchema == "" || outputSchema == input.Schema {
		return input, nil
	}

	// Shared failure for an output schema that the current input cannot
	// target and that is not a known "cannot emit" case.
	unsupportedOutput := func() (Artifact, error) {
		return Artifact{}, fmt.Errorf("unsupported output schema %q", outputSchema)
	}

	switch input.Schema {
	case SchemaFull:
		source := input.Full
		if source == nil {
			return Artifact{}, fmt.Errorf("full artifact payload is missing")
		}
		if outputSchema == SchemaIntermediate {
			converted := intermediateFromFull(*source)
			return Artifact{Schema: SchemaIntermediate, Intermediate: &converted}, nil
		}
		if outputSchema == SchemaMinimal {
			converted := minimalFromFull(*source)
			return Artifact{Schema: SchemaMinimal, Minimal: &converted}, nil
		}
		return unsupportedOutput()

	case SchemaIntermediate:
		source := input.Intermediate
		if source == nil {
			return Artifact{}, fmt.Errorf("intermediate artifact payload is missing")
		}
		if outputSchema == SchemaMinimal {
			converted := minimalFromIntermediate(*source)
			return Artifact{Schema: SchemaMinimal, Minimal: &converted}, nil
		}
		if outputSchema == SchemaFull {
			return Artifact{}, fmt.Errorf("cannot emit %q from %q input artifact", SchemaFull, SchemaIntermediate)
		}
		return unsupportedOutput()

	case SchemaMinimal:
		source := input.Minimal
		if source == nil {
			return Artifact{}, fmt.Errorf("minimal artifact payload is missing")
		}
		if outputSchema == SchemaIntermediate {
			converted := intermediateFromMinimal(*source)
			return Artifact{Schema: SchemaIntermediate, Intermediate: &converted}, nil
		}
		if outputSchema == SchemaFull {
			return Artifact{}, fmt.Errorf("cannot emit %q from %q input artifact", SchemaFull, SchemaMinimal)
		}
		return unsupportedOutput()
	}

	return Artifact{}, fmt.Errorf("unsupported input schema %q", input.Schema)
}
// intermediateFromFull builds an intermediate transcript from a full one.
//
// Each segment carries over ID, Start, End, Speaker, Text, and a copy of
// Categories. Metadata keeps Application and Version and sets OutputSchema to
// SchemaIntermediate. The returned Segments slice is always non-nil.
func intermediateFromFull(input schema.Transcript) schema.IntermediateTranscript {
	converted := make([]schema.IntermediateSegment, 0, len(input.Segments))
	for _, src := range input.Segments {
		// Appending to a nil slice copies the categories so the output does
		// not share a backing array with the input (and stays nil when empty).
		categories := append([]string(nil), src.Categories...)
		converted = append(converted, schema.IntermediateSegment{
			ID:         src.ID,
			Start:      src.Start,
			End:        src.End,
			Speaker:    src.Speaker,
			Text:       src.Text,
			Categories: categories,
		})
	}

	meta := schema.IntermediateMetadata{
		Application:  input.Metadata.Application,
		Version:      input.Metadata.Version,
		OutputSchema: SchemaIntermediate,
	}
	return schema.IntermediateTranscript{Metadata: meta, Segments: converted}
}
// minimalFromFull builds a minimal transcript from a full one.
//
// Each segment carries over ID, Start, End, Speaker, and Text only. Metadata
// keeps Application and Version and sets OutputSchema to SchemaMinimal. The
// returned Segments slice is always non-nil.
func minimalFromFull(input schema.Transcript) schema.MinimalTranscript {
	converted := make([]schema.MinimalSegment, 0, len(input.Segments))
	for _, src := range input.Segments {
		converted = append(converted, schema.MinimalSegment{
			ID:      src.ID,
			Start:   src.Start,
			End:     src.End,
			Speaker: src.Speaker,
			Text:    src.Text,
		})
	}

	meta := schema.MinimalMetadata{
		Application:  input.Metadata.Application,
		Version:      input.Metadata.Version,
		OutputSchema: SchemaMinimal,
	}
	return schema.MinimalTranscript{Metadata: meta, Segments: converted}
}
// minimalFromIntermediate builds a minimal transcript from an intermediate
// one.
//
// Each segment carries over ID, Start, End, Speaker, and Text; the
// intermediate segment's Categories are not copied. Metadata keeps Application
// and Version and sets OutputSchema to SchemaMinimal. The returned Segments
// slice is always non-nil.
func minimalFromIntermediate(input schema.IntermediateTranscript) schema.MinimalTranscript {
	converted := make([]schema.MinimalSegment, 0, len(input.Segments))
	for _, src := range input.Segments {
		converted = append(converted, schema.MinimalSegment{
			ID:      src.ID,
			Start:   src.Start,
			End:     src.End,
			Speaker: src.Speaker,
			Text:    src.Text,
		})
	}

	meta := schema.MinimalMetadata{
		Application:  input.Metadata.Application,
		Version:      input.Metadata.Version,
		OutputSchema: SchemaMinimal,
	}
	return schema.MinimalTranscript{Metadata: meta, Segments: converted}
}
// intermediateFromMinimal builds an intermediate transcript from a minimal
// one.
//
// Each segment carries over ID, Start, End, Speaker, and Text; Categories is
// left at its zero value because the conversion sets no categories. Metadata
// keeps Application and Version and sets OutputSchema to SchemaIntermediate.
// The returned Segments slice is always non-nil.
func intermediateFromMinimal(input schema.MinimalTranscript) schema.IntermediateTranscript {
	converted := make([]schema.IntermediateSegment, 0, len(input.Segments))
	for _, src := range input.Segments {
		converted = append(converted, schema.IntermediateSegment{
			ID:      src.ID,
			Start:   src.Start,
			End:     src.End,
			Speaker: src.Speaker,
			Text:    src.Text,
		})
	}

	meta := schema.IntermediateMetadata{
		Application:  input.Metadata.Application,
		Version:      input.Metadata.Version,
		OutputSchema: SchemaIntermediate,
	}
	return schema.IntermediateTranscript{Metadata: meta, Segments: converted}
}