package trim

import (
	"encoding/json"
	"fmt"

	"gitea.maximumdirect.net/eric/seriatim/schema"
)

// Names of the seriatim output schemas handled by this package.
const (
	SchemaMinimal      = "seriatim-minimal"
	SchemaIntermediate = "seriatim-intermediate"
	SchemaFull         = "seriatim-full"
)

// Artifact holds one parsed seriatim output artifact. Schema names the schema
// in use, and the payload pointer that corresponds to it carries the data.
type Artifact struct {
	Schema       string
	Full         *schema.Transcript
	Intermediate *schema.IntermediateTranscript
	Minimal      *schema.MinimalTranscript
}

// ApplyArtifactResult bundles a trimmed artifact with the ID mapping metadata
// produced while trimming it.
type ApplyArtifactResult struct {
	Artifact                Artifact
	OldToNewID              map[int]int
	RemovedIDs              []int
	OverlapGroupsRecomputed bool
}

// ParseArtifactJSON decodes data as a seriatim output artifact. The full,
// intermediate, and minimal schemas are attempted in that order; the first
// one that both unmarshals and passes its schema validation is returned.
// When none of them fits, a generic error is returned and the individual
// decode/validation errors are not reported.
func ParseArtifactJSON(data []byte) (Artifact, error) {
	// Each attempt decodes into its own fresh value so that a failed attempt
	// cannot leak partially decoded fields into the next one.
	var full schema.Transcript
	if json.Unmarshal(data, &full) == nil && schema.ValidateTranscript(full) == nil {
		return Artifact{Schema: SchemaFull, Full: &full}, nil
	}

	var intermediate schema.IntermediateTranscript
	if json.Unmarshal(data, &intermediate) == nil && schema.ValidateIntermediateTranscript(intermediate) == nil {
		return Artifact{Schema: SchemaIntermediate, Intermediate: &intermediate}, nil
	}

	var minimal schema.MinimalTranscript
	if json.Unmarshal(data, &minimal) == nil && schema.ValidateMinimalTranscript(minimal) == nil {
		return Artifact{Schema: SchemaMinimal, Minimal: &minimal}, nil
	}

	return Artifact{}, fmt.Errorf("input JSON is not a valid seriatim output artifact")
}

// ValidateArtifact validates an artifact against its declared schema. It
// returns an error when the payload for that schema is missing or when the
// schema name is not one of the supported constants.
func ValidateArtifact(artifact Artifact) error { switch artifact.Schema { case SchemaFull: if artifact.Full == nil { return fmt.Errorf("full artifact payload is missing") } return schema.ValidateTranscript(*artifact.Full) case SchemaIntermediate: if artifact.Intermediate == nil { return fmt.Errorf("intermediate artifact payload is missing") } return schema.ValidateIntermediateTranscript(*artifact.Intermediate) case SchemaMinimal: if artifact.Minimal == nil { return fmt.Errorf("minimal artifact payload is missing") } return schema.ValidateMinimalTranscript(*artifact.Minimal) default: return fmt.Errorf("unsupported artifact schema %q", artifact.Schema) } } // Value returns the artifact value for JSON serialization. func (artifact Artifact) Value() any { switch artifact.Schema { case SchemaFull: if artifact.Full == nil { return schema.Transcript{} } return *artifact.Full case SchemaIntermediate: if artifact.Intermediate == nil { return schema.IntermediateTranscript{} } return *artifact.Intermediate case SchemaMinimal: if artifact.Minimal == nil { return schema.MinimalTranscript{} } return *artifact.Minimal default: return nil } } // SegmentCount returns the number of segments in the artifact. func (artifact Artifact) SegmentCount() int { switch artifact.Schema { case SchemaFull: if artifact.Full == nil { return 0 } return len(artifact.Full.Segments) case SchemaIntermediate: if artifact.Intermediate == nil { return 0 } return len(artifact.Intermediate.Segments) case SchemaMinimal: if artifact.Minimal == nil { return 0 } return len(artifact.Minimal.Segments) default: return 0 } } // Application returns artifact metadata application name. 
func (artifact Artifact) Application() string { switch artifact.Schema { case SchemaFull: if artifact.Full == nil { return "" } return artifact.Full.Metadata.Application case SchemaIntermediate: if artifact.Intermediate == nil { return "" } return artifact.Intermediate.Metadata.Application case SchemaMinimal: if artifact.Minimal == nil { return "" } return artifact.Minimal.Metadata.Application default: return "" } } // Version returns artifact metadata version. func (artifact Artifact) Version() string { switch artifact.Schema { case SchemaFull: if artifact.Full == nil { return "" } return artifact.Full.Metadata.Version case SchemaIntermediate: if artifact.Intermediate == nil { return "" } return artifact.Intermediate.Metadata.Version case SchemaMinimal: if artifact.Minimal == nil { return "" } return artifact.Minimal.Metadata.Version default: return "" } } // ApplyArtifact trims a parsed artifact while preserving its input schema. func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) { switch input.Schema { case SchemaFull: if input.Full == nil { return ApplyArtifactResult{}, fmt.Errorf("full artifact payload is missing") } result, err := Apply(*input.Full, opts) if err != nil { return ApplyArtifactResult{}, err } out := result.Transcript return ApplyArtifactResult{ Artifact: Artifact{ Schema: SchemaFull, Full: &out, }, OldToNewID: result.OldToNewID, RemovedIDs: result.RemovedIDs, OverlapGroupsRecomputed: true, }, nil case SchemaIntermediate: if input.Intermediate == nil { return ApplyArtifactResult{}, fmt.Errorf("intermediate artifact payload is missing") } result, err := ApplyIntermediate(*input.Intermediate, opts) if err != nil { return ApplyArtifactResult{}, err } out := result.Transcript return ApplyArtifactResult{ Artifact: Artifact{ Schema: SchemaIntermediate, Intermediate: &out, }, OldToNewID: result.OldToNewID, RemovedIDs: result.RemovedIDs, OverlapGroupsRecomputed: false, }, nil case SchemaMinimal: if input.Minimal == nil { return 
ApplyArtifactResult{}, fmt.Errorf("minimal artifact payload is missing") } result, err := ApplyMinimal(*input.Minimal, opts) if err != nil { return ApplyArtifactResult{}, err } out := result.Transcript return ApplyArtifactResult{ Artifact: Artifact{ Schema: SchemaMinimal, Minimal: &out, }, OldToNewID: result.OldToNewID, RemovedIDs: result.RemovedIDs, OverlapGroupsRecomputed: false, }, nil default: return ApplyArtifactResult{}, fmt.Errorf("unsupported artifact schema %q", input.Schema) } } // ConvertArtifact converts a parsed artifact to another supported output schema. func ConvertArtifact(input Artifact, outputSchema string) (Artifact, error) { if outputSchema == "" || outputSchema == input.Schema { return input, nil } switch input.Schema { case SchemaFull: if input.Full == nil { return Artifact{}, fmt.Errorf("full artifact payload is missing") } switch outputSchema { case SchemaIntermediate: out := intermediateFromFull(*input.Full) return Artifact{ Schema: SchemaIntermediate, Intermediate: &out, }, nil case SchemaMinimal: out := minimalFromFull(*input.Full) return Artifact{ Schema: SchemaMinimal, Minimal: &out, }, nil default: return Artifact{}, fmt.Errorf("unsupported output schema %q", outputSchema) } case SchemaIntermediate: if input.Intermediate == nil { return Artifact{}, fmt.Errorf("intermediate artifact payload is missing") } switch outputSchema { case SchemaMinimal: out := minimalFromIntermediate(*input.Intermediate) return Artifact{ Schema: SchemaMinimal, Minimal: &out, }, nil case SchemaFull: return Artifact{}, fmt.Errorf("cannot emit %q from %q input artifact", SchemaFull, SchemaIntermediate) default: return Artifact{}, fmt.Errorf("unsupported output schema %q", outputSchema) } case SchemaMinimal: if input.Minimal == nil { return Artifact{}, fmt.Errorf("minimal artifact payload is missing") } switch outputSchema { case SchemaIntermediate: out := intermediateFromMinimal(*input.Minimal) return Artifact{ Schema: SchemaIntermediate, Intermediate: &out, }, 
nil case SchemaFull: return Artifact{}, fmt.Errorf("cannot emit %q from %q input artifact", SchemaFull, SchemaMinimal) default: return Artifact{}, fmt.Errorf("unsupported output schema %q", outputSchema) } default: return Artifact{}, fmt.Errorf("unsupported input schema %q", input.Schema) } } func intermediateFromFull(input schema.Transcript) schema.IntermediateTranscript { segments := make([]schema.IntermediateSegment, len(input.Segments)) for index, segment := range input.Segments { segments[index] = schema.IntermediateSegment{ ID: segment.ID, Start: segment.Start, End: segment.End, Speaker: segment.Speaker, Text: segment.Text, Categories: append([]string(nil), segment.Categories...), } } return schema.IntermediateTranscript{ Metadata: schema.IntermediateMetadata{ Application: input.Metadata.Application, Version: input.Metadata.Version, OutputSchema: SchemaIntermediate, }, Segments: segments, } } func minimalFromFull(input schema.Transcript) schema.MinimalTranscript { segments := make([]schema.MinimalSegment, len(input.Segments)) for index, segment := range input.Segments { segments[index] = schema.MinimalSegment{ ID: segment.ID, Start: segment.Start, End: segment.End, Speaker: segment.Speaker, Text: segment.Text, } } return schema.MinimalTranscript{ Metadata: schema.MinimalMetadata{ Application: input.Metadata.Application, Version: input.Metadata.Version, OutputSchema: SchemaMinimal, }, Segments: segments, } } func minimalFromIntermediate(input schema.IntermediateTranscript) schema.MinimalTranscript { segments := make([]schema.MinimalSegment, len(input.Segments)) for index, segment := range input.Segments { segments[index] = schema.MinimalSegment{ ID: segment.ID, Start: segment.Start, End: segment.End, Speaker: segment.Speaker, Text: segment.Text, } } return schema.MinimalTranscript{ Metadata: schema.MinimalMetadata{ Application: input.Metadata.Application, Version: input.Metadata.Version, OutputSchema: SchemaMinimal, }, Segments: segments, } } func 
intermediateFromMinimal(input schema.MinimalTranscript) schema.IntermediateTranscript { segments := make([]schema.IntermediateSegment, len(input.Segments)) for index, segment := range input.Segments { segments[index] = schema.IntermediateSegment{ ID: segment.ID, Start: segment.Start, End: segment.End, Speaker: segment.Speaker, Text: segment.Text, } } return schema.IntermediateTranscript{ Metadata: schema.IntermediateMetadata{ Application: input.Metadata.Application, Version: input.Metadata.Version, OutputSchema: SchemaIntermediate, }, Segments: segments, } }