Add trim CLI command
This commit is contained in:
@@ -95,6 +95,9 @@ func Apply(input schema.Transcript, opts Options) (Result, error) {
|
||||
}
|
||||
|
||||
kept, groups := recomputeOverlapGroups(kept)
|
||||
if groups == nil {
|
||||
groups = make([]schema.OverlapGroup, 0)
|
||||
}
|
||||
|
||||
out := copyTranscript(input)
|
||||
out.Segments = kept
|
||||
@@ -278,7 +281,7 @@ func validateSelectedIDsExist(selected []int, idIndex map[int]int) error {
|
||||
|
||||
func recomputeOverlapGroups(segments []schema.Segment) ([]schema.Segment, []schema.OverlapGroup) {
|
||||
if len(segments) == 0 {
|
||||
return segments, nil
|
||||
return segments, make([]schema.OverlapGroup, 0)
|
||||
}
|
||||
|
||||
modelSegments := make([]model.Segment, len(segments))
|
||||
|
||||
387
internal/trim/artifact.go
Normal file
387
internal/trim/artifact.go
Normal file
@@ -0,0 +1,387 @@
|
||||
package trim
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/schema"
|
||||
)
|
||||
|
||||
// Identifiers of the output schemas this package can parse and emit. The
// conversion helpers in this file write the matching value into the
// OutputSchema field of the metadata they produce.
const (
	// SchemaMinimal names the minimal artifact schema.
	SchemaMinimal = "seriatim-minimal"

	// SchemaIntermediate names the intermediate artifact schema.
	SchemaIntermediate = "seriatim-intermediate"

	// SchemaFull names the full transcript schema.
	SchemaFull = "seriatim-full"
)
|
||||
|
||||
// Artifact stores a parsed seriatim output artifact of one supported schema.
|
||||
type Artifact struct {
|
||||
Schema string
|
||||
Full *schema.Transcript
|
||||
Intermediate *schema.IntermediateTranscript
|
||||
Minimal *schema.MinimalTranscript
|
||||
}
|
||||
|
||||
// ApplyArtifactResult contains trimmed artifact output and ID mapping metadata.
|
||||
type ApplyArtifactResult struct {
|
||||
Artifact Artifact
|
||||
OldToNewID map[int]int
|
||||
RemovedIDs []int
|
||||
}
|
||||
|
||||
// ParseArtifactJSON parses and validates a serialized seriatim output artifact.
|
||||
func ParseArtifactJSON(data []byte) (Artifact, error) {
|
||||
var full schema.Transcript
|
||||
if err := json.Unmarshal(data, &full); err == nil {
|
||||
if err := schema.ValidateTranscript(full); err == nil {
|
||||
return Artifact{
|
||||
Schema: SchemaFull,
|
||||
Full: &full,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
var intermediate schema.IntermediateTranscript
|
||||
if err := json.Unmarshal(data, &intermediate); err == nil {
|
||||
if err := schema.ValidateIntermediateTranscript(intermediate); err == nil {
|
||||
return Artifact{
|
||||
Schema: SchemaIntermediate,
|
||||
Intermediate: &intermediate,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
var minimal schema.MinimalTranscript
|
||||
if err := json.Unmarshal(data, &minimal); err == nil {
|
||||
if err := schema.ValidateMinimalTranscript(minimal); err == nil {
|
||||
return Artifact{
|
||||
Schema: SchemaMinimal,
|
||||
Minimal: &minimal,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
return Artifact{}, fmt.Errorf("input JSON is not a valid seriatim output artifact")
|
||||
}
|
||||
|
||||
// ValidateArtifact validates an artifact against its declared schema.
|
||||
func ValidateArtifact(artifact Artifact) error {
|
||||
switch artifact.Schema {
|
||||
case SchemaFull:
|
||||
if artifact.Full == nil {
|
||||
return fmt.Errorf("full artifact payload is missing")
|
||||
}
|
||||
return schema.ValidateTranscript(*artifact.Full)
|
||||
case SchemaIntermediate:
|
||||
if artifact.Intermediate == nil {
|
||||
return fmt.Errorf("intermediate artifact payload is missing")
|
||||
}
|
||||
return schema.ValidateIntermediateTranscript(*artifact.Intermediate)
|
||||
case SchemaMinimal:
|
||||
if artifact.Minimal == nil {
|
||||
return fmt.Errorf("minimal artifact payload is missing")
|
||||
}
|
||||
return schema.ValidateMinimalTranscript(*artifact.Minimal)
|
||||
default:
|
||||
return fmt.Errorf("unsupported artifact schema %q", artifact.Schema)
|
||||
}
|
||||
}
|
||||
|
||||
// Value returns the artifact value for JSON serialization.
|
||||
func (artifact Artifact) Value() any {
|
||||
switch artifact.Schema {
|
||||
case SchemaFull:
|
||||
if artifact.Full == nil {
|
||||
return schema.Transcript{}
|
||||
}
|
||||
return *artifact.Full
|
||||
case SchemaIntermediate:
|
||||
if artifact.Intermediate == nil {
|
||||
return schema.IntermediateTranscript{}
|
||||
}
|
||||
return *artifact.Intermediate
|
||||
case SchemaMinimal:
|
||||
if artifact.Minimal == nil {
|
||||
return schema.MinimalTranscript{}
|
||||
}
|
||||
return *artifact.Minimal
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SegmentCount returns the number of segments in the artifact.
|
||||
func (artifact Artifact) SegmentCount() int {
|
||||
switch artifact.Schema {
|
||||
case SchemaFull:
|
||||
if artifact.Full == nil {
|
||||
return 0
|
||||
}
|
||||
return len(artifact.Full.Segments)
|
||||
case SchemaIntermediate:
|
||||
if artifact.Intermediate == nil {
|
||||
return 0
|
||||
}
|
||||
return len(artifact.Intermediate.Segments)
|
||||
case SchemaMinimal:
|
||||
if artifact.Minimal == nil {
|
||||
return 0
|
||||
}
|
||||
return len(artifact.Minimal.Segments)
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// Application returns artifact metadata application name.
|
||||
func (artifact Artifact) Application() string {
|
||||
switch artifact.Schema {
|
||||
case SchemaFull:
|
||||
if artifact.Full == nil {
|
||||
return ""
|
||||
}
|
||||
return artifact.Full.Metadata.Application
|
||||
case SchemaIntermediate:
|
||||
if artifact.Intermediate == nil {
|
||||
return ""
|
||||
}
|
||||
return artifact.Intermediate.Metadata.Application
|
||||
case SchemaMinimal:
|
||||
if artifact.Minimal == nil {
|
||||
return ""
|
||||
}
|
||||
return artifact.Minimal.Metadata.Application
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Version returns artifact metadata version.
|
||||
func (artifact Artifact) Version() string {
|
||||
switch artifact.Schema {
|
||||
case SchemaFull:
|
||||
if artifact.Full == nil {
|
||||
return ""
|
||||
}
|
||||
return artifact.Full.Metadata.Version
|
||||
case SchemaIntermediate:
|
||||
if artifact.Intermediate == nil {
|
||||
return ""
|
||||
}
|
||||
return artifact.Intermediate.Metadata.Version
|
||||
case SchemaMinimal:
|
||||
if artifact.Minimal == nil {
|
||||
return ""
|
||||
}
|
||||
return artifact.Minimal.Metadata.Version
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// ApplyArtifact trims a parsed artifact while preserving its input schema.
|
||||
func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
|
||||
switch input.Schema {
|
||||
case SchemaFull:
|
||||
if input.Full == nil {
|
||||
return ApplyArtifactResult{}, fmt.Errorf("full artifact payload is missing")
|
||||
}
|
||||
result, err := Apply(*input.Full, opts)
|
||||
if err != nil {
|
||||
return ApplyArtifactResult{}, err
|
||||
}
|
||||
out := result.Transcript
|
||||
return ApplyArtifactResult{
|
||||
Artifact: Artifact{
|
||||
Schema: SchemaFull,
|
||||
Full: &out,
|
||||
},
|
||||
OldToNewID: result.OldToNewID,
|
||||
RemovedIDs: result.RemovedIDs,
|
||||
}, nil
|
||||
case SchemaIntermediate:
|
||||
if input.Intermediate == nil {
|
||||
return ApplyArtifactResult{}, fmt.Errorf("intermediate artifact payload is missing")
|
||||
}
|
||||
result, err := ApplyIntermediate(*input.Intermediate, opts)
|
||||
if err != nil {
|
||||
return ApplyArtifactResult{}, err
|
||||
}
|
||||
out := result.Transcript
|
||||
return ApplyArtifactResult{
|
||||
Artifact: Artifact{
|
||||
Schema: SchemaIntermediate,
|
||||
Intermediate: &out,
|
||||
},
|
||||
OldToNewID: result.OldToNewID,
|
||||
RemovedIDs: result.RemovedIDs,
|
||||
}, nil
|
||||
case SchemaMinimal:
|
||||
if input.Minimal == nil {
|
||||
return ApplyArtifactResult{}, fmt.Errorf("minimal artifact payload is missing")
|
||||
}
|
||||
result, err := ApplyMinimal(*input.Minimal, opts)
|
||||
if err != nil {
|
||||
return ApplyArtifactResult{}, err
|
||||
}
|
||||
out := result.Transcript
|
||||
return ApplyArtifactResult{
|
||||
Artifact: Artifact{
|
||||
Schema: SchemaMinimal,
|
||||
Minimal: &out,
|
||||
},
|
||||
OldToNewID: result.OldToNewID,
|
||||
RemovedIDs: result.RemovedIDs,
|
||||
}, nil
|
||||
default:
|
||||
return ApplyArtifactResult{}, fmt.Errorf("unsupported artifact schema %q", input.Schema)
|
||||
}
|
||||
}
|
||||
|
||||
// ConvertArtifact converts a parsed artifact to another supported output schema.
|
||||
func ConvertArtifact(input Artifact, outputSchema string) (Artifact, error) {
|
||||
if outputSchema == "" || outputSchema == input.Schema {
|
||||
return input, nil
|
||||
}
|
||||
|
||||
switch input.Schema {
|
||||
case SchemaFull:
|
||||
if input.Full == nil {
|
||||
return Artifact{}, fmt.Errorf("full artifact payload is missing")
|
||||
}
|
||||
switch outputSchema {
|
||||
case SchemaIntermediate:
|
||||
out := intermediateFromFull(*input.Full)
|
||||
return Artifact{
|
||||
Schema: SchemaIntermediate,
|
||||
Intermediate: &out,
|
||||
}, nil
|
||||
case SchemaMinimal:
|
||||
out := minimalFromFull(*input.Full)
|
||||
return Artifact{
|
||||
Schema: SchemaMinimal,
|
||||
Minimal: &out,
|
||||
}, nil
|
||||
default:
|
||||
return Artifact{}, fmt.Errorf("unsupported output schema %q", outputSchema)
|
||||
}
|
||||
case SchemaIntermediate:
|
||||
if input.Intermediate == nil {
|
||||
return Artifact{}, fmt.Errorf("intermediate artifact payload is missing")
|
||||
}
|
||||
switch outputSchema {
|
||||
case SchemaMinimal:
|
||||
out := minimalFromIntermediate(*input.Intermediate)
|
||||
return Artifact{
|
||||
Schema: SchemaMinimal,
|
||||
Minimal: &out,
|
||||
}, nil
|
||||
case SchemaFull:
|
||||
return Artifact{}, fmt.Errorf("cannot emit %q from %q input artifact", SchemaFull, SchemaIntermediate)
|
||||
default:
|
||||
return Artifact{}, fmt.Errorf("unsupported output schema %q", outputSchema)
|
||||
}
|
||||
case SchemaMinimal:
|
||||
if input.Minimal == nil {
|
||||
return Artifact{}, fmt.Errorf("minimal artifact payload is missing")
|
||||
}
|
||||
switch outputSchema {
|
||||
case SchemaIntermediate:
|
||||
out := intermediateFromMinimal(*input.Minimal)
|
||||
return Artifact{
|
||||
Schema: SchemaIntermediate,
|
||||
Intermediate: &out,
|
||||
}, nil
|
||||
case SchemaFull:
|
||||
return Artifact{}, fmt.Errorf("cannot emit %q from %q input artifact", SchemaFull, SchemaMinimal)
|
||||
default:
|
||||
return Artifact{}, fmt.Errorf("unsupported output schema %q", outputSchema)
|
||||
}
|
||||
default:
|
||||
return Artifact{}, fmt.Errorf("unsupported input schema %q", input.Schema)
|
||||
}
|
||||
}
|
||||
|
||||
func intermediateFromFull(input schema.Transcript) schema.IntermediateTranscript {
|
||||
segments := make([]schema.IntermediateSegment, len(input.Segments))
|
||||
for index, segment := range input.Segments {
|
||||
segments[index] = schema.IntermediateSegment{
|
||||
ID: segment.ID,
|
||||
Start: segment.Start,
|
||||
End: segment.End,
|
||||
Speaker: segment.Speaker,
|
||||
Text: segment.Text,
|
||||
Categories: append([]string(nil), segment.Categories...),
|
||||
}
|
||||
}
|
||||
return schema.IntermediateTranscript{
|
||||
Metadata: schema.IntermediateMetadata{
|
||||
Application: input.Metadata.Application,
|
||||
Version: input.Metadata.Version,
|
||||
OutputSchema: SchemaIntermediate,
|
||||
},
|
||||
Segments: segments,
|
||||
}
|
||||
}
|
||||
|
||||
func minimalFromFull(input schema.Transcript) schema.MinimalTranscript {
|
||||
segments := make([]schema.MinimalSegment, len(input.Segments))
|
||||
for index, segment := range input.Segments {
|
||||
segments[index] = schema.MinimalSegment{
|
||||
ID: segment.ID,
|
||||
Start: segment.Start,
|
||||
End: segment.End,
|
||||
Speaker: segment.Speaker,
|
||||
Text: segment.Text,
|
||||
}
|
||||
}
|
||||
return schema.MinimalTranscript{
|
||||
Metadata: schema.MinimalMetadata{
|
||||
Application: input.Metadata.Application,
|
||||
Version: input.Metadata.Version,
|
||||
OutputSchema: SchemaMinimal,
|
||||
},
|
||||
Segments: segments,
|
||||
}
|
||||
}
|
||||
|
||||
func minimalFromIntermediate(input schema.IntermediateTranscript) schema.MinimalTranscript {
|
||||
segments := make([]schema.MinimalSegment, len(input.Segments))
|
||||
for index, segment := range input.Segments {
|
||||
segments[index] = schema.MinimalSegment{
|
||||
ID: segment.ID,
|
||||
Start: segment.Start,
|
||||
End: segment.End,
|
||||
Speaker: segment.Speaker,
|
||||
Text: segment.Text,
|
||||
}
|
||||
}
|
||||
return schema.MinimalTranscript{
|
||||
Metadata: schema.MinimalMetadata{
|
||||
Application: input.Metadata.Application,
|
||||
Version: input.Metadata.Version,
|
||||
OutputSchema: SchemaMinimal,
|
||||
},
|
||||
Segments: segments,
|
||||
}
|
||||
}
|
||||
|
||||
func intermediateFromMinimal(input schema.MinimalTranscript) schema.IntermediateTranscript {
|
||||
segments := make([]schema.IntermediateSegment, len(input.Segments))
|
||||
for index, segment := range input.Segments {
|
||||
segments[index] = schema.IntermediateSegment{
|
||||
ID: segment.ID,
|
||||
Start: segment.Start,
|
||||
End: segment.End,
|
||||
Speaker: segment.Speaker,
|
||||
Text: segment.Text,
|
||||
}
|
||||
}
|
||||
return schema.IntermediateTranscript{
|
||||
Metadata: schema.IntermediateMetadata{
|
||||
Application: input.Metadata.Application,
|
||||
Version: input.Metadata.Version,
|
||||
OutputSchema: SchemaIntermediate,
|
||||
},
|
||||
Segments: segments,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user