Add trim report output

This commit is contained in:
2026-05-08 14:56:24 +00:00
parent ac3dcf2557
commit c48b02d2ec
3 changed files with 228 additions and 10 deletions

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"os"
"sort"
"github.com/spf13/cobra"
@@ -12,6 +13,29 @@ import (
triminternal "gitea.maximumdirect.net/eric/seriatim/internal/trim"
)
// trimAuditReport is the machine-readable summary of a single trim run.
// When a report file is requested, the trim command marshals it to JSON and
// carries the result as the message of the "trim-audit" report event.
//
// encoding/json emits struct fields in declaration order, so the order of the
// fields below is also the key order of the serialized audit record.
type trimAuditReport struct {
// Operation identifies the operation; the trim command sets it to "trim".
Operation string `json:"operation"`
// InputFile and OutputFile are the configured input and output paths.
InputFile string `json:"input_file"`
OutputFile string `json:"output_file"`
// InputSchema is the schema of the artifact as loaded; OutputSchema is the
// schema of the artifact that is written.
InputSchema string `json:"input_schema"`
OutputSchema string `json:"output_schema"`
// Mode and Selector echo the configured trim mode and selector string.
Mode string `json:"mode"`
Selector string `json:"selector"`
// SelectedIDs holds the IDs reported by the parsed selector.
// NOTE(review): presumably the expanded form of Selector — confirm against
// the selector implementation.
SelectedIDs []int `json:"selected_ids"`
// AllowEmpty echoes the configured allow-empty option.
AllowEmpty bool `json:"allow_empty"`
// InputSegmentCount is the segment count of the input artifact, captured
// before trimming.
InputSegmentCount int `json:"input_segment_count"`
// RetainedSegmentCount is the number of entries in the old-to-new ID mapping;
// RemovedSegmentCount is the number of removed IDs.
RetainedSegmentCount int `json:"retained_segment_count"`
RemovedSegmentCount int `json:"removed_segment_count"`
// RemovedInputIDs is a copy of the removed IDs reported by the trim result.
RemovedInputIDs []int `json:"removed_input_ids"`
// OldToNewIDMapping lists the old-ID/new-ID pairs in ascending old-ID order.
OldToNewIDMapping []trimIDMapping `json:"old_to_new_id_mapping"`
// OverlapGroupsRecomputed is copied from the trim result.
OverlapGroupsRecomputed bool `json:"overlap_groups_recomputed"`
}
// trimIDMapping is one entry of the audit report's old-to-new ID mapping:
// a segment ID from the input paired with the ID recorded for it in the
// trim result's OldToNewID map.
type trimIDMapping struct {
// OldID is the key taken from the OldToNewID map.
OldID int `json:"old_id"`
// NewID is the value stored under OldID in that map.
NewID int `json:"new_id"`
}
func newTrimCommand() *cobra.Command {
var opts config.TrimOptions
@@ -43,6 +67,8 @@ func newTrimCommand() *cobra.Command {
if err != nil {
return fmt.Errorf("--input-file %q: %w", cfg.InputFile, err)
}
inputSegmentCount := artifact.SegmentCount()
inputSchema := artifact.Schema
mode := triminternal.ModeKeep
if cfg.Mode == "remove" {
@@ -77,6 +103,28 @@ func newTrimCommand() *cobra.Command {
}
if cfg.ReportFile != "" {
audit := trimAuditReport{
Operation: "trim",
InputFile: cfg.InputFile,
OutputFile: cfg.OutputFile,
InputSchema: inputSchema,
OutputSchema: outputArtifact.Schema,
Mode: cfg.Mode,
Selector: cfg.Selector,
SelectedIDs: selector.IDs(),
AllowEmpty: cfg.AllowEmpty,
InputSegmentCount: inputSegmentCount,
RetainedSegmentCount: len(trimmed.OldToNewID),
RemovedSegmentCount: len(trimmed.RemovedIDs),
RemovedInputIDs: append([]int(nil), trimmed.RemovedIDs...),
OldToNewIDMapping: orderedIDMapping(trimmed.OldToNewID),
OverlapGroupsRecomputed: trimmed.OverlapGroupsRecomputed,
}
auditJSON, err := json.Marshal(audit)
if err != nil {
return fmt.Errorf("marshal trim audit report: %w", err)
}
rpt := report.Report{
Metadata: report.Metadata{
Application: outputArtifact.Application(),
@@ -86,7 +134,8 @@ func newTrimCommand() *cobra.Command {
OutputModules: []string{"json"},
},
Events: []report.Event{
report.Info("trim", "trim", fmt.Sprintf("mode=%s retained %d segment(s), removed %d segment(s)", cfg.Mode, len(trimmed.OldToNewID), len(trimmed.RemovedIDs))),
report.Info("trim", "trim", fmt.Sprintf("trimmed %d input segment(s) into %d output segment(s) with mode=%s", inputSegmentCount, outputArtifact.SegmentCount(), cfg.Mode)),
report.Info("trim", "trim-audit", string(auditJSON)),
report.Info("trim", "validate-output", fmt.Sprintf("validated %d output segment(s)", outputArtifact.SegmentCount())),
report.Info("output", "json", "wrote transcript JSON"),
},
@@ -123,3 +172,20 @@ func writeOutputJSON(path string, value any) error {
enc.SetIndent("", " ")
return enc.Encode(value)
}
// orderedIDMapping flattens an old-ID to new-ID map into a slice of pairs
// sorted by ascending old ID, so the result does not depend on Go's
// randomized map iteration order.
//
// A nil or empty map yields an empty, non-nil slice.
func orderedIDMapping(mapping map[int]int) []trimIDMapping {
	result := make([]trimIDMapping, 0, len(mapping))
	for from, to := range mapping {
		result = append(result, trimIDMapping{OldID: from, NewID: to})
	}

	// Map keys are unique, so ordering by OldID alone is a total order and
	// the outcome is fully deterministic.
	sort.Slice(result, func(i, j int) bool {
		return result[i].OldID < result[j].OldID
	})
	return result
}