Add trim report output
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
@@ -12,6 +13,29 @@ import (
|
||||
triminternal "gitea.maximumdirect.net/eric/seriatim/internal/trim"
|
||||
)
|
||||
|
||||
type trimAuditReport struct {
|
||||
Operation string `json:"operation"`
|
||||
InputFile string `json:"input_file"`
|
||||
OutputFile string `json:"output_file"`
|
||||
InputSchema string `json:"input_schema"`
|
||||
OutputSchema string `json:"output_schema"`
|
||||
Mode string `json:"mode"`
|
||||
Selector string `json:"selector"`
|
||||
SelectedIDs []int `json:"selected_ids"`
|
||||
AllowEmpty bool `json:"allow_empty"`
|
||||
InputSegmentCount int `json:"input_segment_count"`
|
||||
RetainedSegmentCount int `json:"retained_segment_count"`
|
||||
RemovedSegmentCount int `json:"removed_segment_count"`
|
||||
RemovedInputIDs []int `json:"removed_input_ids"`
|
||||
OldToNewIDMapping []trimIDMapping `json:"old_to_new_id_mapping"`
|
||||
OverlapGroupsRecomputed bool `json:"overlap_groups_recomputed"`
|
||||
}
|
||||
|
||||
// trimIDMapping is one entry of the old-to-new ID mapping in a trim audit
// report: OldID is a key of the trim result's old-to-new ID map and NewID is
// the value stored under that key.
type trimIDMapping struct {
	OldID int `json:"old_id"`
	NewID int `json:"new_id"`
}
|
||||
|
||||
func newTrimCommand() *cobra.Command {
|
||||
var opts config.TrimOptions
|
||||
|
||||
@@ -43,6 +67,8 @@ func newTrimCommand() *cobra.Command {
|
||||
if err != nil {
|
||||
return fmt.Errorf("--input-file %q: %w", cfg.InputFile, err)
|
||||
}
|
||||
inputSegmentCount := artifact.SegmentCount()
|
||||
inputSchema := artifact.Schema
|
||||
|
||||
mode := triminternal.ModeKeep
|
||||
if cfg.Mode == "remove" {
|
||||
@@ -77,6 +103,28 @@ func newTrimCommand() *cobra.Command {
|
||||
}
|
||||
|
||||
if cfg.ReportFile != "" {
|
||||
audit := trimAuditReport{
|
||||
Operation: "trim",
|
||||
InputFile: cfg.InputFile,
|
||||
OutputFile: cfg.OutputFile,
|
||||
InputSchema: inputSchema,
|
||||
OutputSchema: outputArtifact.Schema,
|
||||
Mode: cfg.Mode,
|
||||
Selector: cfg.Selector,
|
||||
SelectedIDs: selector.IDs(),
|
||||
AllowEmpty: cfg.AllowEmpty,
|
||||
InputSegmentCount: inputSegmentCount,
|
||||
RetainedSegmentCount: len(trimmed.OldToNewID),
|
||||
RemovedSegmentCount: len(trimmed.RemovedIDs),
|
||||
RemovedInputIDs: append([]int(nil), trimmed.RemovedIDs...),
|
||||
OldToNewIDMapping: orderedIDMapping(trimmed.OldToNewID),
|
||||
OverlapGroupsRecomputed: trimmed.OverlapGroupsRecomputed,
|
||||
}
|
||||
auditJSON, err := json.Marshal(audit)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal trim audit report: %w", err)
|
||||
}
|
||||
|
||||
rpt := report.Report{
|
||||
Metadata: report.Metadata{
|
||||
Application: outputArtifact.Application(),
|
||||
@@ -86,7 +134,8 @@ func newTrimCommand() *cobra.Command {
|
||||
OutputModules: []string{"json"},
|
||||
},
|
||||
Events: []report.Event{
|
||||
report.Info("trim", "trim", fmt.Sprintf("mode=%s retained %d segment(s), removed %d segment(s)", cfg.Mode, len(trimmed.OldToNewID), len(trimmed.RemovedIDs))),
|
||||
report.Info("trim", "trim", fmt.Sprintf("trimmed %d input segment(s) into %d output segment(s) with mode=%s", inputSegmentCount, outputArtifact.SegmentCount(), cfg.Mode)),
|
||||
report.Info("trim", "trim-audit", string(auditJSON)),
|
||||
report.Info("trim", "validate-output", fmt.Sprintf("validated %d output segment(s)", outputArtifact.SegmentCount())),
|
||||
report.Info("output", "json", "wrote transcript JSON"),
|
||||
},
|
||||
@@ -123,3 +172,20 @@ func writeOutputJSON(path string, value any) error {
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(value)
|
||||
}
|
||||
|
||||
func orderedIDMapping(mapping map[int]int) []trimIDMapping {
|
||||
keys := make([]int, 0, len(mapping))
|
||||
for oldID := range mapping {
|
||||
keys = append(keys, oldID)
|
||||
}
|
||||
sort.Ints(keys)
|
||||
|
||||
pairs := make([]trimIDMapping, 0, len(keys))
|
||||
for _, oldID := range keys {
|
||||
pairs = append(pairs, trimIDMapping{
|
||||
OldID: oldID,
|
||||
NewID: mapping[oldID],
|
||||
})
|
||||
}
|
||||
return pairs
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user