Add trim report output

This commit is contained in:
2026-05-08 14:56:24 +00:00
parent ac3dcf2557
commit c48b02d2ec
3 changed files with 228 additions and 10 deletions

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"os"
"sort"
"github.com/spf13/cobra"
@@ -12,6 +13,29 @@ import (
triminternal "gitea.maximumdirect.net/eric/seriatim/internal/trim"
)
// trimAuditReport is the machine-readable audit record for a single trim run.
// newTrimCommand fills it in when a report file is requested, marshals it to
// JSON, and embeds that JSON as the message of the "trim-audit" report event.
type trimAuditReport struct {
// Operation names the operation; newTrimCommand sets it to "trim".
Operation string `json:"operation"`
// InputFile and OutputFile are the paths taken from the trim configuration.
InputFile string `json:"input_file"`
OutputFile string `json:"output_file"`
// InputSchema and OutputSchema are the Schema values of the input artifact
// and of the written output artifact respectively.
InputSchema string `json:"input_schema"`
OutputSchema string `json:"output_schema"`
// Mode is the configured trim mode string (for example "remove").
Mode string `json:"mode"`
// Selector is the selector string exactly as configured; SelectedIDs is the
// ID list produced by the parsed selector (the tests expect ascending order).
Selector string `json:"selector"`
SelectedIDs []int `json:"selected_ids"`
// AllowEmpty mirrors the allow-empty setting of the trim configuration.
AllowEmpty bool `json:"allow_empty"`
// InputSegmentCount is the segment count of the input artifact.
InputSegmentCount int `json:"input_segment_count"`
// RetainedSegmentCount is the number of entries in the old-to-new ID map.
RetainedSegmentCount int `json:"retained_segment_count"`
// RemovedSegmentCount is the number of removed input IDs.
RemovedSegmentCount int `json:"removed_segment_count"`
// RemovedInputIDs is a copy of the removed IDs reported by the trim result.
// NOTE(review): newTrimCommand builds this with append([]int(nil), ...), so
// it is nil (JSON null rather than []) when nothing was removed — confirm
// that consumers accept null.
RemovedInputIDs []int `json:"removed_input_ids"`
// OldToNewIDMapping lists retained segments as (old ID, new ID) pairs in
// ascending old-ID order, as produced by orderedIDMapping.
OldToNewIDMapping []trimIDMapping `json:"old_to_new_id_mapping"`
// OverlapGroupsRecomputed is copied from the trim result; the tests expect
// it to be true when trimming a full-schema artifact.
OverlapGroupsRecomputed bool `json:"overlap_groups_recomputed"`
}
// trimIDMapping is one entry of the audit report's old-to-new ID mapping: a
// retained segment's ID in the input (OldID) and its ID in the output (NewID).
type trimIDMapping struct {
OldID int `json:"old_id"`
NewID int `json:"new_id"`
}
func newTrimCommand() *cobra.Command {
var opts config.TrimOptions
@@ -43,6 +67,8 @@ func newTrimCommand() *cobra.Command {
if err != nil {
return fmt.Errorf("--input-file %q: %w", cfg.InputFile, err)
}
inputSegmentCount := artifact.SegmentCount()
inputSchema := artifact.Schema
mode := triminternal.ModeKeep
if cfg.Mode == "remove" {
@@ -77,6 +103,28 @@ func newTrimCommand() *cobra.Command {
}
if cfg.ReportFile != "" {
audit := trimAuditReport{
Operation: "trim",
InputFile: cfg.InputFile,
OutputFile: cfg.OutputFile,
InputSchema: inputSchema,
OutputSchema: outputArtifact.Schema,
Mode: cfg.Mode,
Selector: cfg.Selector,
SelectedIDs: selector.IDs(),
AllowEmpty: cfg.AllowEmpty,
InputSegmentCount: inputSegmentCount,
RetainedSegmentCount: len(trimmed.OldToNewID),
RemovedSegmentCount: len(trimmed.RemovedIDs),
RemovedInputIDs: append([]int(nil), trimmed.RemovedIDs...),
OldToNewIDMapping: orderedIDMapping(trimmed.OldToNewID),
OverlapGroupsRecomputed: trimmed.OverlapGroupsRecomputed,
}
auditJSON, err := json.Marshal(audit)
if err != nil {
return fmt.Errorf("marshal trim audit report: %w", err)
}
rpt := report.Report{
Metadata: report.Metadata{
Application: outputArtifact.Application(),
@@ -86,7 +134,8 @@ func newTrimCommand() *cobra.Command {
OutputModules: []string{"json"},
},
Events: []report.Event{
report.Info("trim", "trim", fmt.Sprintf("mode=%s retained %d segment(s), removed %d segment(s)", cfg.Mode, len(trimmed.OldToNewID), len(trimmed.RemovedIDs))),
report.Info("trim", "trim", fmt.Sprintf("trimmed %d input segment(s) into %d output segment(s) with mode=%s", inputSegmentCount, outputArtifact.SegmentCount(), cfg.Mode)),
report.Info("trim", "trim-audit", string(auditJSON)),
report.Info("trim", "validate-output", fmt.Sprintf("validated %d output segment(s)", outputArtifact.SegmentCount())),
report.Info("output", "json", "wrote transcript JSON"),
},
@@ -123,3 +172,20 @@ func writeOutputJSON(path string, value any) error {
enc.SetIndent("", " ")
return enc.Encode(value)
}
// orderedIDMapping flattens an old-ID -> new-ID map into a slice of pairs
// sorted by ascending old ID. Go randomizes map iteration order, so sorting is
// what makes the serialized report deterministic. The result is always a
// non-nil slice, even for a nil or empty map.
func orderedIDMapping(mapping map[int]int) []trimIDMapping {
	pairs := make([]trimIDMapping, 0, len(mapping))
	for oldID, newID := range mapping {
		pairs = append(pairs, trimIDMapping{OldID: oldID, NewID: newID})
	}
	// Map keys are unique, so ordering by OldID alone is a total order.
	sort.Slice(pairs, func(i, j int) bool {
		return pairs[i].OldID < pairs[j].OldID
	})
	return pairs
}

View File

@@ -8,6 +8,7 @@ import (
"testing"
"gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/report"
"gitea.maximumdirect.net/eric/seriatim/schema"
)
@@ -221,6 +222,125 @@ func TestTrimRejectsNonSeriatimInputArtifacts(t *testing.T) {
}
}
// TestTrimReportFileContainsAuditFields runs trim in remove mode against the
// full-schema fixture with a report file requested, then checks the summary
// event, the validation event, and every field of the decoded trim-audit
// event.
func TestTrimReportFileContainsAuditFields(t *testing.T) {
	tmp := t.TempDir()
	inputPath := writeTrimFullFixture(t, tmp, "input.json")
	outputPath := filepath.Join(tmp, "trimmed.json")
	reportFile := filepath.Join(tmp, "trim-report.json")

	args := []string{
		"--input-file", inputPath,
		"--output-file", outputPath,
		"--report-file", reportFile,
		"--remove", "4,2",
	}
	if err := executeTrim(args...); err != nil {
		t.Fatalf("trim failed: %v", err)
	}

	var parsed report.Report
	readJSON(t, reportFile, &parsed)
	if len(parsed.Events) == 0 {
		t.Fatal("expected report events")
	}
	if found := hasReportEvent(parsed, "trim", "trim", "trimmed 4 input segment(s) into 2 output segment(s) with mode=remove"); !found {
		t.Fatal("expected trim summary event")
	}
	if found := hasReportEvent(parsed, "trim", "validate-output", "validated 2 output segment(s)"); !found {
		t.Fatal("expected validation event")
	}

	audit := extractTrimAuditEvent(t, parsed)

	// Identity of the run: operation name and the paths that were passed in.
	if got := audit.Operation; got != "trim" {
		t.Fatalf("operation = %q, want trim", got)
	}
	if got := audit.InputFile; got != inputPath {
		t.Fatalf("input_file = %q, want %q", got, inputPath)
	}
	if got := audit.OutputFile; got != outputPath {
		t.Fatalf("output_file = %q, want %q", got, outputPath)
	}
	schemasMatch := audit.InputSchema == config.OutputSchemaFull && audit.OutputSchema == config.OutputSchemaFull
	if !schemasMatch {
		t.Fatalf("schemas = %q -> %q, want full -> full", audit.InputSchema, audit.OutputSchema)
	}

	// Selection: the raw selector is preserved while the ID list is ordered.
	if got := audit.Mode; got != "remove" {
		t.Fatalf("mode = %q, want remove", got)
	}
	if got := audit.Selector; got != "4,2" {
		t.Fatalf("selector = %q, want %q", got, "4,2")
	}
	assertIntSliceEqual(t, audit.SelectedIDs, []int{2, 4})
	if audit.AllowEmpty {
		t.Fatal("allow_empty should be false")
	}

	// Counts and removed IDs.
	countsMatch := audit.InputSegmentCount == 4 && audit.RetainedSegmentCount == 2 && audit.RemovedSegmentCount == 2
	if !countsMatch {
		t.Fatalf("counts = input:%d retained:%d removed:%d, want 4/2/2", audit.InputSegmentCount, audit.RetainedSegmentCount, audit.RemovedSegmentCount)
	}
	assertIntSliceEqual(t, audit.RemovedInputIDs, []int{2, 4})

	// Retained segments 1 and 3 are renumbered to 1 and 2.
	if n := len(audit.OldToNewIDMapping); n != 2 {
		t.Fatalf("mapping length = %d, want 2", n)
	}
	if first := audit.OldToNewIDMapping[0]; first.OldID != 1 || first.NewID != 1 {
		t.Fatalf("mapping[0] = %#v, want old_id=1 new_id=1", first)
	}
	if second := audit.OldToNewIDMapping[1]; second.OldID != 3 || second.NewID != 2 {
		t.Fatalf("mapping[1] = %#v, want old_id=3 new_id=2", second)
	}

	if !audit.OverlapGroupsRecomputed {
		t.Fatal("expected overlap_groups_recomputed=true for full schema trim")
	}
}
// TestTrimReportOldToNewMappingIsDeterministicSorted keeps segments given in
// the unsorted order "4,1,3" and checks that the audit mapping is nevertheless
// reported in ascending old-ID order.
func TestTrimReportOldToNewMappingIsDeterministicSorted(t *testing.T) {
	tmp := t.TempDir()
	inputPath := writeTrimFullFixture(t, tmp, "input.json")
	outputPath := filepath.Join(tmp, "trimmed.json")
	reportFile := filepath.Join(tmp, "trim-report.json")

	args := []string{
		"--input-file", inputPath,
		"--output-file", outputPath,
		"--report-file", reportFile,
		"--keep", "4,1,3",
	}
	if err := executeTrim(args...); err != nil {
		t.Fatalf("trim failed: %v", err)
	}

	var parsed report.Report
	readJSON(t, reportFile, &parsed)
	audit := extractTrimAuditEvent(t, parsed)

	if n := len(audit.OldToNewIDMapping); n != 3 {
		t.Fatalf("mapping length = %d, want 3", n)
	}
	wantOldIDs := []int{1, 3, 4}
	for i := 0; i < len(wantOldIDs); i++ {
		if got := audit.OldToNewIDMapping[i].OldID; got != wantOldIDs[i] {
			t.Fatalf("mapping[%d].old_id = %d, want %d", i, got, wantOldIDs[i])
		}
	}
}
// TestTrimNoReportFileWhenOmitted runs trim without --report-file and checks
// that no trim-report.json appears next to the output.
// NOTE(review): the probed path is never passed to the command, so this only
// guards against a report being written to that specific default-looking name.
func TestTrimNoReportFileWhenOmitted(t *testing.T) {
	tmp := t.TempDir()
	inputPath := writeTrimFullFixture(t, tmp, "input.json")
	outputPath := filepath.Join(tmp, "trimmed.json")
	reportPath := filepath.Join(tmp, "trim-report.json")

	args := []string{
		"--input-file", inputPath,
		"--output-file", outputPath,
		"--keep", "1",
	}
	if err := executeTrim(args...); err != nil {
		t.Fatalf("trim failed: %v", err)
	}

	if _, statErr := os.Stat(reportPath); !os.IsNotExist(statErr) {
		t.Fatalf("expected no report file at %q, got err=%v", reportPath, statErr)
	}
}
func executeTrim(args ...string) error {
cmd := NewRootCommand()
cmd.SetArgs(append([]string{"trim"}, args...))
@@ -299,3 +419,31 @@ func assertSequentialIDs(t *testing.T, ids []int) {
}
}
}
// extractTrimAuditEvent finds the first event with stage "trim" and module
// "trim-audit" in rpt and decodes its message as a trimAuditReport. The test
// fails immediately if no such event exists or if its message is not valid
// JSON for that type.
func extractTrimAuditEvent(t *testing.T, rpt report.Report) trimAuditReport {
	t.Helper()

	var audit trimAuditReport
	for i := range rpt.Events {
		ev := rpt.Events[i]
		if ev.Stage != "trim" || ev.Module != "trim-audit" {
			continue
		}
		if err := json.Unmarshal([]byte(ev.Message), &audit); err != nil {
			t.Fatalf("decode trim audit event: %v", err)
		}
		return audit
	}

	t.Fatal("missing trim-audit event")
	// Not reached at runtime (t.Fatal stops the test); kept for the compiler.
	return trimAuditReport{}
}
// assertIntSliceEqual fails the test immediately unless got and want have the
// same length and the same element at every index. Nil and empty slices are
// treated as equal because only lengths and elements are compared.
func assertIntSliceEqual(t *testing.T, got []int, want []int) {
	t.Helper()
	if gotLen, wantLen := len(got), len(want); gotLen != wantLen {
		t.Fatalf("slice length = %d, want %d", gotLen, wantLen)
	}
	// Lengths match at this point, so indexing got by want's indices is safe.
	for i, expected := range want {
		if actual := got[i]; actual != expected {
			t.Fatalf("slice[%d] = %d, want %d (full got=%v, want=%v)", i, actual, expected, got, want)
		}
	}
}

View File

@@ -26,6 +26,7 @@ type ApplyArtifactResult struct {
Artifact Artifact
OldToNewID map[int]int
RemovedIDs []int
OverlapGroupsRecomputed bool
}
// ParseArtifactJSON parses and validates a serialized seriatim output artifact.
@@ -197,6 +198,7 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
},
OldToNewID: result.OldToNewID,
RemovedIDs: result.RemovedIDs,
OverlapGroupsRecomputed: true,
}, nil
case SchemaIntermediate:
if input.Intermediate == nil {
@@ -214,6 +216,7 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
},
OldToNewID: result.OldToNewID,
RemovedIDs: result.RemovedIDs,
OverlapGroupsRecomputed: false,
}, nil
case SchemaMinimal:
if input.Minimal == nil {
@@ -231,6 +234,7 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
},
OldToNewID: result.OldToNewID,
RemovedIDs: result.RemovedIDs,
OverlapGroupsRecomputed: false,
}, nil
default:
return ApplyArtifactResult{}, fmt.Errorf("unsupported artifact schema %q", input.Schema)