Add trim report output

This commit is contained in:
2026-05-08 14:56:24 +00:00
parent ac3dcf2557
commit c48b02d2ec
3 changed files with 228 additions and 10 deletions

View File

@@ -4,6 +4,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"os" "os"
"sort"
"github.com/spf13/cobra" "github.com/spf13/cobra"
@@ -12,6 +13,29 @@ import (
triminternal "gitea.maximumdirect.net/eric/seriatim/internal/trim" triminternal "gitea.maximumdirect.net/eric/seriatim/internal/trim"
) )
// trimAuditReport is the structured audit record for a single trim run.
// It is JSON-encoded and carried as the message of the "trim-audit" report
// event, so the field order and tags below define that payload's layout.
// Note that encoding/json writes a nil slice as null rather than [].
type trimAuditReport struct {
	// What was run, and on which files and schemas.
	Operation    string `json:"operation"`
	InputFile    string `json:"input_file"`
	OutputFile   string `json:"output_file"`
	InputSchema  string `json:"input_schema"`
	OutputSchema string `json:"output_schema"`

	// How the affected segments were selected.
	Mode        string `json:"mode"`
	Selector    string `json:"selector"`
	SelectedIDs []int  `json:"selected_ids"`
	AllowEmpty  bool   `json:"allow_empty"`

	// Segment counts before and after trimming.
	InputSegmentCount    int `json:"input_segment_count"`
	RetainedSegmentCount int `json:"retained_segment_count"`
	RemovedSegmentCount  int `json:"removed_segment_count"`

	// Per-segment outcome of the trim.
	RemovedInputIDs         []int           `json:"removed_input_ids"`
	OldToNewIDMapping       []trimIDMapping `json:"old_to_new_id_mapping"`
	OverlapGroupsRecomputed bool            `json:"overlap_groups_recomputed"`
}
// trimIDMapping is one entry of the audit report's old-to-new ID table:
// the ID a segment had in the input and the ID it has in the output.
type trimIDMapping struct {
	// OldID is the segment's ID in the input artifact.
	OldID int `json:"old_id"`
	// NewID is the segment's ID in the output artifact.
	NewID int `json:"new_id"`
}
func newTrimCommand() *cobra.Command { func newTrimCommand() *cobra.Command {
var opts config.TrimOptions var opts config.TrimOptions
@@ -43,6 +67,8 @@ func newTrimCommand() *cobra.Command {
if err != nil { if err != nil {
return fmt.Errorf("--input-file %q: %w", cfg.InputFile, err) return fmt.Errorf("--input-file %q: %w", cfg.InputFile, err)
} }
inputSegmentCount := artifact.SegmentCount()
inputSchema := artifact.Schema
mode := triminternal.ModeKeep mode := triminternal.ModeKeep
if cfg.Mode == "remove" { if cfg.Mode == "remove" {
@@ -77,6 +103,28 @@ func newTrimCommand() *cobra.Command {
} }
if cfg.ReportFile != "" { if cfg.ReportFile != "" {
audit := trimAuditReport{
Operation: "trim",
InputFile: cfg.InputFile,
OutputFile: cfg.OutputFile,
InputSchema: inputSchema,
OutputSchema: outputArtifact.Schema,
Mode: cfg.Mode,
Selector: cfg.Selector,
SelectedIDs: selector.IDs(),
AllowEmpty: cfg.AllowEmpty,
InputSegmentCount: inputSegmentCount,
RetainedSegmentCount: len(trimmed.OldToNewID),
RemovedSegmentCount: len(trimmed.RemovedIDs),
RemovedInputIDs: append([]int(nil), trimmed.RemovedIDs...),
OldToNewIDMapping: orderedIDMapping(trimmed.OldToNewID),
OverlapGroupsRecomputed: trimmed.OverlapGroupsRecomputed,
}
auditJSON, err := json.Marshal(audit)
if err != nil {
return fmt.Errorf("marshal trim audit report: %w", err)
}
rpt := report.Report{ rpt := report.Report{
Metadata: report.Metadata{ Metadata: report.Metadata{
Application: outputArtifact.Application(), Application: outputArtifact.Application(),
@@ -86,7 +134,8 @@ func newTrimCommand() *cobra.Command {
OutputModules: []string{"json"}, OutputModules: []string{"json"},
}, },
Events: []report.Event{ Events: []report.Event{
report.Info("trim", "trim", fmt.Sprintf("mode=%s retained %d segment(s), removed %d segment(s)", cfg.Mode, len(trimmed.OldToNewID), len(trimmed.RemovedIDs))), report.Info("trim", "trim", fmt.Sprintf("trimmed %d input segment(s) into %d output segment(s) with mode=%s", inputSegmentCount, outputArtifact.SegmentCount(), cfg.Mode)),
report.Info("trim", "trim-audit", string(auditJSON)),
report.Info("trim", "validate-output", fmt.Sprintf("validated %d output segment(s)", outputArtifact.SegmentCount())), report.Info("trim", "validate-output", fmt.Sprintf("validated %d output segment(s)", outputArtifact.SegmentCount())),
report.Info("output", "json", "wrote transcript JSON"), report.Info("output", "json", "wrote transcript JSON"),
}, },
@@ -123,3 +172,20 @@ func writeOutputJSON(path string, value any) error {
enc.SetIndent("", " ") enc.SetIndent("", " ")
return enc.Encode(value) return enc.Encode(value)
} }
// orderedIDMapping flattens an old-ID to new-ID map into a slice of pairs
// sorted by ascending old ID, so that serialized output does not depend on
// Go's unspecified map iteration order. The returned slice is never nil,
// even for a nil or empty map.
func orderedIDMapping(mapping map[int]int) []trimIDMapping {
	entries := make([]trimIDMapping, 0, len(mapping))
	for oldID, newID := range mapping {
		entries = append(entries, trimIDMapping{OldID: oldID, NewID: newID})
	}
	// Map keys are unique, so ordering by OldID alone is a total order.
	sort.Slice(entries, func(a, b int) bool {
		return entries[a].OldID < entries[b].OldID
	})
	return entries
}

View File

@@ -8,6 +8,7 @@ import (
"testing" "testing"
"gitea.maximumdirect.net/eric/seriatim/internal/config" "gitea.maximumdirect.net/eric/seriatim/internal/config"
"gitea.maximumdirect.net/eric/seriatim/internal/report"
"gitea.maximumdirect.net/eric/seriatim/schema" "gitea.maximumdirect.net/eric/seriatim/schema"
) )
@@ -221,6 +222,125 @@ func TestTrimRejectsNonSeriatimInputArtifacts(t *testing.T) {
} }
} }
// TestTrimReportFileContainsAuditFields runs trim in remove mode with
// --report-file set and checks the summary event, the validation event, and
// each field of the JSON payload carried by the trim-audit event.
func TestTrimReportFileContainsAuditFields(t *testing.T) {
	workDir := t.TempDir()
	inputPath := writeTrimFullFixture(t, workDir, "input.json")
	outputPath := filepath.Join(workDir, "trimmed.json")
	reportPath := filepath.Join(workDir, "trim-report.json")

	if err := executeTrim(
		"--input-file", inputPath,
		"--output-file", outputPath,
		"--report-file", reportPath,
		"--remove", "4,2",
	); err != nil {
		t.Fatalf("trim failed: %v", err)
	}

	var rpt report.Report
	readJSON(t, reportPath, &rpt)
	if len(rpt.Events) == 0 {
		t.Fatal("expected report events")
	}

	// Human-readable events surrounding the audit payload.
	if !hasReportEvent(rpt, "trim", "trim", "trimmed 4 input segment(s) into 2 output segment(s) with mode=remove") {
		t.Fatal("expected trim summary event")
	}
	if !hasReportEvent(rpt, "trim", "validate-output", "validated 2 output segment(s)") {
		t.Fatal("expected validation event")
	}

	audit := extractTrimAuditEvent(t, rpt)

	// Invocation details.
	if audit.Operation != "trim" {
		t.Fatalf("operation = %q, want trim", audit.Operation)
	}
	if audit.InputFile != inputPath {
		t.Fatalf("input_file = %q, want %q", audit.InputFile, inputPath)
	}
	if audit.OutputFile != outputPath {
		t.Fatalf("output_file = %q, want %q", audit.OutputFile, outputPath)
	}
	if !(audit.InputSchema == config.OutputSchemaFull && audit.OutputSchema == config.OutputSchemaFull) {
		t.Fatalf("schemas = %q -> %q, want full -> full", audit.InputSchema, audit.OutputSchema)
	}

	// Selection: the selector is expected verbatim, the selected IDs sorted.
	if audit.Mode != "remove" {
		t.Fatalf("mode = %q, want remove", audit.Mode)
	}
	if audit.Selector != "4,2" {
		t.Fatalf("selector = %q, want %q", audit.Selector, "4,2")
	}
	assertIntSliceEqual(t, audit.SelectedIDs, []int{2, 4})
	if audit.AllowEmpty {
		t.Fatal("allow_empty should be false")
	}

	// Counts and removed IDs.
	if !(audit.InputSegmentCount == 4 && audit.RetainedSegmentCount == 2 && audit.RemovedSegmentCount == 2) {
		t.Fatalf("counts = input:%d retained:%d removed:%d, want 4/2/2", audit.InputSegmentCount, audit.RetainedSegmentCount, audit.RemovedSegmentCount)
	}
	assertIntSliceEqual(t, audit.RemovedInputIDs, []int{2, 4})

	// Retained segments 1 and 3 are expected to be renumbered to 1 and 2.
	wantMapping := []trimIDMapping{
		{OldID: 1, NewID: 1},
		{OldID: 3, NewID: 2},
	}
	if len(audit.OldToNewIDMapping) != len(wantMapping) {
		t.Fatalf("mapping length = %d, want 2", len(audit.OldToNewIDMapping))
	}
	for i, want := range wantMapping {
		if got := audit.OldToNewIDMapping[i]; got != want {
			t.Fatalf("mapping[%d] = %#v, want old_id=%d new_id=%d", i, got, want.OldID, want.NewID)
		}
	}

	if !audit.OverlapGroupsRecomputed {
		t.Fatal("expected overlap_groups_recomputed=true for full schema trim")
	}
}
// TestTrimReportOldToNewMappingIsDeterministicSorted keeps segments given in
// the unsorted order "4,1,3" and checks that the audit event lists the
// old-to-new mapping entries in ascending old-ID order.
func TestTrimReportOldToNewMappingIsDeterministicSorted(t *testing.T) {
	workDir := t.TempDir()
	inputPath := writeTrimFullFixture(t, workDir, "input.json")
	outputPath := filepath.Join(workDir, "trimmed.json")
	reportPath := filepath.Join(workDir, "trim-report.json")

	if err := executeTrim(
		"--input-file", inputPath,
		"--output-file", outputPath,
		"--report-file", reportPath,
		"--keep", "4,1,3",
	); err != nil {
		t.Fatalf("trim failed: %v", err)
	}

	var rpt report.Report
	readJSON(t, reportPath, &rpt)
	mapping := extractTrimAuditEvent(t, rpt).OldToNewIDMapping

	wantOldIDs := []int{1, 3, 4}
	if len(mapping) != len(wantOldIDs) {
		t.Fatalf("mapping length = %d, want 3", len(mapping))
	}
	for i := range wantOldIDs {
		if gotOld := mapping[i].OldID; gotOld != wantOldIDs[i] {
			t.Fatalf("mapping[%d].old_id = %d, want %d", i, gotOld, wantOldIDs[i])
		}
	}
}
// TestTrimNoReportFileWhenOmitted runs trim without --report-file and checks
// that no trim-report.json appears in the temporary working directory.
func TestTrimNoReportFileWhenOmitted(t *testing.T) {
	workDir := t.TempDir()
	inputPath := writeTrimFullFixture(t, workDir, "input.json")
	outputPath := filepath.Join(workDir, "trimmed.json")
	// This path is deliberately never passed to the command.
	reportPath := filepath.Join(workDir, "trim-report.json")

	if err := executeTrim(
		"--input-file", inputPath,
		"--output-file", outputPath,
		"--keep", "1",
	); err != nil {
		t.Fatalf("trim failed: %v", err)
	}

	if _, statErr := os.Stat(reportPath); !os.IsNotExist(statErr) {
		t.Fatalf("expected no report file at %q, got err=%v", reportPath, statErr)
	}
}
func executeTrim(args ...string) error { func executeTrim(args ...string) error {
cmd := NewRootCommand() cmd := NewRootCommand()
cmd.SetArgs(append([]string{"trim"}, args...)) cmd.SetArgs(append([]string{"trim"}, args...))
@@ -299,3 +419,31 @@ func assertSequentialIDs(t *testing.T, ids []int) {
} }
} }
} }
// extractTrimAuditEvent returns the decoded payload of the first report event
// whose stage is "trim" and whose module is "trim-audit". It fails the test
// immediately if no such event exists or if its message is not valid JSON for
// a trimAuditReport.
func extractTrimAuditEvent(t *testing.T, rpt report.Report) trimAuditReport {
	t.Helper()

	for _, ev := range rpt.Events {
		if ev.Stage != "trim" || ev.Module != "trim-audit" {
			continue
		}
		var decoded trimAuditReport
		if err := json.Unmarshal([]byte(ev.Message), &decoded); err != nil {
			t.Fatalf("decode trim audit event: %v", err)
		}
		return decoded
	}

	t.Fatal("missing trim-audit event")
	// Unreachable at runtime (t.Fatal stops the test); required by the compiler.
	return trimAuditReport{}
}
// assertIntSliceEqual fails the test immediately unless got and want have the
// same length and the same element at every index. Nil and empty slices are
// treated as equal because only lengths and elements are compared.
func assertIntSliceEqual(t *testing.T, got []int, want []int) {
	t.Helper()

	if gotLen, wantLen := len(got), len(want); gotLen != wantLen {
		t.Fatalf("slice length = %d, want %d", gotLen, wantLen)
	}
	for i, actual := range got {
		if expected := want[i]; actual != expected {
			t.Fatalf("slice[%d] = %d, want %d (full got=%v, want=%v)", i, actual, expected, got, want)
		}
	}
}

View File

@@ -23,9 +23,10 @@ type Artifact struct {
// ApplyArtifactResult contains trimmed artifact output and ID mapping metadata. // ApplyArtifactResult contains trimmed artifact output and ID mapping metadata.
type ApplyArtifactResult struct { type ApplyArtifactResult struct {
Artifact Artifact Artifact Artifact
OldToNewID map[int]int OldToNewID map[int]int
RemovedIDs []int RemovedIDs []int
OverlapGroupsRecomputed bool
} }
// ParseArtifactJSON parses and validates a serialized seriatim output artifact. // ParseArtifactJSON parses and validates a serialized seriatim output artifact.
@@ -195,8 +196,9 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
Schema: SchemaFull, Schema: SchemaFull,
Full: &out, Full: &out,
}, },
OldToNewID: result.OldToNewID, OldToNewID: result.OldToNewID,
RemovedIDs: result.RemovedIDs, RemovedIDs: result.RemovedIDs,
OverlapGroupsRecomputed: true,
}, nil }, nil
case SchemaIntermediate: case SchemaIntermediate:
if input.Intermediate == nil { if input.Intermediate == nil {
@@ -212,8 +214,9 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
Schema: SchemaIntermediate, Schema: SchemaIntermediate,
Intermediate: &out, Intermediate: &out,
}, },
OldToNewID: result.OldToNewID, OldToNewID: result.OldToNewID,
RemovedIDs: result.RemovedIDs, RemovedIDs: result.RemovedIDs,
OverlapGroupsRecomputed: false,
}, nil }, nil
case SchemaMinimal: case SchemaMinimal:
if input.Minimal == nil { if input.Minimal == nil {
@@ -229,8 +232,9 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) {
Schema: SchemaMinimal, Schema: SchemaMinimal,
Minimal: &out, Minimal: &out,
}, },
OldToNewID: result.OldToNewID, OldToNewID: result.OldToNewID,
RemovedIDs: result.RemovedIDs, RemovedIDs: result.RemovedIDs,
OverlapGroupsRecomputed: false,
}, nil }, nil
default: default:
return ApplyArtifactResult{}, fmt.Errorf("unsupported artifact schema %q", input.Schema) return ApplyArtifactResult{}, fmt.Errorf("unsupported artifact schema %q", input.Schema)