// trimAuditReport is the machine-readable audit record for a single trim run.
// The trim command marshals it to JSON and stores the result as the message of
// the "trim-audit" report event.
//
// The segment counts are taken from the trim result: the retained count is the
// number of old->new ID pairs and the removed count is the number of removed
// input IDs.
type trimAuditReport struct {
	Operation               string          `json:"operation"`
	InputFile               string          `json:"input_file"`
	OutputFile              string          `json:"output_file"`
	InputSchema             string          `json:"input_schema"`
	OutputSchema            string          `json:"output_schema"`
	Mode                    string          `json:"mode"`
	Selector                string          `json:"selector"`
	SelectedIDs             []int           `json:"selected_ids"`
	AllowEmpty              bool            `json:"allow_empty"`
	InputSegmentCount       int             `json:"input_segment_count"`
	RetainedSegmentCount    int             `json:"retained_segment_count"`
	RemovedSegmentCount     int             `json:"removed_segment_count"`
	RemovedInputIDs         []int           `json:"removed_input_ids"`
	OldToNewIDMapping       []trimIDMapping `json:"old_to_new_id_mapping"`
	OverlapGroupsRecomputed bool            `json:"overlap_groups_recomputed"`
}

// MarshalJSON encodes the audit record with every list field written as a JSON
// array, even when the underlying slice is nil.
//
// encoding/json writes a nil slice as null. Without this normalization a run
// that removes nothing would emit "removed_input_ids": null while an empty
// mapping is emitted as [], forcing consumers to handle two spellings of
// "no entries".
func (r trimAuditReport) MarshalJSON() ([]byte, error) {
	// plain has the same fields and tags but no methods, so json.Marshal does
	// not recurse back into this MarshalJSON.
	type plain trimAuditReport

	out := plain(r)
	if out.SelectedIDs == nil {
		out.SelectedIDs = []int{}
	}
	if out.RemovedInputIDs == nil {
		out.RemovedInputIDs = []int{}
	}
	if out.OldToNewIDMapping == nil {
		out.OldToNewIDMapping = []trimIDMapping{}
	}
	return json.Marshal(out)
}

// trimIDMapping records the ID a retained segment had in the input (OldID) and
// the ID it carries in the trimmed output (NewID).
type trimIDMapping struct {
	OldID int `json:"old_id"`
	NewID int `json:"new_id"`
}
RemovedInputIDs: append([]int(nil), trimmed.RemovedIDs...), + OldToNewIDMapping: orderedIDMapping(trimmed.OldToNewID), + OverlapGroupsRecomputed: trimmed.OverlapGroupsRecomputed, + } + auditJSON, err := json.Marshal(audit) + if err != nil { + return fmt.Errorf("marshal trim audit report: %w", err) + } + rpt := report.Report{ Metadata: report.Metadata{ Application: outputArtifact.Application(), @@ -86,7 +134,8 @@ func newTrimCommand() *cobra.Command { OutputModules: []string{"json"}, }, Events: []report.Event{ - report.Info("trim", "trim", fmt.Sprintf("mode=%s retained %d segment(s), removed %d segment(s)", cfg.Mode, len(trimmed.OldToNewID), len(trimmed.RemovedIDs))), + report.Info("trim", "trim", fmt.Sprintf("trimmed %d input segment(s) into %d output segment(s) with mode=%s", inputSegmentCount, outputArtifact.SegmentCount(), cfg.Mode)), + report.Info("trim", "trim-audit", string(auditJSON)), report.Info("trim", "validate-output", fmt.Sprintf("validated %d output segment(s)", outputArtifact.SegmentCount())), report.Info("output", "json", "wrote transcript JSON"), }, @@ -123,3 +172,20 @@ func writeOutputJSON(path string, value any) error { enc.SetIndent("", " ") return enc.Encode(value) } + +func orderedIDMapping(mapping map[int]int) []trimIDMapping { + keys := make([]int, 0, len(mapping)) + for oldID := range mapping { + keys = append(keys, oldID) + } + sort.Ints(keys) + + pairs := make([]trimIDMapping, 0, len(keys)) + for _, oldID := range keys { + pairs = append(pairs, trimIDMapping{ + OldID: oldID, + NewID: mapping[oldID], + }) + } + return pairs +} diff --git a/internal/cli/trim_test.go b/internal/cli/trim_test.go index 175cbe6..e32fbfe 100644 --- a/internal/cli/trim_test.go +++ b/internal/cli/trim_test.go @@ -8,6 +8,7 @@ import ( "testing" "gitea.maximumdirect.net/eric/seriatim/internal/config" + "gitea.maximumdirect.net/eric/seriatim/internal/report" "gitea.maximumdirect.net/eric/seriatim/schema" ) @@ -221,6 +222,125 @@ func 
TestTrimRejectsNonSeriatimInputArtifacts(t *testing.T) { } } +func TestTrimReportFileContainsAuditFields(t *testing.T) { + dir := t.TempDir() + input := writeTrimFullFixture(t, dir, "input.json") + output := filepath.Join(dir, "trimmed.json") + reportPath := filepath.Join(dir, "trim-report.json") + + err := executeTrim( + "--input-file", input, + "--output-file", output, + "--report-file", reportPath, + "--remove", "4,2", + ) + if err != nil { + t.Fatalf("trim failed: %v", err) + } + + var rpt report.Report + readJSON(t, reportPath, &rpt) + if len(rpt.Events) == 0 { + t.Fatal("expected report events") + } + if !hasReportEvent(rpt, "trim", "trim", "trimmed 4 input segment(s) into 2 output segment(s) with mode=remove") { + t.Fatal("expected trim summary event") + } + if !hasReportEvent(rpt, "trim", "validate-output", "validated 2 output segment(s)") { + t.Fatal("expected validation event") + } + + audit := extractTrimAuditEvent(t, rpt) + if audit.Operation != "trim" { + t.Fatalf("operation = %q, want trim", audit.Operation) + } + if audit.InputFile != input { + t.Fatalf("input_file = %q, want %q", audit.InputFile, input) + } + if audit.OutputFile != output { + t.Fatalf("output_file = %q, want %q", audit.OutputFile, output) + } + if audit.InputSchema != config.OutputSchemaFull || audit.OutputSchema != config.OutputSchemaFull { + t.Fatalf("schemas = %q -> %q, want full -> full", audit.InputSchema, audit.OutputSchema) + } + if audit.Mode != "remove" { + t.Fatalf("mode = %q, want remove", audit.Mode) + } + if audit.Selector != "4,2" { + t.Fatalf("selector = %q, want %q", audit.Selector, "4,2") + } + assertIntSliceEqual(t, audit.SelectedIDs, []int{2, 4}) + if audit.AllowEmpty { + t.Fatal("allow_empty should be false") + } + if audit.InputSegmentCount != 4 || audit.RetainedSegmentCount != 2 || audit.RemovedSegmentCount != 2 { + t.Fatalf("counts = input:%d retained:%d removed:%d, want 4/2/2", audit.InputSegmentCount, audit.RetainedSegmentCount, audit.RemovedSegmentCount) + 
} + assertIntSliceEqual(t, audit.RemovedInputIDs, []int{2, 4}) + if len(audit.OldToNewIDMapping) != 2 { + t.Fatalf("mapping length = %d, want 2", len(audit.OldToNewIDMapping)) + } + if audit.OldToNewIDMapping[0].OldID != 1 || audit.OldToNewIDMapping[0].NewID != 1 { + t.Fatalf("mapping[0] = %#v, want old_id=1 new_id=1", audit.OldToNewIDMapping[0]) + } + if audit.OldToNewIDMapping[1].OldID != 3 || audit.OldToNewIDMapping[1].NewID != 2 { + t.Fatalf("mapping[1] = %#v, want old_id=3 new_id=2", audit.OldToNewIDMapping[1]) + } + if !audit.OverlapGroupsRecomputed { + t.Fatal("expected overlap_groups_recomputed=true for full schema trim") + } +} + +func TestTrimReportOldToNewMappingIsDeterministicSorted(t *testing.T) { + dir := t.TempDir() + input := writeTrimFullFixture(t, dir, "input.json") + output := filepath.Join(dir, "trimmed.json") + reportPath := filepath.Join(dir, "trim-report.json") + + err := executeTrim( + "--input-file", input, + "--output-file", output, + "--report-file", reportPath, + "--keep", "4,1,3", + ) + if err != nil { + t.Fatalf("trim failed: %v", err) + } + + var rpt report.Report + readJSON(t, reportPath, &rpt) + audit := extractTrimAuditEvent(t, rpt) + if len(audit.OldToNewIDMapping) != 3 { + t.Fatalf("mapping length = %d, want 3", len(audit.OldToNewIDMapping)) + } + for index, expectedOld := range []int{1, 3, 4} { + if audit.OldToNewIDMapping[index].OldID != expectedOld { + t.Fatalf("mapping[%d].old_id = %d, want %d", index, audit.OldToNewIDMapping[index].OldID, expectedOld) + } + } +} + +func TestTrimNoReportFileWhenOmitted(t *testing.T) { + dir := t.TempDir() + input := writeTrimFullFixture(t, dir, "input.json") + output := filepath.Join(dir, "trimmed.json") + reportPath := filepath.Join(dir, "trim-report.json") + + err := executeTrim( + "--input-file", input, + "--output-file", output, + "--keep", "1", + ) + if err != nil { + t.Fatalf("trim failed: %v", err) + } + + _, statErr := os.Stat(reportPath) + if !os.IsNotExist(statErr) { + 
// assertIntSliceEqual fails the test immediately unless got and want have the
// same length and the same value at every index. A nil slice and an empty
// slice compare as equal because only length and elements are inspected.
func assertIntSliceEqual(t *testing.T, got []int, want []int) {
	t.Helper()

	if n, m := len(got), len(want); n != m {
		t.Fatalf("slice length = %d, want %d", n, m)
	}
	for i, actual := range got {
		if expected := want[i]; actual != expected {
			t.Fatalf("slice[%d] = %d, want %d (full got=%v, want=%v)", i, actual, expected, got, want)
		}
	}
}
@@ -195,8 +196,9 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) { Schema: SchemaFull, Full: &out, }, - OldToNewID: result.OldToNewID, - RemovedIDs: result.RemovedIDs, + OldToNewID: result.OldToNewID, + RemovedIDs: result.RemovedIDs, + OverlapGroupsRecomputed: true, }, nil case SchemaIntermediate: if input.Intermediate == nil { @@ -212,8 +214,9 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) { Schema: SchemaIntermediate, Intermediate: &out, }, - OldToNewID: result.OldToNewID, - RemovedIDs: result.RemovedIDs, + OldToNewID: result.OldToNewID, + RemovedIDs: result.RemovedIDs, + OverlapGroupsRecomputed: false, }, nil case SchemaMinimal: if input.Minimal == nil { @@ -229,8 +232,9 @@ func ApplyArtifact(input Artifact, opts Options) (ApplyArtifactResult, error) { Schema: SchemaMinimal, Minimal: &out, }, - OldToNewID: result.OldToNewID, - RemovedIDs: result.RemovedIDs, + OldToNewID: result.OldToNewID, + RemovedIDs: result.RemovedIDs, + OverlapGroupsRecomputed: false, }, nil default: return ApplyArtifactResult{}, fmt.Errorf("unsupported artifact schema %q", input.Schema)