Add trim report output
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
@@ -12,6 +13,29 @@ import (
|
||||
triminternal "gitea.maximumdirect.net/eric/seriatim/internal/trim"
|
||||
)
|
||||
|
||||
type trimAuditReport struct {
|
||||
Operation string `json:"operation"`
|
||||
InputFile string `json:"input_file"`
|
||||
OutputFile string `json:"output_file"`
|
||||
InputSchema string `json:"input_schema"`
|
||||
OutputSchema string `json:"output_schema"`
|
||||
Mode string `json:"mode"`
|
||||
Selector string `json:"selector"`
|
||||
SelectedIDs []int `json:"selected_ids"`
|
||||
AllowEmpty bool `json:"allow_empty"`
|
||||
InputSegmentCount int `json:"input_segment_count"`
|
||||
RetainedSegmentCount int `json:"retained_segment_count"`
|
||||
RemovedSegmentCount int `json:"removed_segment_count"`
|
||||
RemovedInputIDs []int `json:"removed_input_ids"`
|
||||
OldToNewIDMapping []trimIDMapping `json:"old_to_new_id_mapping"`
|
||||
OverlapGroupsRecomputed bool `json:"overlap_groups_recomputed"`
|
||||
}
|
||||
|
||||
// trimIDMapping is one entry of the old-ID to new-ID mapping emitted in the
// trim audit report.
type trimIDMapping struct {
	// OldID is the segment ID as it appeared in the input.
	OldID int `json:"old_id"`

	// NewID is the ID the same segment carries in the output.
	NewID int `json:"new_id"`
}
|
||||
|
||||
func newTrimCommand() *cobra.Command {
|
||||
var opts config.TrimOptions
|
||||
|
||||
@@ -43,6 +67,8 @@ func newTrimCommand() *cobra.Command {
|
||||
if err != nil {
|
||||
return fmt.Errorf("--input-file %q: %w", cfg.InputFile, err)
|
||||
}
|
||||
inputSegmentCount := artifact.SegmentCount()
|
||||
inputSchema := artifact.Schema
|
||||
|
||||
mode := triminternal.ModeKeep
|
||||
if cfg.Mode == "remove" {
|
||||
@@ -77,6 +103,28 @@ func newTrimCommand() *cobra.Command {
|
||||
}
|
||||
|
||||
if cfg.ReportFile != "" {
|
||||
audit := trimAuditReport{
|
||||
Operation: "trim",
|
||||
InputFile: cfg.InputFile,
|
||||
OutputFile: cfg.OutputFile,
|
||||
InputSchema: inputSchema,
|
||||
OutputSchema: outputArtifact.Schema,
|
||||
Mode: cfg.Mode,
|
||||
Selector: cfg.Selector,
|
||||
SelectedIDs: selector.IDs(),
|
||||
AllowEmpty: cfg.AllowEmpty,
|
||||
InputSegmentCount: inputSegmentCount,
|
||||
RetainedSegmentCount: len(trimmed.OldToNewID),
|
||||
RemovedSegmentCount: len(trimmed.RemovedIDs),
|
||||
RemovedInputIDs: append([]int(nil), trimmed.RemovedIDs...),
|
||||
OldToNewIDMapping: orderedIDMapping(trimmed.OldToNewID),
|
||||
OverlapGroupsRecomputed: trimmed.OverlapGroupsRecomputed,
|
||||
}
|
||||
auditJSON, err := json.Marshal(audit)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal trim audit report: %w", err)
|
||||
}
|
||||
|
||||
rpt := report.Report{
|
||||
Metadata: report.Metadata{
|
||||
Application: outputArtifact.Application(),
|
||||
@@ -86,7 +134,8 @@ func newTrimCommand() *cobra.Command {
|
||||
OutputModules: []string{"json"},
|
||||
},
|
||||
Events: []report.Event{
|
||||
report.Info("trim", "trim", fmt.Sprintf("mode=%s retained %d segment(s), removed %d segment(s)", cfg.Mode, len(trimmed.OldToNewID), len(trimmed.RemovedIDs))),
|
||||
report.Info("trim", "trim", fmt.Sprintf("trimmed %d input segment(s) into %d output segment(s) with mode=%s", inputSegmentCount, outputArtifact.SegmentCount(), cfg.Mode)),
|
||||
report.Info("trim", "trim-audit", string(auditJSON)),
|
||||
report.Info("trim", "validate-output", fmt.Sprintf("validated %d output segment(s)", outputArtifact.SegmentCount())),
|
||||
report.Info("output", "json", "wrote transcript JSON"),
|
||||
},
|
||||
@@ -123,3 +172,20 @@ func writeOutputJSON(path string, value any) error {
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(value)
|
||||
}
|
||||
|
||||
func orderedIDMapping(mapping map[int]int) []trimIDMapping {
|
||||
keys := make([]int, 0, len(mapping))
|
||||
for oldID := range mapping {
|
||||
keys = append(keys, oldID)
|
||||
}
|
||||
sort.Ints(keys)
|
||||
|
||||
pairs := make([]trimIDMapping, 0, len(keys))
|
||||
for _, oldID := range keys {
|
||||
pairs = append(pairs, trimIDMapping{
|
||||
OldID: oldID,
|
||||
NewID: mapping[oldID],
|
||||
})
|
||||
}
|
||||
return pairs
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/config"
|
||||
"gitea.maximumdirect.net/eric/seriatim/internal/report"
|
||||
"gitea.maximumdirect.net/eric/seriatim/schema"
|
||||
)
|
||||
|
||||
@@ -221,6 +222,125 @@ func TestTrimRejectsNonSeriatimInputArtifacts(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrimReportFileContainsAuditFields(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeTrimFullFixture(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "trimmed.json")
|
||||
reportPath := filepath.Join(dir, "trim-report.json")
|
||||
|
||||
err := executeTrim(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--report-file", reportPath,
|
||||
"--remove", "4,2",
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("trim failed: %v", err)
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
if len(rpt.Events) == 0 {
|
||||
t.Fatal("expected report events")
|
||||
}
|
||||
if !hasReportEvent(rpt, "trim", "trim", "trimmed 4 input segment(s) into 2 output segment(s) with mode=remove") {
|
||||
t.Fatal("expected trim summary event")
|
||||
}
|
||||
if !hasReportEvent(rpt, "trim", "validate-output", "validated 2 output segment(s)") {
|
||||
t.Fatal("expected validation event")
|
||||
}
|
||||
|
||||
audit := extractTrimAuditEvent(t, rpt)
|
||||
if audit.Operation != "trim" {
|
||||
t.Fatalf("operation = %q, want trim", audit.Operation)
|
||||
}
|
||||
if audit.InputFile != input {
|
||||
t.Fatalf("input_file = %q, want %q", audit.InputFile, input)
|
||||
}
|
||||
if audit.OutputFile != output {
|
||||
t.Fatalf("output_file = %q, want %q", audit.OutputFile, output)
|
||||
}
|
||||
if audit.InputSchema != config.OutputSchemaFull || audit.OutputSchema != config.OutputSchemaFull {
|
||||
t.Fatalf("schemas = %q -> %q, want full -> full", audit.InputSchema, audit.OutputSchema)
|
||||
}
|
||||
if audit.Mode != "remove" {
|
||||
t.Fatalf("mode = %q, want remove", audit.Mode)
|
||||
}
|
||||
if audit.Selector != "4,2" {
|
||||
t.Fatalf("selector = %q, want %q", audit.Selector, "4,2")
|
||||
}
|
||||
assertIntSliceEqual(t, audit.SelectedIDs, []int{2, 4})
|
||||
if audit.AllowEmpty {
|
||||
t.Fatal("allow_empty should be false")
|
||||
}
|
||||
if audit.InputSegmentCount != 4 || audit.RetainedSegmentCount != 2 || audit.RemovedSegmentCount != 2 {
|
||||
t.Fatalf("counts = input:%d retained:%d removed:%d, want 4/2/2", audit.InputSegmentCount, audit.RetainedSegmentCount, audit.RemovedSegmentCount)
|
||||
}
|
||||
assertIntSliceEqual(t, audit.RemovedInputIDs, []int{2, 4})
|
||||
if len(audit.OldToNewIDMapping) != 2 {
|
||||
t.Fatalf("mapping length = %d, want 2", len(audit.OldToNewIDMapping))
|
||||
}
|
||||
if audit.OldToNewIDMapping[0].OldID != 1 || audit.OldToNewIDMapping[0].NewID != 1 {
|
||||
t.Fatalf("mapping[0] = %#v, want old_id=1 new_id=1", audit.OldToNewIDMapping[0])
|
||||
}
|
||||
if audit.OldToNewIDMapping[1].OldID != 3 || audit.OldToNewIDMapping[1].NewID != 2 {
|
||||
t.Fatalf("mapping[1] = %#v, want old_id=3 new_id=2", audit.OldToNewIDMapping[1])
|
||||
}
|
||||
if !audit.OverlapGroupsRecomputed {
|
||||
t.Fatal("expected overlap_groups_recomputed=true for full schema trim")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrimReportOldToNewMappingIsDeterministicSorted(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeTrimFullFixture(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "trimmed.json")
|
||||
reportPath := filepath.Join(dir, "trim-report.json")
|
||||
|
||||
err := executeTrim(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--report-file", reportPath,
|
||||
"--keep", "4,1,3",
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("trim failed: %v", err)
|
||||
}
|
||||
|
||||
var rpt report.Report
|
||||
readJSON(t, reportPath, &rpt)
|
||||
audit := extractTrimAuditEvent(t, rpt)
|
||||
if len(audit.OldToNewIDMapping) != 3 {
|
||||
t.Fatalf("mapping length = %d, want 3", len(audit.OldToNewIDMapping))
|
||||
}
|
||||
for index, expectedOld := range []int{1, 3, 4} {
|
||||
if audit.OldToNewIDMapping[index].OldID != expectedOld {
|
||||
t.Fatalf("mapping[%d].old_id = %d, want %d", index, audit.OldToNewIDMapping[index].OldID, expectedOld)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrimNoReportFileWhenOmitted(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := writeTrimFullFixture(t, dir, "input.json")
|
||||
output := filepath.Join(dir, "trimmed.json")
|
||||
reportPath := filepath.Join(dir, "trim-report.json")
|
||||
|
||||
err := executeTrim(
|
||||
"--input-file", input,
|
||||
"--output-file", output,
|
||||
"--keep", "1",
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("trim failed: %v", err)
|
||||
}
|
||||
|
||||
_, statErr := os.Stat(reportPath)
|
||||
if !os.IsNotExist(statErr) {
|
||||
t.Fatalf("expected no report file at %q, got err=%v", reportPath, statErr)
|
||||
}
|
||||
}
|
||||
|
||||
func executeTrim(args ...string) error {
|
||||
cmd := NewRootCommand()
|
||||
cmd.SetArgs(append([]string{"trim"}, args...))
|
||||
@@ -299,3 +419,31 @@ func assertSequentialIDs(t *testing.T, ids []int) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func extractTrimAuditEvent(t *testing.T, rpt report.Report) trimAuditReport {
|
||||
t.Helper()
|
||||
|
||||
for _, event := range rpt.Events {
|
||||
if event.Stage == "trim" && event.Module == "trim-audit" {
|
||||
var audit trimAuditReport
|
||||
if err := json.Unmarshal([]byte(event.Message), &audit); err != nil {
|
||||
t.Fatalf("decode trim audit event: %v", err)
|
||||
}
|
||||
return audit
|
||||
}
|
||||
}
|
||||
t.Fatal("missing trim-audit event")
|
||||
return trimAuditReport{}
|
||||
}
|
||||
|
||||
// assertIntSliceEqual fails the test unless got and want have the same length
// and the same element at every index. A nil slice and an empty slice are
// treated as equal because only lengths and elements are compared.
func assertIntSliceEqual(t *testing.T, got []int, want []int) {
	t.Helper()

	if gotLen, wantLen := len(got), len(want); gotLen != wantLen {
		t.Fatalf("slice length = %d, want %d", gotLen, wantLen)
	}

	// Lengths match from here on, so indexing want by got's index is safe.
	for i, actual := range got {
		if expected := want[i]; actual != expected {
			t.Fatalf("slice[%d] = %d, want %d (full got=%v, want=%v)", i, actual, expected, got, want)
		}
	}
}
|
||||
|
||||
Reference in New Issue
Block a user