package trim

import (
	"strings"
	"testing"

	"gitea.maximumdirect.net/eric/seriatim/schema"
)

// TestApplyKeepModeRenumbersFromOne checks that keeping segments 2 and 4 of
// the four-segment fixture yields two segments renumbered 1 and 2, and that
// the old-to-new ID map and the removed-ID list describe that change.
func TestApplyKeepModeRenumbersFromOne(t *testing.T) {
	input := fullTranscriptFixture()
	selector := mustParseSelector(t, "2,4")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	if len(result.Transcript.Segments) != 2 {
		t.Fatalf("segment count = %d, want 2", len(result.Transcript.Segments))
	}
	assertSegmentIDs(t, result.Transcript.Segments, []int{1, 2})
	assertSegmentTexts(t, result.Transcript.Segments, []string{"beta", "delta"})
	assertIntMap(t, result.OldToNewID, map[int]int{2: 1, 4: 2})
	assertIntSlice(t, result.RemovedIDs, []int{1, 3})
}

// TestApplyRemoveModeRenumbersFromOne checks that removing segments 2 and 4
// leaves segments 1 and 3, renumbered 1 and 2, with matching bookkeeping.
func TestApplyRemoveModeRenumbersFromOne(t *testing.T) {
	input := fullTranscriptFixture()
	selector := mustParseSelector(t, "2,4")

	result, err := Apply(input, Options{
		Mode:     ModeRemove,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	assertSegmentIDs(t, result.Transcript.Segments, []int{1, 2})
	assertSegmentTexts(t, result.Transcript.Segments, []string{"alpha", "gamma"})
	assertIntMap(t, result.OldToNewID, map[int]int{1: 1, 3: 2})
	assertIntSlice(t, result.RemovedIDs, []int{2, 4})
}

// TestApplySelectorOrderDoesNotChangeTranscriptOrder checks that a selector
// written out of order ("4,1,3") still produces segments in their original
// transcript order.
func TestApplySelectorOrderDoesNotChangeTranscriptOrder(t *testing.T) {
	input := fullTranscriptFixture()
	selector := mustParseSelector(t, "4,1,3")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	assertSegmentIDs(t, result.Transcript.Segments, []int{1, 2, 3})
	assertSegmentTexts(t, result.Transcript.Segments, []string{"alpha", "gamma", "delta"})
}

// TestApplyFailsWhenSelectedIDDoesNotExist checks that selecting an ID that
// is absent from the input (99) is reported as a "does not exist" error.
func TestApplyFailsWhenSelectedIDDoesNotExist(t *testing.T) {
	input := fullTranscriptFixture()
	selector := mustParseSelector(t, "2,99")

	_, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err == nil {
		t.Fatal("expected missing selected ID error")
	}
	if !strings.Contains(err.Error(), "does not exist") {
		t.Fatalf("unexpected error: %v", err)
	}
}

// TestApplyFailsOnDuplicateInputIDs checks that an input whose segments share
// an ID is rejected with a "duplicate segment ID" error.
func TestApplyFailsOnDuplicateInputIDs(t *testing.T) {
	input := fullTranscriptFixture()
	input.Segments[2].ID = 2 // third segment now collides with the second
	selector := mustParseSelector(t, "2")

	_, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err == nil {
		t.Fatal("expected duplicate input ID error")
	}
	if !strings.Contains(err.Error(), "duplicate segment ID") {
		t.Fatalf("unexpected error: %v", err)
	}
}

// TestApplyFailsOnMissingOrNonSequentialInputIDs checks that an input whose
// IDs skip a value (1, 5, 3, 4) is rejected with a "must be sequential" error.
func TestApplyFailsOnMissingOrNonSequentialInputIDs(t *testing.T) {
	input := fullTranscriptFixture()
	input.Segments[1].ID = 5
	selector := mustParseSelector(t, "1")

	_, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err == nil {
		t.Fatal("expected non-sequential input ID error")
	}
	if !strings.Contains(err.Error(), "must be sequential") {
		t.Fatalf("unexpected error: %v", err)
	}
}

// TestApplyFailsOnNonPositiveInputIDs checks that an input containing a
// segment with ID 0 is rejected with a "non-positive" error.
func TestApplyFailsOnNonPositiveInputIDs(t *testing.T) {
	input := fullTranscriptFixture()
	input.Segments[0].ID = 0
	selector := mustParseSelector(t, "1")

	_, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err == nil {
		t.Fatal("expected non-positive input ID error")
	}
	if !strings.Contains(err.Error(), "non-positive") {
		t.Fatalf("unexpected error: %v", err)
	}
}

// TestApplyEmptyOutputFailsUnlessAllowEmpty checks both sides of the
// AllowEmpty option: removing every segment fails with an "empty transcript"
// error by default, and succeeds with zero segments when AllowEmpty is set.
func TestApplyEmptyOutputFailsUnlessAllowEmpty(t *testing.T) {
	input := fullTranscriptFixture()
	selector := mustParseSelector(t, "1-4")

	_, err := Apply(input, Options{
		Mode:     ModeRemove,
		Selector: selector,
	})
	if err == nil {
		t.Fatal("expected empty-output error")
	}
	if !strings.Contains(err.Error(), "empty transcript") {
		t.Fatalf("unexpected error: %v", err)
	}

	allowed, err := Apply(input, Options{
		Mode:       ModeRemove,
		Selector:   selector,
		AllowEmpty: true,
	})
	if err != nil {
		t.Fatalf("apply with AllowEmpty failed: %v", err)
	}
	if len(allowed.Transcript.Segments) != 0 {
		t.Fatalf("segment count = %d, want 0", len(allowed.Transcript.Segments))
	}
	assertIntMap(t, allowed.OldToNewID, map[int]int{})
	assertIntSlice(t, allowed.RemovedIDs, []int{1, 2, 3, 4})
}

// TestApplyPreservesRetainedSegmentFieldsAndClearsOverlapIDs keeps only
// segment 2 and checks that every descriptive field survives unchanged while
// the ID is renumbered to 1 and the overlap annotations are cleared.
func TestApplyPreservesRetainedSegmentFieldsAndClearsOverlapIDs(t *testing.T) {
	input := fullTranscriptFixture()
	selector := mustParseSelector(t, "2")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}
	if len(result.Transcript.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(result.Transcript.Segments))
	}

	segment := result.Transcript.Segments[0]
	if segment.ID != 1 {
		t.Fatalf("segment ID = %d, want 1", segment.ID)
	}
	if segment.Source != "b.json" {
		t.Fatalf("source = %q, want %q", segment.Source, "b.json")
	}
	if segment.SourceSegmentIndex == nil || *segment.SourceSegmentIndex != 20 {
		t.Fatalf("source_segment_index = %v, want 20", segment.SourceSegmentIndex)
	}
	if segment.SourceRef != "b.json#20" {
		t.Fatalf("source_ref = %q, want %q", segment.SourceRef, "b.json#20")
	}
	if !equalStringSlices(segment.DerivedFrom, []string{"b.json#19", "b.json#20"}) {
		t.Fatalf("derived_from = %v, want %v", segment.DerivedFrom, []string{"b.json#19", "b.json#20"})
	}
	if !equalStringSlices(segment.Categories, []string{"filler", "backchannel"}) {
		t.Fatalf("categories = %v, want %v", segment.Categories, []string{"filler", "backchannel"})
	}
	if segment.Speaker != "Bob" {
		t.Fatalf("speaker = %q, want Bob", segment.Speaker)
	}
	if segment.Start != 2 || segment.End != 3 {
		t.Fatalf("times = %.3f-%.3f, want 2.000-3.000", segment.Start, segment.End)
	}
	if segment.Text != "beta" {
		t.Fatalf("text = %q, want beta", segment.Text)
	}
	if segment.OverlapGroupID != 0 {
		t.Fatalf("overlap_group_id = %d, want 0", segment.OverlapGroupID)
	}
	if len(result.Transcript.OverlapGroups) != 0 {
		t.Fatalf("overlap_groups count = %d, want 0", len(result.Transcript.OverlapGroups))
	}
}

// TestApplyFullSchemaRemovesStaleOverlapGroups keeps segments 1 and 3 of the
// overlap fixture, whose time ranges (1-4 and 10-11) do not intersect, and
// checks that no overlap group or per-segment overlap ID remains.
func TestApplyFullSchemaRemovesStaleOverlapGroups(t *testing.T) {
	input := overlapTranscriptFixture()
	selector := mustParseSelector(t, "1,3")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	if len(result.Transcript.OverlapGroups) != 0 {
		t.Fatalf("overlap_groups count = %d, want 0", len(result.Transcript.OverlapGroups))
	}
	for index, segment := range result.Transcript.Segments {
		if segment.OverlapGroupID != 0 {
			t.Fatalf("segment %d overlap_group_id = %d, want 0", index, segment.OverlapGroupID)
		}
	}
}

// TestApplyFullSchemaRecomputesOverlapGroup keeps the two intersecting
// segments of the overlap fixture and checks that the stale group (ID 99,
// 0-100, "stale" references) is replaced by a single group with ID 1 whose
// bounds, segment references and speakers come from the retained segments.
func TestApplyFullSchemaRecomputesOverlapGroup(t *testing.T) {
	input := overlapTranscriptFixture()
	selector := mustParseSelector(t, "1,2")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	// assertSegmentIDs also pins the segment count, so the indexing below is safe.
	assertSegmentIDs(t, result.Transcript.Segments, []int{1, 2})
	assertIntSlice(t, []int{
		result.Transcript.Segments[0].OverlapGroupID,
		result.Transcript.Segments[1].OverlapGroupID,
	}, []int{1, 1})

	if len(result.Transcript.OverlapGroups) != 1 {
		t.Fatalf("overlap_groups count = %d, want 1", len(result.Transcript.OverlapGroups))
	}
	group := result.Transcript.OverlapGroups[0]
	if group.ID != 1 {
		t.Fatalf("group ID = %d, want 1", group.ID)
	}
	if group.Start != 1 || group.End != 4 {
		t.Fatalf("group times = %.3f-%.3f, want 1.000-4.000", group.Start, group.End)
	}
	if !equalStringSlices(group.Segments, []string{"a.json#10", "b.json#20"}) {
		t.Fatalf("group segments = %v, want %v", group.Segments, []string{"a.json#10", "b.json#20"})
	}
	if !equalStringSlices(group.Speakers, []string{"Alice", "Bob"}) {
		t.Fatalf("group speakers = %v, want %v", group.Speakers, []string{"Alice", "Bob"})
	}
}

// TestApplyFullSchemaDropsGroupWhenFewerThanTwoSpeakersRemain keeps a single
// segment of a former overlap pair and checks that the group disappears and
// the segment's overlap ID is reset to 0.
func TestApplyFullSchemaDropsGroupWhenFewerThanTwoSpeakersRemain(t *testing.T) {
	input := overlapTranscriptFixture()
	selector := mustParseSelector(t, "1")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	if len(result.Transcript.OverlapGroups) != 0 {
		t.Fatalf("overlap_groups count = %d, want 0", len(result.Transcript.OverlapGroups))
	}
	if len(result.Transcript.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(result.Transcript.Segments))
	}
	if result.Transcript.Segments[0].OverlapGroupID != 0 {
		t.Fatalf("segment overlap_group_id = %d, want 0", result.Transcript.Segments[0].OverlapGroupID)
	}
}

// TestApplyFullSchemaHandlesTransitiveOverlaps checks that three segments are
// placed in one overlap group spanning 10-15 even though the second (12-13)
// and third (13.5-15) only connect through the first (10-14).
func TestApplyFullSchemaHandlesTransitiveOverlaps(t *testing.T) {
	input := transitiveOverlapFixture()
	selector := mustParseSelector(t, "1-3")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	if len(result.Transcript.OverlapGroups) != 1 {
		t.Fatalf("overlap_groups count = %d, want 1", len(result.Transcript.OverlapGroups))
	}
	// Guard the indexing below so a short result fails the test instead of panicking.
	if len(result.Transcript.Segments) != 3 {
		t.Fatalf("segment count = %d, want 3", len(result.Transcript.Segments))
	}
	assertIntSlice(t, []int{
		result.Transcript.Segments[0].OverlapGroupID,
		result.Transcript.Segments[1].OverlapGroupID,
		result.Transcript.Segments[2].OverlapGroupID,
	}, []int{1, 1, 1})

	group := result.Transcript.OverlapGroups[0]
	if group.Start != 10 || group.End != 15 {
		t.Fatalf("group times = %.3f-%.3f, want 10.000-15.000", group.Start, group.End)
	}
}

// TestApplyFullSchemaBoundaryTouchingNotGrouped checks that two segments that
// merely touch (one ends at 2, the next starts at 2) are not grouped as an
// overlap and end up with overlap ID 0.
func TestApplyFullSchemaBoundaryTouchingNotGrouped(t *testing.T) {
	input := boundaryFixture()
	selector := mustParseSelector(t, "1-2")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	if len(result.Transcript.OverlapGroups) != 0 {
		t.Fatalf("overlap_groups count = %d, want 0", len(result.Transcript.OverlapGroups))
	}
	// Guard the indexing below so a short result fails the test instead of panicking.
	if len(result.Transcript.Segments) != 2 {
		t.Fatalf("segment count = %d, want 2", len(result.Transcript.Segments))
	}
	assertIntSlice(t, []int{
		result.Transcript.Segments[0].OverlapGroupID,
		result.Transcript.Segments[1].OverlapGroupID,
	}, []int{0, 0})
}

// TestApplyIntermediateDoesNotIncludeOverlapGroups runs ApplyIntermediate on
// two time-intersecting segments, keeps the first, and checks that the
// result has one segment with ID 1 and still passes intermediate validation.
func TestApplyIntermediateDoesNotIncludeOverlapGroups(t *testing.T) {
	input := schema.IntermediateTranscript{
		Metadata: schema.IntermediateMetadata{
			Application:  "seriatim",
			Version:      "v-test",
			OutputSchema: "seriatim-intermediate",
		},
		Segments: []schema.IntermediateSegment{
			{ID: 1, Start: 1, End: 3, Speaker: "Alice", Text: "alpha", Categories: []string{"word-run"}},
			{ID: 2, Start: 2, End: 4, Speaker: "Bob", Text: "beta", Categories: []string{"filler"}},
		},
	}
	selector := mustParseSelector(t, "1")

	result, err := ApplyIntermediate(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply intermediate failed: %v", err)
	}

	if len(result.Transcript.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(result.Transcript.Segments))
	}
	if result.Transcript.Segments[0].ID != 1 {
		t.Fatalf("segment id = %d, want 1", result.Transcript.Segments[0].ID)
	}
	if err := schema.ValidateIntermediateTranscript(result.Transcript); err != nil {
		t.Fatalf("intermediate output should remain valid: %v", err)
	}
}

// TestApplyMinimalDoesNotIncludeOverlapGroups runs ApplyMinimal on two
// time-intersecting segments, keeps the second, and checks that the result
// has one segment renumbered to 1 and still passes minimal validation.
func TestApplyMinimalDoesNotIncludeOverlapGroups(t *testing.T) {
	input := schema.MinimalTranscript{
		Metadata: schema.MinimalMetadata{
			Application:  "seriatim",
			Version:      "v-test",
			OutputSchema: "seriatim-minimal",
		},
		Segments: []schema.MinimalSegment{
			{ID: 1, Start: 1, End: 3, Speaker: "Alice", Text: "alpha"},
			{ID: 2, Start: 2, End: 4, Speaker: "Bob", Text: "beta"},
		},
	}
	selector := mustParseSelector(t, "2")

	result, err := ApplyMinimal(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply minimal failed: %v", err)
	}

	if len(result.Transcript.Segments) != 1 {
		t.Fatalf("segment count = %d, want 1", len(result.Transcript.Segments))
	}
	if result.Transcript.Segments[0].ID != 1 {
		t.Fatalf("segment id = %d, want 1", result.Transcript.Segments[0].ID)
	}
	if err := schema.ValidateMinimalTranscript(result.Transcript); err != nil {
		t.Fatalf("minimal output should remain valid: %v", err)
	}
}

// TestApplyOutputInvariantsValidAfterRenumberAndOverlapRecompute checks that
// the trimmed full-schema transcript passes schema.ValidateTranscript.
func TestApplyOutputInvariantsValidAfterRenumberAndOverlapRecompute(t *testing.T) {
	input := overlapTranscriptFixture()
	selector := mustParseSelector(t, "2,1")

	result, err := Apply(input, Options{
		Mode:     ModeKeep,
		Selector: selector,
	})
	if err != nil {
		t.Fatalf("apply failed: %v", err)
	}

	if err := schema.ValidateTranscript(result.Transcript); err != nil {
		t.Fatalf("trim output should remain valid: %v", err)
	}
}

// mustParseSelector parses value with ParseSelector and fails the test
// immediately if parsing returns an error.
func mustParseSelector(t *testing.T, value string) Selector {
	t.Helper()

	selector, err := ParseSelector(value)
	if err != nil {
		t.Fatalf("selector parse failed for %q: %v", value, err)
	}
	return selector
}

// fullTranscriptFixture returns a transcript with four back-to-back segments
// (1-2, 2-3, 3-4, 4-5) from four speakers, with every descriptive field
// populated. The segments carry overlap group IDs 7, 7, 8 and 9, but only
// group 7 is listed in OverlapGroups.
func fullTranscriptFixture() schema.Transcript {
	firstIndex := 10
	secondIndex := 20
	thirdIndex := 30
	fourthIndex := 40

	return schema.Transcript{
		Metadata: schema.Metadata{
			Application:           "seriatim",
			Version:               "v-test",
			InputReader:           "json-files",
			InputFiles:            []string{"a.json", "b.json"},
			PreprocessingModules:  []string{"validate-raw"},
			PostprocessingModules: []string{"detect-overlaps"},
			OutputModules:         []string{"json"},
		},
		Segments: []schema.Segment{
			{
				ID:                 1,
				Source:             "a.json",
				SourceSegmentIndex: &firstIndex,
				SourceRef:          "a.json#10",
				DerivedFrom:        []string{"a.json#10"},
				Speaker:            "Alice",
				Start:              1,
				End:                2,
				Text:               "alpha",
				Categories:         []string{"word-run"},
				OverlapGroupID:     7,
			},
			{
				ID:                 2,
				Source:             "b.json",
				SourceSegmentIndex: &secondIndex,
				SourceRef:          "b.json#20",
				DerivedFrom:        []string{"b.json#19", "b.json#20"},
				Speaker:            "Bob",
				Start:              2,
				End:                3,
				Text:               "beta",
				Categories:         []string{"filler", "backchannel"},
				OverlapGroupID:     7,
			},
			{
				ID:                 3,
				Source:             "c.json",
				SourceSegmentIndex: &thirdIndex,
				SourceRef:          "c.json#30",
				DerivedFrom:        []string{"c.json#30"},
				Speaker:            "Carol",
				Start:              3,
				End:                4,
				Text:               "gamma",
				Categories:         []string{"normal"},
				OverlapGroupID:     8,
			},
			{
				ID:                 4,
				Source:             "d.json",
				SourceSegmentIndex: &fourthIndex,
				SourceRef:          "d.json#40",
				DerivedFrom:        []string{"d.json#40"},
				Speaker:            "Dan",
				Start:              4,
				End:                5,
				Text:               "delta",
				Categories:         []string{"normal"},
				OverlapGroupID:     9,
			},
		},
		OverlapGroups: []schema.OverlapGroup{
			{
				ID:         7,
				Start:      1.5,
				End:        3.1,
				Segments:   []string{"a.json#10", "b.json#20"},
				Speakers:   []string{"Alice", "Bob"},
				Class:      "unknown",
				Resolution: "unresolved",
			},
		},
	}
}

// overlapTranscriptFixture returns a transcript where Alice (1-4) and Bob
// (2-3) intersect in time and Carol (10-11) stands apart. The overlap
// annotations are deliberately stale: group 99 spans 0-100 and references
// "stale" segments and speakers, and Carol points at a group ID (100) that is
// not listed.
func overlapTranscriptFixture() schema.Transcript {
	first := 10
	second := 20
	third := 30

	return schema.Transcript{
		Metadata: schema.Metadata{
			Application:           "seriatim",
			Version:               "v-test",
			InputReader:           "json-files",
			InputFiles:            []string{"a.json", "b.json", "c.json"},
			PreprocessingModules:  []string{"validate-raw"},
			PostprocessingModules: []string{"detect-overlaps"},
			OutputModules:         []string{"json"},
		},
		Segments: []schema.Segment{
			{
				ID:                 1,
				Source:             "a.json",
				SourceSegmentIndex: &first,
				SourceRef:          "a.json#10",
				Speaker:            "Alice",
				Start:              1,
				End:                4,
				Text:               "a",
				OverlapGroupID:     99,
			},
			{
				ID:                 2,
				Source:             "b.json",
				SourceSegmentIndex: &second,
				SourceRef:          "b.json#20",
				Speaker:            "Bob",
				Start:              2,
				End:                3,
				Text:               "b",
				OverlapGroupID:     99,
			},
			{
				ID:                 3,
				Source:             "c.json",
				SourceSegmentIndex: &third,
				SourceRef:          "c.json#30",
				Speaker:            "Carol",
				Start:              10,
				End:                11,
				Text:               "c",
				OverlapGroupID:     100,
			},
		},
		OverlapGroups: []schema.OverlapGroup{
			{
				ID:         99,
				Start:      0,
				End:        100,
				Segments:   []string{"stale#1", "stale#2"},
				Speakers:   []string{"stale"},
				Class:      "unknown",
				Resolution: "unresolved",
			},
		},
	}
}

// transitiveOverlapFixture returns three segments where Bob (12-13) and Carol
// (13.5-15) each intersect Alice (10-14) but not each other, plus a
// placeholder overlap group with ID 77.
func transitiveOverlapFixture() schema.Transcript {
	one := 1
	two := 2
	three := 3

	return schema.Transcript{
		Metadata: schema.Metadata{
			Application: "seriatim",
			Version:     "v-test",
		},
		Segments: []schema.Segment{
			{ID: 1, Source: "a.json", SourceSegmentIndex: &one, Speaker: "Alice", Start: 10, End: 14, Text: "a"},
			{ID: 2, Source: "b.json", SourceSegmentIndex: &two, Speaker: "Bob", Start: 12, End: 13, Text: "b"},
			{ID: 3, Source: "c.json", SourceSegmentIndex: &three, Speaker: "Carol", Start: 13.5, End: 15, Text: "c"},
		},
		OverlapGroups: []schema.OverlapGroup{{ID: 77}},
	}
}

// boundaryFixture returns two segments that share only a boundary instant
// (Alice ends at 2, Bob starts at 2), both pre-tagged with overlap group 7.
func boundaryFixture() schema.Transcript {
	one := 1
	two := 2

	return schema.Transcript{
		Metadata: schema.Metadata{
			Application: "seriatim",
			Version:     "v-test",
		},
		Segments: []schema.Segment{
			{ID: 1, Source: "a.json", SourceSegmentIndex: &one, Speaker: "Alice", Start: 1, End: 2, Text: "a", OverlapGroupID: 7},
			{ID: 2, Source: "b.json", SourceSegmentIndex: &two, Speaker: "Bob", Start: 2, End: 3, Text: "b", OverlapGroupID: 7},
		},
		OverlapGroups: []schema.OverlapGroup{{ID: 7, Start: 1, End: 3}},
	}
}

// assertSegmentIDs fails the test unless the IDs of segments, in order, equal
// want.
func assertSegmentIDs(t *testing.T, segments []schema.Segment, want []int) {
	t.Helper()

	got := make([]int, len(segments))
	for index, segment := range segments {
		got[index] = segment.ID
	}
	assertIntSlice(t, got, want)
}

// assertSegmentTexts fails the test unless the Text values of segments, in
// order, equal want.
func assertSegmentTexts(t *testing.T, segments []schema.Segment, want []string) {
	t.Helper()

	got := make([]string, len(segments))
	for index, segment := range segments {
		got[index] = segment.Text
	}
	if !equalStringSlices(got, want) {
		t.Fatalf("segment texts = %v, want %v", got, want)
	}
}

// assertIntSlice fails the test unless got and want have the same length and
// the same element at every index. Both failure messages include the full
// slices so a mismatch can be diagnosed from the log alone.
func assertIntSlice(t *testing.T, got []int, want []int) {
	t.Helper()

	if len(got) != len(want) {
		t.Fatalf("slice length = %d, want %d (full got=%v, want=%v)", len(got), len(want), got, want)
	}
	for index := range got {
		if got[index] != want[index] {
			t.Fatalf("slice[%d] = %d, want %d (full got=%v, want=%v)", index, got[index], want[index], got, want)
		}
	}
}

// assertIntMap fails the test unless got and want hold the same key/value
// pairs. Equal lengths plus every wanted key matching implies equality.
func assertIntMap(t *testing.T, got map[int]int, want map[int]int) {
	t.Helper()

	if len(got) != len(want) {
		t.Fatalf("map length = %d, want %d", len(got), len(want))
	}
	for key, wantValue := range want {
		gotValue, exists := got[key]
		if !exists {
			t.Fatalf("missing map key %d", key)
		}
		if gotValue != wantValue {
			t.Fatalf("map[%d] = %d, want %d", key, gotValue, wantValue)
		}
	}
}

// equalStringSlices reports whether got and want have the same length and the
// same string at every index. A nil slice and an empty slice compare equal.
func equalStringSlices(got []string, want []string) bool {
	if len(got) != len(want) {
		return false
	}
	for index := range got {
		if got[index] != want[index] {
			return false
		}
	}
	return true
}