From 2c82f8bf5c8916e2e941e403a60d70b0e4d7c668 Mon Sep 17 00:00:00 2001
From: Eric Rakestraw
Date: Fri, 8 May 2026 14:41:47 +0000
Subject: [PATCH] Add trim selector parsing

---
 internal/trim/selector.go      | 193 +++++++++++++++++++++++++++++++++
 internal/trim/selector_test.go | 140 ++++++++++++++++++++++++
 2 files changed, 333 insertions(+)
 create mode 100644 internal/trim/selector.go
 create mode 100644 internal/trim/selector_test.go

diff --git a/internal/trim/selector.go b/internal/trim/selector.go
new file mode 100644
index 0000000..cadd183
--- /dev/null
+++ b/internal/trim/selector.go
@@ -0,0 +1,193 @@
+package trim
+
+import (
+	"errors"
+	"fmt"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// selectorElementPattern matches one comma-separated element: a single
+// optionally signed integer, or two of them joined by "-" with optional
+// whitespace around the dash. Signed values are matched so that negative
+// IDs reach parseID and are reported as non-positive, not as malformed.
+var selectorElementPattern = regexp.MustCompile(`^([+-]?\d+)(?:\s*-\s*([+-]?\d+))?$`)
+
+// Selector represents a normalized union of segment IDs.
+//
+// A Selector returned by ParseSelector holds sorted, non-overlapping,
+// non-adjacent ranges. The zero value selects nothing.
+type Selector struct {
+	ranges []idRange
+}
+
+// idRange is an inclusive range of segment IDs.
+type idRange struct {
+	start int
+	end   int
+}
+
+// ParseSelector parses an inline segment selector expression.
+//
+// The input is a comma-separated list of elements, each either a single
+// positive ID ("8") or an inclusive ascending range ("1-3"). Whitespace
+// around elements and around the range dash is ignored. Duplicate,
+// overlapping, and adjacent elements are merged. An error is returned for
+// empty input, empty elements, malformed elements, non-positive or
+// out-of-range IDs, and descending ranges.
+func ParseSelector(input string) (Selector, error) {
+	if strings.TrimSpace(input) == "" {
+		return Selector{}, fmt.Errorf("selector cannot be empty")
+	}
+
+	parts := strings.Split(input, ",")
+	ranges := make([]idRange, 0, len(parts))
+	for index, raw := range parts {
+		element := strings.TrimSpace(raw)
+		if element == "" {
+			return Selector{}, fmt.Errorf("selector element %d cannot be empty", index+1)
+		}
+
+		rangeValue, err := parseElement(element)
+		if err != nil {
+			return Selector{}, fmt.Errorf("selector element %d %q: %w", index+1, element, err)
+		}
+		ranges = append(ranges, rangeValue)
+	}
+
+	normalized := normalizeRanges(ranges)
+	if len(normalized) == 0 {
+		return Selector{}, fmt.Errorf("selector cannot be empty")
+	}
+	return Selector{ranges: normalized}, nil
+}
+
+// Contains returns true when id is included by this selector.
+//
+// It binary-searches the normalized ranges for the first one ending at or
+// after id, so it relies on the ranges being sorted and non-overlapping.
+func (s Selector) Contains(id int) bool {
+	if id <= 0 {
+		return false
+	}
+	index := sort.Search(len(s.ranges), func(i int) bool {
+		return s.ranges[i].end >= id
+	})
+	if index == len(s.ranges) {
+		return false
+	}
+	rangeValue := s.ranges[index]
+	return id >= rangeValue.start && id <= rangeValue.end
+}
+
+// IDs returns a deterministic ascending list of unique segment IDs.
+//
+// The result holds one entry per selected ID, so its size grows with the
+// width of the selected ranges, not with the number of elements parsed.
+func (s Selector) IDs() []int {
+	total := 0
+	for _, rangeValue := range s.ranges {
+		total += rangeValue.end - rangeValue.start + 1
+	}
+
+	ids := make([]int, 0, total)
+	for _, rangeValue := range s.ranges {
+		for id := rangeValue.start; id <= rangeValue.end; id++ {
+			ids = append(ids, id)
+		}
+	}
+	return ids
+}
+
+// parseElement parses one trimmed selector element into an inclusive range.
+// A single ID yields a range whose start and end are equal.
+func parseElement(element string) (idRange, error) {
+	matches := selectorElementPattern.FindStringSubmatch(element)
+	if matches == nil {
+		return idRange{}, fmt.Errorf("malformed element")
+	}
+
+	start, err := parseID(matches[1])
+	if err != nil {
+		return idRange{}, err
+	}
+
+	if matches[2] == "" {
+		return idRange{start: start, end: start}, nil
+	}
+
+	end, err := parseID(matches[2])
+	if err != nil {
+		return idRange{}, fmt.Errorf("invalid range end: %w", err)
+	}
+	if start > end {
+		return idRange{}, fmt.Errorf("descending range %d-%d is invalid", start, end)
+	}
+	return idRange{start: start, end: end}, nil
+}
+
+// parseID converts a matched ID token into a positive int.
+func parseID(value string) (int, error) {
+	value = strings.TrimSpace(value)
+	if value == "" {
+		return 0, fmt.Errorf("missing segment ID")
+	}
+
+	id, err := strconv.Atoi(value)
+	if err != nil {
+		// Tokens that passed selectorElementPattern are syntactically valid
+		// integers, so overflow is the failure seen in practice; report it
+		// as such instead of claiming the value is not an integer.
+		if errors.Is(err, strconv.ErrRange) {
+			return 0, fmt.Errorf("segment ID is out of range")
+		}
+		return 0, fmt.Errorf("segment ID must be an integer")
+	}
+	if id <= 0 {
+		return 0, fmt.Errorf("segment ID must be positive")
+	}
+	return id, nil
+}
+
+// normalizeRanges returns the ranges sorted by start with overlapping and
+// adjacent ranges merged. The input slice is not modified.
+func normalizeRanges(in []idRange) []idRange {
+	if len(in) == 0 {
+		return nil
+	}
+
+	sorted := make([]idRange, len(in))
+	copy(sorted, in)
+	sort.Slice(sorted, func(i, j int) bool {
+		if sorted[i].start == sorted[j].start {
+			return sorted[i].end < sorted[j].end
+		}
+		return sorted[i].start < sorted[j].start
+	})
+
+	merged := make([]idRange, 0, len(sorted))
+	for _, next := range sorted {
+		if len(merged) == 0 {
+			merged = append(merged, next)
+			continue
+		}
+
+		last := &merged[len(merged)-1]
+		// Compare next.start-1 with last.end rather than next.start with
+		// last.end+1: the latter overflows when last.end is the largest int,
+		// which left contained ranges unmerged. Parsed starts are positive,
+		// so the subtraction cannot underflow.
+		if next.start-1 <= last.end {
+			if next.end > last.end {
+				last.end = next.end
+			}
+			continue
+		}
+
+		merged = append(merged, next)
+	}
+
+	return merged
+}
diff --git a/internal/trim/selector_test.go b/internal/trim/selector_test.go
new file mode 100644
index 0000000..b1ee433
--- /dev/null
+++ b/internal/trim/selector_test.go
@@ -0,0 +1,140 @@
+package trim
+
+import (
+	"strconv"
+	"strings"
+	"testing"
+)
+
+func TestParseSelectorSingleID(t *testing.T) {
+	selector, err := ParseSelector("1")
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+	assertIDs(t, selector, []int{1})
+	assertContains(t, selector, map[int]bool{1: true, 2: false, 0: false, -1: false})
+}
+
+func TestParseSelectorInclusiveRange(t *testing.T) {
+	selector, err := ParseSelector("1-3")
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+	assertIDs(t, selector, []int{1, 2, 3})
+}
+
+func TestParseSelectorCommaSeparatedCombination(t *testing.T) {
+	selector, err := ParseSelector("1-3,8,10-12")
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+	assertIDs(t, selector, []int{1, 2, 3, 8, 10, 11, 12})
+}
+
+func TestParseSelectorWhitespaceTolerance(t *testing.T) {
+	selector, err := ParseSelector(" 1 - 3 , 8 , 10 - 12 ")
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+	assertIDs(t, selector, []int{1, 2, 3, 8, 10, 11, 12})
+}
+
+func TestParseSelectorDuplicatesAndOverlapsNormalizeUnion(t *testing.T) {
+	selector, err := ParseSelector("1-4,2,4,3-6,6")
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+	assertIDs(t, selector, []int{1, 2, 3, 4, 5, 6})
+	assertContains(t, selector, map[int]bool{1: true, 5: true, 6: true, 7: false})
+}
+
+// A range ending at the largest int must still absorb the ranges it
+// contains. IDs is deliberately not called: the selection is too large.
+func TestParseSelectorMergesRangeEndingAtMaxInt(t *testing.T) {
+	maxInt := int(^uint(0) >> 1)
+	selector, err := ParseSelector("1-" + strconv.Itoa(maxInt) + ",5")
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+	assertContains(t, selector, map[int]bool{1: true, 6: true, maxInt: true})
+}
+
+func TestParseSelectorDeterministicNormalizedOutput(t *testing.T) {
+	left, err := ParseSelector("8,1-3,2,10-12")
+	if err != nil {
+		t.Fatalf("parse left failed: %v", err)
+	}
+	right, err := ParseSelector("10-12,3,2,1,8")
+	if err != nil {
+		t.Fatalf("parse right failed: %v", err)
+	}
+
+	leftIDs := left.IDs()
+	rightIDs := right.IDs()
+	if !equalInts(leftIDs, rightIDs) {
+		t.Fatalf("normalized IDs mismatch: %v vs %v", leftIDs, rightIDs)
+	}
+}
+
+func TestParseSelectorFailures(t *testing.T) {
+	tests := []struct {
+		name      string
+		selector  string
+		wantError string
+	}{
+		{name: "empty", selector: "", wantError: "cannot be empty"},
+		{name: "whitespace only", selector: " ", wantError: "cannot be empty"},
+		{name: "zero", selector: "0", wantError: "must be positive"},
+		{name: "negative", selector: "-1", wantError: "must be positive"},
+		{name: "range includes zero", selector: "0-2", wantError: "must be positive"},
+		{name: "descending range", selector: "10-1", wantError: "descending range"},
+		{name: "empty element", selector: "1,,2", wantError: "cannot be empty"},
+		{name: "trailing comma", selector: "1,", wantError: "cannot be empty"},
+		{name: "malformed alpha", selector: "abc", wantError: "malformed element"},
+		{name: "malformed range", selector: "1-2-3", wantError: "malformed element"},
+		{name: "missing end", selector: "1-", wantError: "malformed element"},
+		{name: "missing start", selector: "-2", wantError: "must be positive"},
+		{name: "out of range", selector: "99999999999999999999", wantError: "out of range"},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			_, err := ParseSelector(test.selector)
+			if err == nil {
+				t.Fatalf("expected error for %q", test.selector)
+			}
+			if !strings.Contains(err.Error(), test.wantError) {
+				t.Fatalf("error = %q, want substring %q", err.Error(), test.wantError)
+			}
+		})
+	}
+}
+
+func assertIDs(t *testing.T, selector Selector, want []int) {
+	t.Helper()
+	got := selector.IDs()
+	if !equalInts(got, want) {
+		t.Fatalf("IDs = %v, want %v", got, want)
+	}
+}
+
+func assertContains(t *testing.T, selector Selector, checks map[int]bool) {
+	t.Helper()
+	for id, want := range checks {
+		if got := selector.Contains(id); got != want {
+			t.Fatalf("Contains(%d) = %t, want %t", id, got, want)
+		}
+	}
+}
+
+func equalInts(left []int, right []int) bool {
+	if len(left) != len(right) {
+		return false
+	}
+	for index := range left {
+		if left[index] != right[index] {
+			return false
+		}
+	}
+	return true
+}