Added a dedupe processor, and moved processor packages under processors/*
This commit is contained in:
89
processors/dedupe/processor.go
Normal file
89
processors/dedupe/processor.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package dedupe
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/processors"
|
||||
)
|
||||
|
||||
// Processor is a deduplicating pipeline stage keyed on Event.ID. It keeps a
// bounded, in-memory LRU of recently seen IDs and drops any event whose ID is
// still remembered.
//
// A Processor contains a mutex and must not be copied after first use; build
// one with NewProcessor and share it by pointer.
type Processor struct {
	// maxEntries is the upper bound on how many distinct IDs are remembered.
	maxEntries int

	// mu guards order and byID.
	mu sync.Mutex
	// order is the recency list of remembered IDs: the front is the most
	// recently seen ID, the back is the next eviction candidate.
	order *list.List
	// byID maps an ID to its element in order; each element's Value is the
	// ID itself, stored as a string.
	byID map[string]*list.Element
}
|
||||
|
||||
// Compile-time assertion that *Processor satisfies processors.Processor.
var _ processors.Processor = (*Processor)(nil)
|
||||
|
||||
// NewProcessor constructs a dedupe processor with a required max entry count.
|
||||
func NewProcessor(maxEntries int) (*Processor, error) {
|
||||
if maxEntries <= 0 {
|
||||
return nil, fmt.Errorf("dedupe: maxEntries must be > 0, got %d", maxEntries)
|
||||
}
|
||||
|
||||
return &Processor{
|
||||
maxEntries: maxEntries,
|
||||
order: list.New(),
|
||||
byID: make(map[string]*list.Element, maxEntries),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Factory returns a processors.Factory that constructs Processor instances.
|
||||
func Factory(maxEntries int) processors.Factory {
|
||||
return func() (processors.Processor, error) {
|
||||
return NewProcessor(maxEntries)
|
||||
}
|
||||
}
|
||||
|
||||
// Process implements processors.Processor.
|
||||
func (p *Processor) Process(_ context.Context, in event.Event) (*event.Event, error) {
|
||||
if p == nil {
|
||||
return nil, fmt.Errorf("dedupe: processor is nil")
|
||||
}
|
||||
if p.maxEntries <= 0 {
|
||||
return nil, fmt.Errorf("dedupe: processor maxEntries must be > 0")
|
||||
}
|
||||
|
||||
id := strings.TrimSpace(in.ID)
|
||||
if id == "" {
|
||||
return nil, fmt.Errorf("dedupe: event ID is required")
|
||||
}
|
||||
|
||||
p.mu.Lock()
|
||||
|
||||
if p.order == nil || p.byID == nil {
|
||||
p.mu.Unlock()
|
||||
return nil, fmt.Errorf("dedupe: processor is not initialized")
|
||||
}
|
||||
|
||||
if elem, exists := p.byID[id]; exists {
|
||||
p.order.MoveToFront(elem)
|
||||
p.mu.Unlock()
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
elem := p.order.PushFront(id)
|
||||
p.byID[id] = elem
|
||||
|
||||
if p.order.Len() > p.maxEntries {
|
||||
oldest := p.order.Back()
|
||||
if oldest != nil {
|
||||
p.order.Remove(oldest)
|
||||
if oldestID, ok := oldest.Value.(string); ok {
|
||||
delete(p.byID, oldestID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p.mu.Unlock()
|
||||
|
||||
out := in
|
||||
return &out, nil
|
||||
}
|
||||
Reference in New Issue
Block a user