90 lines
2.0 KiB
Go
90 lines
2.0 KiB
Go
package dedupe
|
|
|
|
import (
|
|
"container/list"
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
|
|
"gitea.maximumdirect.net/ejr/feedkit/event"
|
|
"gitea.maximumdirect.net/ejr/feedkit/processors"
|
|
)
|
|
|
|
// Processor is an event processor that filters out events whose ID has
// already been seen. Seen IDs are held in memory in least-recently-used
// order and capped at maxEntries.
type Processor struct {
	// maxEntries is the maximum number of IDs kept in the cache.
	maxEntries int

	// mu guards order and byID.
	mu sync.Mutex

	// order holds the tracked IDs by recency: the front is the most
	// recently seen ID, the back is the next eviction candidate.
	order *list.List

	// byID maps an ID to its element in order; each element's Value is the
	// ID string itself.
	byID map[string]*list.Element
}
|
|
|
|
// Compile-time check that *Processor satisfies processors.Processor.
var _ processors.Processor = (*Processor)(nil)
|
|
|
|
// NewProcessor constructs a dedupe processor with a required max entry count.
|
|
func NewProcessor(maxEntries int) (*Processor, error) {
|
|
if maxEntries <= 0 {
|
|
return nil, fmt.Errorf("dedupe: maxEntries must be > 0, got %d", maxEntries)
|
|
}
|
|
|
|
return &Processor{
|
|
maxEntries: maxEntries,
|
|
order: list.New(),
|
|
byID: make(map[string]*list.Element, maxEntries),
|
|
}, nil
|
|
}
|
|
|
|
// Factory returns a processors.Factory that constructs Processor instances.
|
|
func Factory(maxEntries int) processors.Factory {
|
|
return func() (processors.Processor, error) {
|
|
return NewProcessor(maxEntries)
|
|
}
|
|
}
|
|
|
|
// Process implements processors.Processor.
|
|
func (p *Processor) Process(_ context.Context, in event.Event) (*event.Event, error) {
|
|
if p == nil {
|
|
return nil, fmt.Errorf("dedupe: processor is nil")
|
|
}
|
|
if p.maxEntries <= 0 {
|
|
return nil, fmt.Errorf("dedupe: processor maxEntries must be > 0")
|
|
}
|
|
|
|
id := strings.TrimSpace(in.ID)
|
|
if id == "" {
|
|
return nil, fmt.Errorf("dedupe: event ID is required")
|
|
}
|
|
|
|
p.mu.Lock()
|
|
|
|
if p.order == nil || p.byID == nil {
|
|
p.mu.Unlock()
|
|
return nil, fmt.Errorf("dedupe: processor is not initialized")
|
|
}
|
|
|
|
if elem, exists := p.byID[id]; exists {
|
|
p.order.MoveToFront(elem)
|
|
p.mu.Unlock()
|
|
return nil, nil
|
|
}
|
|
|
|
elem := p.order.PushFront(id)
|
|
p.byID[id] = elem
|
|
|
|
if p.order.Len() > p.maxEntries {
|
|
oldest := p.order.Back()
|
|
if oldest != nil {
|
|
p.order.Remove(oldest)
|
|
if oldestID, ok := oldest.Value.(string); ok {
|
|
delete(p.byID, oldestID)
|
|
}
|
|
}
|
|
}
|
|
|
|
p.mu.Unlock()
|
|
|
|
out := in
|
|
return &out, nil
|
|
}
|