// Package dedupe provides a processor that filters out events whose ID has
// already been seen, remembering a bounded number of IDs in an in-memory LRU.
package dedupe

import (
	"container/list"
	"context"
	"fmt"
	"strings"
	"sync"

	"gitea.maximumdirect.net/ejr/feedkit/event"
	"gitea.maximumdirect.net/ejr/feedkit/processors"
)

// Processor drops duplicate events by Event.ID using an in-memory LRU.
//
// At most maxEntries IDs are remembered. When a new ID would exceed that
// bound, the least-recently-seen ID is forgotten, so an event carrying that
// ID will be treated as new again. The LRU state is guarded by mu; a
// Processor must not be copied after first use because it embeds a mutex.
type Processor struct {
	maxEntries int // capacity of the LRU; set once by NewProcessor and only read afterwards

	mu    sync.Mutex               // guards order and byID
	order *list.List               // most-recent at front, least-recent at back
	byID  map[string]*list.Element // id -> list element (element.Value is string id)
}

var _ processors.Processor = (*Processor)(nil)

// NewProcessor constructs a dedupe processor with a required max entry count.
//
// It returns an error if maxEntries is not strictly positive.
func NewProcessor(maxEntries int) (*Processor, error) {
	if maxEntries <= 0 {
		return nil, fmt.Errorf("dedupe: maxEntries must be > 0, got %d", maxEntries)
	}
	return &Processor{
		maxEntries: maxEntries,
		order:      list.New(),
		byID:       make(map[string]*list.Element, maxEntries),
	}, nil
}

// Factory returns a processors.Factory that constructs Processor instances.
//
// Every call of the returned factory builds a fresh Processor with its own,
// independent LRU; maxEntries is validated at that point, not when Factory
// itself is called.
func Factory(maxEntries int) processors.Factory {
	return func() (processors.Processor, error) {
		return NewProcessor(maxEntries)
	}
}

// Process implements processors.Processor.
//
// The event ID, with surrounding whitespace trimmed, is the dedupe key. The
// first time a key is seen, Process returns a pointer to a copy of in (the
// copy keeps the original, untrimmed ID). When the key is already remembered,
// Process returns (nil, nil) and marks the key as most recently seen.
//
// An error is returned if the receiver is nil, was not built by NewProcessor
// (non-positive maxEntries or missing LRU state), or the trimmed ID is empty.
func (p *Processor) Process(_ context.Context, in event.Event) (*event.Event, error) {
	if p == nil {
		return nil, fmt.Errorf("dedupe: processor is nil")
	}
	if p.maxEntries <= 0 {
		return nil, fmt.Errorf("dedupe: processor maxEntries must be > 0")
	}

	id := strings.TrimSpace(in.ID)
	if id == "" {
		return nil, fmt.Errorf("dedupe: event ID is required")
	}

	duplicate, err := p.observe(id)
	if err != nil {
		return nil, err
	}
	if duplicate {
		return nil, nil
	}

	out := in
	return &out, nil
}

// observe records id in the LRU and reports whether it was already present.
//
// The whole update runs under p.mu, released via defer so that no exit path
// (including a panic) can leave the mutex locked.
func (p *Processor) observe(id string) (bool, error) {
	p.mu.Lock()
	defer p.mu.Unlock()

	if p.order == nil || p.byID == nil {
		return false, fmt.Errorf("dedupe: processor is not initialized")
	}

	if elem, exists := p.byID[id]; exists {
		// Seen before: refresh its recency so it is evicted last.
		p.order.MoveToFront(elem)
		return true, nil
	}

	p.byID[id] = p.order.PushFront(id)

	// Only one entry was added, so evicting a single element restores the bound.
	if p.order.Len() > p.maxEntries {
		if oldest := p.order.Back(); oldest != nil {
			// Remove returns the evicted element's Value, i.e. its ID.
			if oldestID, ok := p.order.Remove(oldest).(string); ok {
				delete(p.byID, oldestID)
			}
		}
	}
	return false, nil
}