// Package feedkit provides domain-agnostic plumbing for "feed processing daemons". // // A feed daemon polls one or more upstream providers (HTTP APIs, RSS, etc.), // converts upstream items into a normalized internal representation, applies // lightweight policy (dedupe/rate-limit/filters), and emits events to one or // more sinks (stdout, files, Postgres, brokers, ...). // // feedkit is intentionally NOT a framework. It supplies small, composable // primitives that concrete daemons wire together in main.go (or via a small // optional Runner helper, see "Future additions"). // // Conceptual pipeline // // Collect → Normalize → Filter/Policy → Persist/Emit → Signal // // In feedkit today, that maps to: // // Collect: sources.Source + scheduler.Scheduler // Normalize: (optional) normalize.Processor (or domain code inside Source.Poll) // Policy: pipeline.Pipeline (Processor chain; dedupe/ratelimit are planned) // Emit: dispatch.Dispatcher + dispatch.Fanout // Sinks: sinks.Sink (+ sinks.Registry to build from config) // Config: config.Load + config.Config validation // // Public packages (API surface) // // - config // YAML configuration types and loader/validator. // // - config.Load(path) (*config.Config, error) // // - config.Config: Sources, Sinks, Routes // // - config.SourceConfig / SinkConfig include Params map[string]any // with convenience helpers like: // // - ParamString / ParamStringDefault // // - ParamBool / ParamBoolDefault // // - ParamInt / ParamIntDefault // // - ParamDuration / ParamDurationDefault // // - ParamStringSlice // // - event // Domain-agnostic event envelope moved through the system. // // - event.Event includes ID, Kind, Source, timestamps, Schema, Payload // // - event.Kind is stringly typed; event.ParseKind normalizes/validates. // // - sources // Extension point for domain-specific polling jobs. 
// // - sources.Source interface: Name(), Kind(), Poll(ctx) // // - sources.Registry lets daemons register driver factories and build // sources from config.SourceConfig. // // - scheduler // Runs sources on a cadence and publishes emitted events onto a channel. // // - scheduler.Scheduler{Jobs, Out, Logf}.Run(ctx) // // - scheduler.Job: {Source, Every, Jitter} // // - pipeline // Optional processing chain between scheduler and dispatch. // // - pipeline.Pipeline{Processors}.Process(ctx, event) // // - pipeline.Processor can mutate, drop (return nil), or error. // // - dedupe/ratelimit processors are placeholders (planned). // // - normalize // Optional normalization hook for splitting "fetch" from "transform". // // Many domains (like weather) ingest multiple upstream providers whose payloads // differ. A common evolution is to keep sources small and focused on polling, // and move mapping/normalization into a dedicated stage. // // feedkit provides this as an OPTIONAL pipeline processor: // // - normalize.Normalizer: domain-implemented mapping logic // // - normalize.Registry: holds normalizers and selects one by Match() // // - normalize.Processor: adapts Registry into a pipeline.Processor // // Normalization is NOT required: // // - If you do all normalization inside Source.Poll, you can ignore this package. // // - If normalize.Processor is not installed in your pipeline, nothing changes. // // - If normalize.Processor is installed but no Normalizer matches an event, // the event passes through unchanged. // // The key types: // // type Normalizer interface { // // Match returns true if this normalizer should handle the event. // // Matching is intentionally flexible: match on Schema, Kind, Source, // // or any combination. // Match(e event.Event) bool // // // Normalize converts the incoming event into a new (or modified) event. 
// // // // Return values: // // - (out, nil) where out != nil: emit the normalized event // // - (nil, nil): drop the event (policy drop) // // - (nil, err): fail the pipeline // Normalize(ctx context.Context, in event.Event) (*event.Event, error) // } // // type Registry struct { ... } // // func (r *Registry) Register(n Normalizer) // // // Normalize finds the first matching normalizer (in registration order) and applies it. // // If none match, it returns the input event unchanged. // func (r *Registry) Normalize(ctx context.Context, in event.Event) (*event.Event, error) // // // Processor implements pipeline.Processor and calls into the Registry. // // Optional behavior: // // - If Registry is nil, Processor is a no-op pass-through. // // - If RequireMatch is false (default), non-matching events pass through. // // - If RequireMatch is true, non-matching events are treated as errors. // type Processor struct { // Registry *Registry // RequireMatch bool // } // // "First match wins": // Registry applies the first Normalizer whose Match() returns true. // This is intentional: normalization is usually a single mapping step from a // raw schema into a canonical schema. If you want multiple sequential transforms, // model them as multiple pipeline processors. // // Recommended convention: match by Event.Schema // ------------------------------------------------ // Schema gives you a versionable selector that doesn't depend on source names. // // A common pattern is: // // - sources emit "raw" events with Schema like: // "raw.openweather.current.v1" // "raw.openmeteo.current.v1" // "raw.nws.observation.v1" // // - normalizers transform them into canonical domain schemas like: // "weather.observation.v1" // "weather.forecast.v1" // "weather.alert.v1" // // What is a "raw event"? // ------------------------------------------------ // feedkit does not prescribe the raw payload representation. 
// A raw payload is typically one of: // // - json.RawMessage (recommended for JSON APIs) // // - []byte (raw bytes) // // - map[string]any (already-decoded but untyped JSON) // // The only hard requirement enforced by feedkit is Event.Validate(): // // - ID, Kind, Source, EmittedAt must be set // // - Payload must be non-nil // // If you use raw events, you still must provide Event.Kind. // Typical approaches: // // - set Kind to the intended canonical kind (e.g. "observation") even before normalization // // - or set Kind to a domain-defined "raw_*" kind and normalize it later // // The simplest approach is: set Kind to the final kind early, and use Schema // to describe the raw-vs-normalized payload shape. // // Wiring example (daemon main.go) // ------------------------------------------------ // Install normalize.Processor at the front of your pipeline: // // normReg := &normalize.Registry{} // // normReg.Register(normalize.Func{ // Name: "openweather current -> weather.observation.v1", // MatchFn: func(e event.Event) bool { // return e.Schema == "raw.openweather.current.v1" // }, // NormalizeFn: func(ctx context.Context, in event.Event) (*event.Event, error) { // // 1) interpret in.Payload (json.RawMessage / []byte / map) // // 2) build canonical domain payload // // 3) return updated event // // out := in // out.Schema = "weather.observation.v1" // // Optionally adjust Kind, EffectiveAt, etc. // out.Payload = /* canonical weather observation struct */ // return &out, nil // }, // }) // // p := &pipeline.Pipeline{ // Processors: []pipeline.Processor{ // normalize.Processor{Registry: normReg}, // optional stage // // dedupe.New(...), ratelimit.New(...), ... // }, // } // // If the event does not match any normalizer, it passes through unmodified. // // - sinks // Extension point for output adapters. 
// // - sinks.Sink interface: Name(), Consume(ctx, event) // // - sinks.Registry to register driver factories and build sinks from config // // - sinks.RegisterBuiltins registers feedkit-provided sink drivers // (stdout/file/postgres/rabbitmq; some are currently stubs). // // - dispatch // Routes processed events to sinks, and isolates slow sinks via per-sink queues. // // - dispatch.Dispatcher{In, Pipeline, Sinks, Routes, ...}.Run(ctx, logf) // // - dispatch.Fanout: one buffered queue + worker goroutine per sink // // - dispatch.CompileRoutes(*config.Config) compiles cfg.Routes into []dispatch.Route. // If routes: is omitted, it defaults to "all sinks receive all kinds". If a route // omits kinds: (or sets it empty), that route matches all kinds. // // - logging // Shared logger type used across feedkit packages. // // - logging.Logf is a printf-style logger signature. // // Typical wiring (what a daemon does in main.go) // // 1. Load config (domain code may add domain-specific validation). // 2. Register and build sources from config.Sources using sources.Registry. // 3. Register and build sinks from config.Sinks using sinks.Registry. // 4. Compile routes (typically via dispatch.CompileRoutes). // 5. Create an event bus channel. // 6. Start scheduler (sources → bus). // 7. Start dispatcher (bus → pipeline → routes → sinks). // // A sketch: // // cfg, _ := config.Load("config.yml") // // // Build sources (domain registers its drivers). // srcReg := sources.NewRegistry() // // domain: srcReg.Register("openweather_observation", newOpenWeatherSource) // // ... // // var jobs []scheduler.Job // for _, sc := range cfg.Sources { // src, _ := srcReg.Build(sc) // jobs = append(jobs, scheduler.Job{Source: src, Every: sc.Every.Duration}) // } // // // Build sinks (feedkit can register builtins). 
// sinkReg := sinks.NewRegistry() // sinks.RegisterBuiltins(sinkReg) // builtSinks := map[string]sinks.Sink{} // for _, sk := range cfg.Sinks { // s, _ := sinkReg.Build(sk) // builtSinks[sk.Name] = s // } // // // Compile routes. // routes, _ := dispatch.CompileRoutes(cfg) // // // Event bus. // bus := make(chan event.Event, 256) // // // Optional normalization registry + pipeline. // normReg := &normalize.Registry{} // // domain registers normalizers into normReg... // // p := &pipeline.Pipeline{ // Processors: []pipeline.Processor{ // normalize.Processor{Registry: normReg}, // optional // // dedupe/ratelimit/etc... // }, // } // // // Scheduler. // s := &scheduler.Scheduler{Jobs: jobs, Out: bus, Logf: logf} // // // Dispatcher. // d := &dispatch.Dispatcher{ // In: bus, // Pipeline: p, // Sinks: builtSinks, // Routes: routes, // } // // go s.Run(ctx) // return d.Run(ctx, logf) // // Conventions (recommended, not required) // // - Event.ID should be stable for dedupe/storage (often "<source>:<id>"). // - Event.Kind should be lowercase ("observation", "alert", "article", ...). // - Event.Schema should identify the payload shape/version // (e.g. "weather.observation.v1"). // // # Context and cancellation // // All blocking or I/O work should honor ctx.Done(): // - sources.Source.Poll should pass ctx to HTTP calls, etc. // - sinks.Sink.Consume should honor ctx (Fanout timeouts only help if sinks cooperate). // - normalizers should honor ctx if they do expensive work (rare; usually pure transforms). // // Future additions (likely) // // - A small Runner helper that performs the standard wiring (load config, // build sources/sinks/routes, run scheduler+dispatcher, handle shutdown). // // # Non-goals // // feedkit does not define domain payload schemas, does not enforce domain kinds, // and does not embed domain-specific validation rules. Those live in each // concrete daemon/module (weatherfeeder, newsfeeder, ...). package feedkit