// feedkit/doc.go

// Package feedkit provides domain-agnostic plumbing for "feed processing daemons".
//
// A feed daemon polls one or more upstream providers (HTTP APIs, RSS, etc.),
// converts upstream items into a normalized internal representation, applies
// lightweight policy (dedupe/rate-limit/filters), and emits events to one or
// more sinks (stdout, files, Postgres, brokers, ...).
//
// feedkit is intentionally NOT a framework. It supplies small, composable
// primitives that concrete daemons wire together in main.go (a small optional
// Runner helper for this wiring is planned; see "Future additions").
//
// # Conceptual pipeline
//
//	Collect → Normalize → Filter/Policy → Persist/Emit → Signal
//
// In feedkit today, that maps to:
//
//	Collect:     sources.Source + scheduler.Scheduler
//	Normalize:   (optional) normalize.Processor (or domain code inside Source.Poll)
//	Policy:      pipeline.Pipeline (Processor chain; dedupe/ratelimit are planned)
//	Emit:        dispatch.Dispatcher + dispatch.Fanout
//	Sinks:       sinks.Sink (+ sinks.Registry to build from config)
//	Config:      config.Load + config.Config validation
//
// # Public packages (API surface)
//
//   - config
//     YAML configuration types and loader/validator.
//
//   - config.Load(path) (*config.Config, error)
//
//   - config.Config: Sources, Sinks, Routes
//
//   - config.SourceConfig / SinkConfig include Params map[string]any
//     with convenience helpers like:
//
//   - ParamString / ParamStringDefault
//
//   - ParamBool / ParamBoolDefault
//
//   - ParamInt / ParamIntDefault
//
//   - ParamDuration / ParamDurationDefault
//
//   - ParamStringSlice
//
//   - event
//     Domain-agnostic event envelope moved through the system.
//
//   - event.Event includes ID, Kind, Source, timestamps, Schema, Payload
//
//   - event.Kind is stringly typed; event.ParseKind normalizes/validates.
//
//   - sources
//     Extension point for domain-specific polling jobs.
//
//   - sources.Source interface: Name(), Kind(), Poll(ctx)
//
//   - sources.Registry lets daemons register driver factories and build
//     sources from config.SourceConfig.
//
//   - scheduler
//     Runs sources on a cadence and publishes emitted events onto a channel.
//
//   - scheduler.Scheduler{Jobs, Out, Logf}.Run(ctx)
//
//   - scheduler.Job: {Source, Every, Jitter}
//
//   - pipeline
//     Optional processing chain between scheduler and dispatch.
//
//   - pipeline.Pipeline{Processors}.Process(ctx, event)
//
//   - pipeline.Processor can mutate, drop (return nil), or error.
//
//   - dedupe/ratelimit processors are placeholders (planned).
//
//   - normalize
//     Optional normalization hook for splitting "fetch" from "transform".
//
//     Many domains (like weather) ingest multiple upstream providers whose payloads
//     differ. A common evolution is to keep sources small and focused on polling,
//     and move mapping/normalization into a dedicated stage.
//
//     feedkit provides this as an OPTIONAL pipeline processor:
//
//   - normalize.Normalizer: domain-implemented mapping logic
//
//   - normalize.Registry: holds normalizers and selects one by Match()
//
//   - normalize.Processor: adapts Registry into a pipeline.Processor
//
//     Normalization is NOT required:
//
//   - If you do all normalization inside Source.Poll, you can ignore this package.
//
//   - If normalize.Processor is not installed in your pipeline, nothing changes.
//
//   - If normalize.Processor is installed but no Normalizer matches an event,
//     the event passes through unchanged.
//
// The key types:
//
//	type Normalizer interface {
//	    // Match returns true if this normalizer should handle the event.
//	    // Matching is intentionally flexible: match on Schema, Kind, Source,
//	    // or any combination.
//	    Match(e event.Event) bool
//
//	    // Normalize converts the incoming event into a new (or modified) event.
//	    //
//	    // Return values:
//	    //   - (out, nil) where out != nil: emit the normalized event
//	    //   - (nil, nil): drop the event (policy drop)
//	    //   - (nil, err): fail the pipeline
//	    Normalize(ctx context.Context, in event.Event) (*event.Event, error)
//	}
//
//	type Registry struct { ... }
//
//	func (r *Registry) Register(n Normalizer)
//
//	// Normalize finds the first matching normalizer (in registration order) and applies it.
//	// If none match, it returns the input event unchanged.
//	func (r *Registry) Normalize(ctx context.Context, in event.Event) (*event.Event, error)
//
//	// Processor implements pipeline.Processor and calls into the Registry.
//	// Optional behavior:
//	//   - If Registry is nil, Processor is a no-op pass-through.
//	//   - If RequireMatch is false (default), non-matching events pass through.
//	//   - If RequireMatch is true, non-matching events are treated as errors.
//	type Processor struct {
//	    Registry     *Registry
//	    RequireMatch bool
//	}
//
// "First match wins":
// Registry applies the first Normalizer whose Match() returns true.
// This is intentional: normalization is usually a single mapping step from a
// raw schema into a canonical schema. If you want multiple sequential transforms,
// model them as multiple pipeline processors.
//
// Recommended convention: match by Event.Schema
//
// Schema gives you a versionable selector that doesn't depend on source names.
//
// A common pattern is:
//
//   - sources emit "raw" events with Schema like:
//     "raw.openweather.current.v1",
//     "raw.openmeteo.current.v1",
//     "raw.nws.observation.v1"
//
//   - normalizers transform them into canonical domain schemas like:
//     "weather.observation.v1",
//     "weather.forecast.v1",
//     "weather.alert.v1"
//
// What is a "raw event"?
//
// feedkit does not prescribe the raw payload representation.
// A raw payload is typically one of:
//
//   - json.RawMessage (recommended for JSON APIs)
//   - []byte (raw bytes)
//   - map[string]any (already-decoded but untyped JSON)
//
// The only hard requirement enforced by feedkit is Event.Validate():
//
//   - ID, Kind, Source, EmittedAt must be set
//   - Payload must be non-nil
//
// If you use raw events, you still must provide Event.Kind.
// Typical approaches:
//
//   - set Kind to the intended canonical kind (e.g. "observation") even before normalization
//   - or set Kind to a domain-defined "raw_*" kind and normalize it later
//
// The simplest approach is: set Kind to the final kind early, and use Schema
// to describe the raw-vs-normalized payload shape.
//
// Wiring example (daemon main.go)
//
// Install normalize.Processor at the front of your pipeline:
//
//	normReg := &normalize.Registry{}
//
//	normReg.Register(normalize.Func{
//	    Name: "openweather current -> weather.observation.v1",
//	    MatchFn: func(e event.Event) bool {
//	        return e.Schema == "raw.openweather.current.v1"
//	    },
//	    NormalizeFn: func(ctx context.Context, in event.Event) (*event.Event, error) {
//	        // 1) interpret in.Payload (json.RawMessage / []byte / map)
//	        // 2) build canonical domain payload
//	        // 3) return updated event
//
//	        out := in
//	        out.Schema = "weather.observation.v1"
//	        // Optionally adjust Kind, EffectiveAt, etc.
//	        out.Payload = /* canonical weather observation struct */
//	        return &out, nil
//	    },
//	})
//
//	p := &pipeline.Pipeline{
//	    Processors: []pipeline.Processor{
//	        normalize.Processor{Registry: normReg}, // optional stage
//	        // dedupe.New(...), ratelimit.New(...), ...
//	    },
//	}
//
// If the event does not match any normalizer, it passes through unmodified.
//
//   - sinks
//     Extension point for output adapters.
//
//   - sinks.Sink interface: Name(), Consume(ctx, event)
//
//   - sinks.Registry to register driver factories and build sinks from config
//
//   - sinks.RegisterBuiltins registers feedkit-provided sink drivers
//     (stdout/file/postgres/rabbitmq; some are currently stubs).
//
//   - dispatch
//     Routes processed events to sinks, and isolates slow sinks via per-sink queues.
//
//   - dispatch.Dispatcher{In, Pipeline, Sinks, Routes, ...}.Run(ctx, logf)
//
//   - dispatch.Fanout: one buffered queue + worker goroutine per sink
//
//   - dispatch.CompileRoutes(*config.Config) compiles cfg.Routes into []dispatch.Route.
//     If routes: is omitted, it defaults to "all sinks receive all kinds". If a route
//     omits kinds: (or sets it empty), that route matches all kinds.
//
//   - logging
//     Shared logger type used across feedkit packages.
//
//   - logging.Logf is a printf-style logger signature.
//
// # Typical wiring (what a daemon does in main.go)
//
//  1. Load config (domain code may add domain-specific validation).
//  2. Register and build sources from config.Sources using sources.Registry.
//  3. Register and build sinks from config.Sinks using sinks.Registry.
//  4. Compile routes (typically via dispatch.CompileRoutes).
//  5. Create an event bus channel.
//  6. Start scheduler (sources → bus).
//  7. Start dispatcher (bus → pipeline → routes → sinks).
//
// A sketch (error handling elided for brevity; real daemons must check every error):
//
//	cfg, _ := config.Load("config.yml")
//
//	// Build sources (domain registers its drivers).
//	srcReg := sources.NewRegistry()
//	// domain: srcReg.Register("openweather_observation", newOpenWeatherSource)
//	// ...
//
//	var jobs []scheduler.Job
//	for _, sc := range cfg.Sources {
//	    src, _ := srcReg.Build(sc)
//	    jobs = append(jobs, scheduler.Job{Source: src, Every: sc.Every.Duration})
//	}
//
//	// Build sinks (feedkit can register builtins).
//	sinkReg := sinks.NewRegistry()
//	sinks.RegisterBuiltins(sinkReg)
//	builtSinks := map[string]sinks.Sink{}
//	for _, sk := range cfg.Sinks {
//	    s, _ := sinkReg.Build(sk)
//	    builtSinks[sk.Name] = s
//	}
//
//	// Compile routes.
//	routes, _ := dispatch.CompileRoutes(cfg)
//
//	// Event bus.
//	bus := make(chan event.Event, 256)
//
//	// Optional normalization registry + pipeline.
//	normReg := &normalize.Registry{}
//	// domain registers normalizers into normReg...
//
//	p := &pipeline.Pipeline{
//	    Processors: []pipeline.Processor{
//	        normalize.Processor{Registry: normReg}, // optional
//	        // dedupe/ratelimit/etc...
//	    },
//	}
//
//	// Scheduler.
//	s := &scheduler.Scheduler{Jobs: jobs, Out: bus, Logf: logf}
//
//	// Dispatcher.
//	d := &dispatch.Dispatcher{
//	    In:       bus,
//	    Pipeline: p,
//	    Sinks:    builtSinks,
//	    Routes:   routes,
//	}
//
//	go s.Run(ctx)
//	return d.Run(ctx, logf)
//
// # Conventions (recommended, not required)
//
//   - Event.ID should be stable for dedupe/storage (often "<provider>:<upstream-id>").
//   - Event.Kind should be lowercase ("observation", "alert", "article", ...).
//   - Event.Schema should identify the payload shape/version
//     (e.g. "weather.observation.v1").
//
// # Context and cancellation
//
// All blocking or I/O work should honor ctx.Done():
//   - sources.Source.Poll should pass ctx to HTTP calls, etc.
//   - sinks.Sink.Consume should honor ctx (Fanout timeouts only help if sinks cooperate).
//   - normalizers should honor ctx if they do expensive work (rare; usually pure transforms).
//
// # Future additions (likely)
//
//   - A small Runner helper that performs the standard wiring (load config,
//     build sources/sinks/routes, run scheduler+dispatcher, handle shutdown).
//
// # Non-goals
//
// feedkit does not define domain payload schemas, does not enforce domain kinds,
// and does not embed domain-specific validation rules. Those live in each
// concrete daemon/module (weatherfeeder, newsfeeder, ...).
package feedkit