Refactored the scheduler and source interfaces to accommodate both polling (e.g., HTTP) sources and streaming (e.g., message queue) sources.

Updated the README to reflect recent updates to default sinks.
Implemented a builtin NATS sink.
2026-02-08 15:03:46 -06:00 · 2026-02-07 19:45:26 -06:00 · 2026-02-07 11:35:55 -06:00 · 2026-01-15 19:08:28 -06:00 · 2026-01-15 18:26:45 -06:00 · 2026-01-13 18:23:43 -06:00
19 changed files with 845 additions and 93 deletions
--- a/README.md
+++ b/README.md
@@ -180,9 +180,9 @@ Registry-based construction allows daemons to opt into any sink drivers.

 Sink	Status
 stdout	🟢 Implemented
+nats	🟢 Implemented
 file	🔴 Stub
 postgres	🔴 Stub
-rabbitmq	🔴 Stub

 All sinks are required to respect context cancellation.

--- a/config/config.go
+++ b/config/config.go
@@ -54,7 +54,10 @@ type RouteConfig struct {
 	Sink string `yaml:"sink"` // sink name

 	// Kinds is domain-defined. feedkit only enforces that each entry is non-empty.
-	// Whether a given daemon "recognizes" a kind is domain-specific validation.
+	//
+	// If Kinds is omitted or empty, the route matches ALL kinds.
+	// This is useful when you want explicit per-sink routing rules even when a
+	// particular sink should receive everything.
 	Kinds []string `yaml:"kinds"`
 }

--- a/config/load.go
+++ b/config/load.go
@@ -83,9 +83,9 @@ func (c *Config) Validate() error {
 				m.Add(fieldErr(path+".driver", "is required (e.g. openmeteo_observation, rss_feed, ...)"))
 			}

-			// Every
-			if s.Every.Duration <= 0 {
-				m.Add(fieldErr(path+".every", "must be a positive duration (e.g. 15m, 1m, 30s)"))
+			// Every (optional but if present must be >=0)
+			if s.Every.Duration < 0 {
+				m.Add(fieldErr(path+".every", "is optional, but must be a positive duration (e.g. 15m, 1m, 30s) if provided"))
 			}

 			// Kind (optional but if present must be non-empty after trimming)
@@ -133,11 +133,8 @@ func (c *Config) Validate() error {
 			m.Add(fieldErr(path+".sink", fmt.Sprintf("references unknown sink %q (define it under sinks:)", r.Sink)))
 		}

-		if len(r.Kinds) == 0 {
-			// You could relax this later (e.g. empty == "all kinds"), but for now
-			// keeping it strict prevents accidental "route does nothing".
-			m.Add(fieldErr(path+".kinds", "must contain at least one kind"))
-		} else {
+		// Kinds is optional. If omitted or empty, the route matches ALL kinds.
+		// If provided, each entry must be non-empty.
 		for j, k := range r.Kinds {
 			kpath := fmt.Sprintf("%s.kinds[%d]", path, j)
 			if strings.TrimSpace(k) == "" {
@@ -145,7 +142,6 @@ func (c *Config) Validate() error {
 			}
 		}
 	}
-	}

 	return m.Err()
 }
--- a/config/validate_test.go
+++ b/config/validate_test.go
@@ -0,0 +1,48 @@
+package config
+
+import (
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestValidate_RouteKindsEmptyIsAllowed(t *testing.T) {
+	cfg := &Config{
+		Sources: []SourceConfig{
+			{Name: "src1", Driver: "driver1", Every: Duration{Duration: time.Minute}},
+		},
+		Sinks: []SinkConfig{
+			{Name: "sink1", Driver: "stdout"},
+		},
+		Routes: []RouteConfig{
+			{Sink: "sink1", Kinds: nil},        // omitted
+			{Sink: "sink1", Kinds: []string{}}, // explicit empty
+		},
+	}
+
+	if err := cfg.Validate(); err != nil {
+		t.Fatalf("expected no error, got: %v", err)
+	}
+}
+
+func TestValidate_RouteKindsRejectsBlankEntries(t *testing.T) {
+	cfg := &Config{
+		Sources: []SourceConfig{
+			{Name: "src1", Driver: "driver1", Every: Duration{Duration: time.Minute}},
+		},
+		Sinks: []SinkConfig{
+			{Name: "sink1", Driver: "stdout"},
+		},
+		Routes: []RouteConfig{
+			{Sink: "sink1", Kinds: []string{"observation", "   ", "alert"}},
+		},
+	}
+
+	err := cfg.Validate()
+	if err == nil {
+		t.Fatalf("expected error, got nil")
+	}
+	if !strings.Contains(err.Error(), "routes[0].kinds[1]") {
+		t.Fatalf("expected error to mention blank kind entry, got: %v", err)
+	}
+}
--- a/dispatch/routes.go
+++ b/dispatch/routes.go
@@ -13,11 +13,13 @@ import (
 // Behavior:
 //   - If cfg.Routes is empty, we default to "all sinks receive all kinds".
 //     (Implemented as one Route per sink with Kinds == nil.)
+//   - If a specific route's kinds: is omitted or empty, that route matches ALL kinds.
+//     (Also compiled as Kinds == nil.)
 //   - Kind strings are normalized via event.ParseKind (lowercase + trim).
 //
-// Note: config.Validate() already ensures route.sink references a known sink and
-// route.kinds are non-empty strings. We re-check a few invariants here anyway so
-// CompileRoutes is safe to call even if a daemon chooses not to call Validate().
+// Note: config.Validate() ensures route.sink references a known sink and rejects
+// blank kind entries. We re-check a few invariants here anyway so CompileRoutes
+// is safe to call even if a daemon chooses not to call Validate().
 func CompileRoutes(cfg *config.Config) ([]Route, error) {
 	if cfg == nil {
 		return nil, fmt.Errorf("dispatch.CompileRoutes: cfg is nil")
@@ -27,14 +29,13 @@ func CompileRoutes(cfg *config.Config) ([]Route, error) {
 		return nil, fmt.Errorf("dispatch.CompileRoutes: cfg has no sinks")
 	}

-	// Build a quick lookup of sink names.
+	// Build a quick lookup of sink names (exact match; no normalization).
 	sinkNames := make(map[string]bool, len(cfg.Sinks))
 	for i, s := range cfg.Sinks {
-		name := strings.TrimSpace(s.Name)
-		if name == "" {
+		if strings.TrimSpace(s.Name) == "" {
 			return nil, fmt.Errorf("dispatch.CompileRoutes: sinks[%d].name is empty", i)
 		}
-		sinkNames[name] = true
+		sinkNames[s.Name] = true
 	}

 	// Default routing: everything to every sink.
@@ -52,16 +53,21 @@ func CompileRoutes(cfg *config.Config) ([]Route, error) {
 	out := make([]Route, 0, len(cfg.Routes))

 	for i, r := range cfg.Routes {
-		sink := strings.TrimSpace(r.Sink)
-		if sink == "" {
+		sink := r.Sink
+		if strings.TrimSpace(sink) == "" {
 			return nil, fmt.Errorf("dispatch.CompileRoutes: routes[%d].sink is required", i)
 		}
 		if !sinkNames[sink] {
 			return nil, fmt.Errorf("dispatch.CompileRoutes: routes[%d].sink references unknown sink %q", i, sink)
 		}

+		// If kinds is omitted/empty, this route matches all kinds.
 		if len(r.Kinds) == 0 {
-			return nil, fmt.Errorf("dispatch.CompileRoutes: routes[%d].kinds must contain at least one kind", i)
+			out = append(out, Route{
+				SinkName: sink,
+				Kinds:    nil,
+			})
+			continue
 		}

 		kinds := make(map[event.Kind]bool, len(r.Kinds))
--- a/dispatch/routes_test.go
+++ b/dispatch/routes_test.go
@@ -0,0 +1,67 @@
+package dispatch
+
+import (
+	"testing"
+
+	"gitea.maximumdirect.net/ejr/feedkit/config"
+)
+
+func TestCompileRoutes_DefaultIsAllSinksAllKinds(t *testing.T) {
+	cfg := &config.Config{
+		Sinks: []config.SinkConfig{
+			{Name: "a", Driver: "stdout"},
+			{Name: "b", Driver: "stdout"},
+		},
+		// Routes omitted => default
+	}
+
+	routes, err := CompileRoutes(cfg)
+	if err != nil {
+		t.Fatalf("CompileRoutes error: %v", err)
+	}
+	if len(routes) != 2 {
+		t.Fatalf("expected 2 routes, got %d", len(routes))
+	}
+
+	// Order should match cfg.Sinks order (deterministic).
+	if routes[0].SinkName != "a" || routes[1].SinkName != "b" {
+		t.Fatalf("unexpected route order: %+v", routes)
+	}
+
+	for _, r := range routes {
+		if len(r.Kinds) != 0 {
+			t.Fatalf("expected nil/empty kinds for default routes, got: %+v", r.Kinds)
+		}
+	}
+}
+
+func TestCompileRoutes_EmptyKindsMeansAllKinds(t *testing.T) {
+	cfg := &config.Config{
+		Sinks: []config.SinkConfig{
+			{Name: "sink1", Driver: "stdout"},
+		},
+		Routes: []config.RouteConfig{
+			{Sink: "sink1"},                    // omitted kinds
+			{Sink: "sink1", Kinds: nil},        // explicit nil
+			{Sink: "sink1", Kinds: []string{}}, // explicit empty
+		},
+	}
+
+	routes, err := CompileRoutes(cfg)
+	if err != nil {
+		t.Fatalf("CompileRoutes error: %v", err)
+	}
+
+	if len(routes) != 3 {
+		t.Fatalf("expected 3 routes, got %d", len(routes))
+	}
+
+	for i, r := range routes {
+		if r.SinkName != "sink1" {
+			t.Fatalf("route[%d] unexpected sink: %q", i, r.SinkName)
+		}
+		if len(r.Kinds) != 0 {
+			t.Fatalf("route[%d] expected nil/empty kinds (match all), got: %+v", i, r.Kinds)
+		}
+	}
+}
--- a/doc.go
+++ b/doc.go
@@ -16,8 +16,7 @@
 // In feedkit today, that maps to:
 //
 //	Collect:     sources.Source + scheduler.Scheduler
-//	Normalize:   (today: domain code typically does this inside Source.Poll;
-//	            future: a normalization Processor is a good fit)
+//	Normalize:   (optional) normalize.Processor (or domain code inside Source.Poll)
 //	Policy:      pipeline.Pipeline (Processor chain; dedupe/ratelimit are planned)
 //	Emit:        dispatch.Dispatcher + dispatch.Fanout
 //	Sinks:       sinks.Sink (+ sinks.Registry to build from config)
@@ -76,6 +75,147 @@
 //
 //   - dedupe/ratelimit processors are placeholders (planned).
 //
+//   - normalize
+//     Optional normalization hook for splitting "fetch" from "transform".
+//
+//     Many domains (like weather) ingest multiple upstream providers whose payloads
+//     differ. A common evolution is to keep sources small and focused on polling,
+//     and move mapping/normalization into a dedicated stage.
+//
+//     feedkit provides this as an OPTIONAL pipeline processor:
+//
+//   - normalize.Normalizer: domain-implemented mapping logic
+//
+//   - normalize.Registry: holds normalizers and selects one by Match()
+//
+//   - normalize.Processor: adapts Registry into a pipeline.Processor
+//
+//     Normalization is NOT required:
+//
+//   - If you do all normalization inside Source.Poll, you can ignore this package.
+//
+//   - If normalize.Processor is not installed in your pipeline, nothing changes.
+//
+//   - If normalize.Processor is installed but no Normalizer matches an event,
+//     the event passes through unchanged.
+//
+//     The key types:
+//
+//     type Normalizer interface {
+//     // Match returns true if this normalizer should handle the event.
+//     // Matching is intentionally flexible: match on Schema, Kind, Source,
+//     // or any combination.
+//     Match(e event.Event) bool
+//
+//     // Normalize converts the incoming event into a new (or modified) event.
+//     //
+//     // Return values:
+//     //   - (out, nil) where out != nil: emit the normalized event
+//     //   - (nil, nil): drop the event (policy drop)
+//     //   - (nil, err): fail the pipeline
+//     Normalize(ctx context.Context, in event.Event) (*event.Event, error)
+//     }
+//
+//     type Registry struct { ... }
+//
+//     func (r *Registry) Register(n Normalizer)
+//
+//     // Normalize finds the first matching normalizer (in registration order) and applies it.
+//     // If none match, it returns the input event unchanged.
+//     func (r *Registry) Normalize(ctx context.Context, in event.Event) (*event.Event, error)
+//
+//     // Processor implements pipeline.Processor and calls into the Registry.
+//     // Optional behavior:
+//     //   - If Registry is nil, Processor is a no-op pass-through.
+//     //   - If RequireMatch is false (default), non-matching events pass through.
+//     //   - If RequireMatch is true, non-matching events are treated as errors.
+//     type Processor struct {
+//     Registry     *Registry
+//     RequireMatch bool
+//     }
+//
+//     "First match wins":
+//     Registry applies the first Normalizer whose Match() returns true.
+//     This is intentional: normalization is usually a single mapping step from a
+//     raw schema into a canonical schema. If you want multiple sequential transforms,
+//     model them as multiple pipeline processors.
+//
+//     Recommended convention: match by Event.Schema
+//     ------------------------------------------------
+//     Schema gives you a versionable selector that doesn't depend on source names.
+//
+//     A common pattern is:
+//
+//   - sources emit "raw" events with Schema like:
+//     "raw.openweather.current.v1"
+//     "raw.openmeteo.current.v1"
+//     "raw.nws.observation.v1"
+//
+//   - normalizers transform them into canonical domain schemas like:
+//     "weather.observation.v1"
+//     "weather.forecast.v1"
+//     "weather.alert.v1"
+//
+//     What is a "raw event"?
+//     ------------------------------------------------
+//     feedkit does not prescribe the raw payload representation.
+//     A raw payload is typically one of:
+//
+//   - json.RawMessage (recommended for JSON APIs)
+//
+//   - []byte (raw bytes)
+//
+//   - map[string]any (already-decoded but untyped JSON)
+//
+//     The only hard requirement enforced by feedkit is Event.Validate():
+//
+//   - ID, Kind, Source, EmittedAt must be set
+//
+//   - Payload must be non-nil
+//
+//     If you use raw events, you still must provide Event.Kind.
+//     Typical approaches:
+//
+//   - set Kind to the intended canonical kind (e.g. "observation") even before normalization
+//
+//   - or set Kind to a domain-defined "raw_*" kind and normalize it later
+//
+//     The simplest approach is: set Kind to the final kind early, and use Schema
+//     to describe the raw-vs-normalized payload shape.
+//
+//     Wiring example (daemon main.go)
+//     ------------------------------------------------
+//     Install normalize.Processor at the front of your pipeline:
+//
+//     normReg := &normalize.Registry{}
+//
+//     normReg.Register(normalize.Func{
+//     Name: "openweather current -> weather.observation.v1",
+//     MatchFn: func(e event.Event) bool {
+//     return e.Schema == "raw.openweather.current.v1"
+//     },
+//     NormalizeFn: func(ctx context.Context, in event.Event) (*event.Event, error) {
+//     // 1) interpret in.Payload (json.RawMessage / []byte / map)
+//     // 2) build canonical domain payload
+//     // 3) return updated event
+//
+//     out := in
+//     out.Schema = "weather.observation.v1"
+//     // Optionally adjust Kind, EffectiveAt, etc.
+//     out.Payload = /* canonical weather observation struct */
+//     return &out, nil
+//     },
+//     })
+//
+//     p := &pipeline.Pipeline{
+//     Processors: []pipeline.Processor{
+//     normalize.Processor{Registry: normReg}, // optional stage
+//     // dedupe.New(...), ratelimit.New(...), ...
+//     },
+//     }
+//
+//     If the event does not match any normalizer, it passes through unmodified.
+//
 //   - sinks
 //     Extension point for output adapters.
 //
@@ -94,7 +234,8 @@
 //   - dispatch.Fanout: one buffered queue + worker goroutine per sink
 //
 //   - dispatch.CompileRoutes(*config.Config) compiles cfg.Routes into []dispatch.Route.
-//     If routes: is omitted, it defaults to "all sinks receive all kinds".
+//     If routes: is omitted, it defaults to "all sinks receive all kinds". If a route
+//     omits kinds: (or sets it empty), that route matches all kinds.
 //
 //   - logging
 //     Shared logger type used across feedkit packages.
@@ -141,13 +282,24 @@
 //	// Event bus.
 //	bus := make(chan event.Event, 256)
 //
+//	// Optional normalization registry + pipeline.
+//	normReg := &normalize.Registry{}
+//	// domain registers normalizers into normReg...
+//
+//	p := &pipeline.Pipeline{
+//	    Processors: []pipeline.Processor{
+//	        normalize.Processor{Registry: normReg}, // optional
+//	        // dedupe/ratelimit/etc...
+//	    },
+//	}
+//
 //	// Scheduler.
 //	s := &scheduler.Scheduler{Jobs: jobs, Out: bus, Logf: logf}
 //
 //	// Dispatcher.
 //	d := &dispatch.Dispatcher{
 //	    In:       bus,
-//	    Pipeline: &pipeline.Pipeline{Processors: nil},
+//	    Pipeline: p,
 //	    Sinks:    builtSinks,
 //	    Routes:   routes,
 //	}
@@ -167,13 +319,12 @@
 // All blocking or I/O work should honor ctx.Done():
 //   - sources.Source.Poll should pass ctx to HTTP calls, etc.
 //   - sinks.Sink.Consume should honor ctx (Fanout timeouts only help if sinks cooperate).
+//   - normalizers should honor ctx if they do expensive work (rare; usually pure transforms).
 //
 // Future additions (likely)
 //
 //   - A small Runner helper that performs the standard wiring (load config,
 //     build sources/sinks/routes, run scheduler+dispatcher, handle shutdown).
-//   - A normalization hook (a Pipeline Processor + registry) that allows sources
-//     to emit "raw" payloads and defer normalization to a dedicated stage.
 //
 // # Non-goals
 //
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,15 @@ module gitea.maximumdirect.net/ejr/feedkit

 go 1.22

-require gopkg.in/yaml.v3 v3.0.1
+require (
+	github.com/nats-io/nats.go v1.34.0
+	gopkg.in/yaml.v3 v3.0.1
+)
+
+require (
+	github.com/klauspost/compress v1.17.2 // indirect
+	github.com/nats-io/nkeys v0.4.7 // indirect
+	github.com/nats-io/nuid v1.0.1 // indirect
+	golang.org/x/crypto v0.18.0 // indirect
+	golang.org/x/sys v0.16.0 // indirect
+)
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,16 @@
+github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
+github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/nats-io/nats.go v1.34.0 h1:fnxnPCNiwIG5w08rlMcEKTUw4AV/nKyGCOJE8TdhSPk=
+github.com/nats-io/nats.go v1.34.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
+github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
+github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
+github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
+github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
+golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
+golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
+golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
+golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/normalize/doc.go
+++ b/normalize/doc.go
@@ -0,0 +1,17 @@
+// Package normalize provides an OPTIONAL normalization hook for feedkit pipelines.
+//
+// Motivation:
+// Many daemons have sources that:
+//  1. fetch raw upstream data (often JSON), and
+//  2. transform it into a domain's normalized payload format.
+//
+// Doing both steps inside Source.Poll works, but tends to make sources large and
+// encourages duplication (unit conversions, common mapping helpers, etc.).
+//
+// This package lets a source emit a "raw" event (e.g., Schema="raw.openweather.current.v1",
+// Payload=json.RawMessage), and then a normalization processor can convert it into a
+// normalized event (e.g., Schema="weather.observation.v1", Payload=WeatherObservation{}).
+//
+// Key property: normalization is optional.
+// If no registered Normalizer matches an event, it passes through unchanged.
+package normalize
--- a/normalize/normalize.go
+++ b/normalize/normalize.go
@@ -0,0 +1,76 @@
+package normalize
+
+import (
+	"context"
+	"fmt"
+
+	"gitea.maximumdirect.net/ejr/feedkit/event"
+)
+
+// Normalizer converts one event shape into another.
+//
+// A Normalizer is typically domain-owned code (weatherfeeder/newsfeeder/...)
+// that knows how to interpret a specific upstream payload and produce a
+// normalized payload.
+//
+// Normalizers are selected via Match(). The matching strategy is intentionally
+// flexible: implementations may match on Schema, Kind, Source, or any other
+// Event fields.
+type Normalizer interface {
+	// Match reports whether this normalizer applies to the given event.
+	//
+	// Common patterns:
+	//   - match on e.Schema (recommended for versioning)
+	//   - match on e.Source (useful if Schema is empty)
+	//   - match on (e.Kind + e.Source), etc.
+	Match(e event.Event) bool
+
+	// Normalize transforms the incoming event into a new (or modified) event.
+	//
+	// Return values:
+	//   - (out, nil) where out != nil: emit the normalized event
+	//   - (nil, nil): drop the event (treat as policy drop)
+	//   - (nil, err): fail the pipeline
+	//
+	// Note: If you simply want to pass the event through unchanged, return &in.
+	Normalize(ctx context.Context, in event.Event) (*event.Event, error)
+}
+
+// Func is an ergonomic adapter that lets you define a Normalizer with functions.
+//
+// Example:
+//
+//	n := normalize.Func{
+//	  MatchFn: func(e event.Event) bool { return e.Schema == "raw.openweather.current.v1" },
+//	  NormalizeFn: func(ctx context.Context, in event.Event) (*event.Event, error) {
+//	    // ... map in.Payload -> normalized payload ...
+//	  },
+//	}
+type Func struct {
+	MatchFn     func(e event.Event) bool
+	NormalizeFn func(ctx context.Context, in event.Event) (*event.Event, error)
+
+	// Optional: identifies this normalizer in error messages.
+	Name string
+}
+
+func (f Func) Match(e event.Event) bool {
+	if f.MatchFn == nil {
+		return false
+	}
+	return f.MatchFn(e)
+}
+
+func (f Func) Normalize(ctx context.Context, in event.Event) (*event.Event, error) {
+	if f.NormalizeFn == nil {
+		return nil, fmt.Errorf("normalize.Func(%s): NormalizeFn is nil", f.safeName())
+	}
+	return f.NormalizeFn(ctx, in)
+}
+
+func (f Func) safeName() string {
+	if f.Name == "" {
+		return "<unnamed>"
+	}
+	return f.Name
+}
--- a/normalize/registry.go
+++ b/normalize/registry.go
@@ -0,0 +1,140 @@
+package normalize
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"gitea.maximumdirect.net/ejr/feedkit/event"
+)
+
+// Registry holds a set of Normalizers and selects one for a given event.
+//
+// Selection rule (simple + predictable):
+//   - iterate in registration order
+//   - the FIRST Normalizer whose Match(e) returns true is used
+//
+// If none match, the event passes through unchanged.
+//
+// Why "first match wins"?
+// Normalization is usually a single mapping step from a raw schema/version into
+// a normalized schema/version. If you want multiple transformation steps,
+// model them as multiple pipeline processors (which feedkit already supports).
+type Registry struct {
+	mu sync.RWMutex
+	ns []Normalizer
+}
+
+// Register adds a normalizer to the registry.
+//
+// Register panics if n is nil; this is a programmer error and should fail fast.
+func (r *Registry) Register(n Normalizer) {
+	if n == nil {
+		panic("normalize.Registry.Register: normalizer cannot be nil")
+	}
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.ns = append(r.ns, n)
+}
+
+// Normalize finds the first matching Normalizer and applies it.
+//
+// If no normalizer matches, it returns the input event unchanged.
+//
+// If a normalizer returns (nil, nil), the event is dropped.
+func (r *Registry) Normalize(ctx context.Context, in event.Event) (*event.Event, error) {
+	if r == nil {
+		// Nil registry is a valid "feature off" state.
+		out := in
+		return &out, nil
+	}
+
+	r.mu.RLock()
+	ns := append([]Normalizer(nil), r.ns...) // copy for safe iteration outside lock
+	r.mu.RUnlock()
+
+	for _, n := range ns {
+		if n == nil {
+			// Shouldn't happen (Register panics), but guard anyway.
+			continue
+		}
+		if !n.Match(in) {
+			continue
+		}
+
+		out, err := n.Normalize(ctx, in)
+		if err != nil {
+			return nil, fmt.Errorf("normalize: normalizer failed: %w", err)
+		}
+		// out may be nil to signal "drop".
+		return out, nil
+	}
+
+	// No match: pass through unchanged.
+	out := in
+	return &out, nil
+}
+
+// Processor adapts a Registry into a pipeline Processor.
+//
+// It implements:
+//
+//	Process(ctx context.Context, in event.Event) (*event.Event, error)
+//
+// which matches feedkit/pipeline.Processor.
+//
+// Optionality:
+//   - If Registry is nil, Processor becomes a no-op pass-through.
+//   - If Registry has no matching normalizer for an event, that event passes through unchanged.
+type Processor struct {
+	Registry *Registry
+
+	// If true, events that do not match any normalizer cause an error.
+	// Default is false: non-matching events pass through unchanged.
+	RequireMatch bool
+}
+
+// Process implements the pipeline.Processor interface.
+func (p Processor) Process(ctx context.Context, in event.Event) (*event.Event, error) {
+	// "Feature off": no registry means no normalization.
+	if p.Registry == nil {
+		out := in
+		return &out, nil
+	}
+
+	out, err := p.Registry.Normalize(ctx, in)
+	if err != nil {
+		return nil, err
+	}
+
+	if out == nil {
+		// Dropped by normalization policy.
+		return nil, nil
+	}
+
+	if p.RequireMatch {
+		// Detect "no-op pass-through due to no match" by checking whether a match existed.
+		// We do this with a cheap second pass to avoid changing Normalize()'s signature.
+		// (This is rare to enable; correctness/clarity > micro-optimization.)
+		if !p.Registry.hasMatch(in) {
+			return nil, fmt.Errorf("normalize: no normalizer matched event (id=%s kind=%s source=%s schema=%q)",
+				in.ID, in.Kind, in.Source, in.Schema)
+		}
+	}
+
+	return out, nil
+}
+
+func (r *Registry) hasMatch(in event.Event) bool {
+	if r == nil {
+		return false
+	}
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	for _, n := range r.ns {
+		if n != nil && n.Match(in) {
+			return true
+		}
+	}
+	return false
+}
--- a/scheduler/scheduler.go
+++ b/scheduler/scheduler.go
@@ -17,15 +17,27 @@ import (
 // one function everywhere without type mismatch friction.
 type Logger = logging.Logf

+// Job describes one scheduler task.
+//
+// A Job may be backed by either:
+//   - a polling source (sources.Source): uses Every + jitter and calls Poll()
+//   - a stream source (sources.StreamSource): ignores Every and calls Run()
+//
+// Jitter behavior:
+//   - For polling sources: Jitter is applied at startup and before each poll tick.
+//   - For stream sources: Jitter is applied once at startup only (optional; useful to avoid
+//     reconnect storms when many instances start together).
 type Job struct {
-	Source sources.Source
+	Source sources.Input
 	Every  time.Duration

 	// Jitter is the maximum additional delay added before each poll.
 	// Example: if Every=15m and Jitter=30s, each poll will occur at:
 	//   tick time + random(0..30s)
 	//
-	// If Jitter == 0, we compute a default jitter based on Every.
+	// If Jitter == 0 for polling sources, we compute a default jitter based on Every.
+	//
+	// For stream sources, Jitter is treated as *startup jitter only*.
 	Jitter time.Duration
 }

@@ -35,8 +47,9 @@ type Scheduler struct {
 	Logf Logger
 }

-// Run starts one polling goroutine per job.
-// Each job runs on its own interval and emits 0..N events per poll.
+// Run starts one goroutine per job.
+// Poll jobs run on their own interval and emit 0..N events per poll.
+// Stream jobs run continuously and emit events as they arrive.
 func (s *Scheduler) Run(ctx context.Context) error {
 	if s.Out == nil {
 		return fmt.Errorf("scheduler.Run: Out channel is nil")
@@ -59,17 +72,48 @@ func (s *Scheduler) runJob(ctx context.Context, job Job) {
 		s.logf("scheduler: job has nil source")
 		return
 	}
-	if job.Every <= 0 {
-		s.logf("scheduler: job %s has invalid interval", job.Source.Name())
+
+	// Stream sources: event-driven.
+	if ss, ok := job.Source.(sources.StreamSource); ok {
+		s.runStream(ctx, job, ss)
 		return
 	}

+	// Poll sources: time-based.
+	ps, ok := job.Source.(sources.Source)
+	if !ok {
+		s.logf("scheduler: source %T (%s) implements neither Poll() nor Run()", job.Source, job.Source.Name())
+		return
+	}
+	if job.Every <= 0 {
+		s.logf("scheduler: polling job %q missing/invalid interval (sources[].every)", ps.Name())
+		return
+	}
+
+	s.runPoller(ctx, job, ps)
+}
+
+func (s *Scheduler) runStream(ctx context.Context, job Job, src sources.StreamSource) {
+	// Optional startup jitter: helps avoid reconnect storms if many daemons start at once.
+	if job.Jitter > 0 {
+		rng := seededRNG(src.Name())
+		if !sleepJitter(ctx, rng, job.Jitter) {
+			return
+		}
+	}
+
+	// Stream sources should block until ctx cancel or fatal error.
+	if err := src.Run(ctx, s.Out); err != nil && ctx.Err() == nil {
+		s.logf("scheduler: stream source %q exited with error: %v", src.Name(), err)
+	}
+}
+
+func (s *Scheduler) runPoller(ctx context.Context, job Job, src sources.Source) {
 	// Compute jitter: either configured per job, or a sensible default.
 	jitter := effectiveJitter(job.Every, job.Jitter)

 	// Each worker gets its own RNG (safe + no lock contention).
-	seed := time.Now().UnixNano() ^ int64(hashStringFNV32a(job.Source.Name()))
-	rng := rand.New(rand.NewSource(seed))
+	rng := seededRNG(src.Name())

 	// Optional startup jitter: avoids all jobs firing at the exact moment the daemon starts.
 	if !sleepJitter(ctx, rng, jitter) {
@@ -77,7 +121,7 @@ func (s *Scheduler) runJob(ctx context.Context, job Job) {
 	}

 	// Immediate poll at startup (after startup jitter).
-	s.pollOnce(ctx, job)
+	s.pollOnce(ctx, src)

 	t := time.NewTicker(job.Every)
 	defer t.Stop()
@@ -89,7 +133,7 @@ func (s *Scheduler) runJob(ctx context.Context, job Job) {
 			if !sleepJitter(ctx, rng, jitter) {
 				return
 			}
-			s.pollOnce(ctx, job)
+			s.pollOnce(ctx, src)

 		case <-ctx.Done():
 			return
@@ -97,10 +141,10 @@ func (s *Scheduler) runJob(ctx context.Context, job Job) {
 	}
 }

-func (s *Scheduler) pollOnce(ctx context.Context, job Job) {
-	events, err := job.Source.Poll(ctx)
+func (s *Scheduler) pollOnce(ctx context.Context, src sources.Source) {
+	events, err := src.Poll(ctx)
 	if err != nil {
-		s.logf("scheduler: poll failed (%s): %v", job.Source.Name(), err)
+		s.logf("scheduler: poll failed (%s): %v", src.Name(), err)
 		return
 	}

@@ -120,6 +164,13 @@ func (s *Scheduler) logf(format string, args ...any) {
 	s.Logf(format, args...)
 }

+// ---- helpers ----
+
+func seededRNG(name string) *rand.Rand {
+	seed := time.Now().UnixNano() ^ int64(hashStringFNV32a(name))
+	return rand.New(rand.NewSource(seed))
+}
+
 // effectiveJitter chooses a jitter value.
 // - If configuredMax > 0, use it (but clamp).
 // - Else default to min(every/10, 30s).
--- a/sinks/builtins.go
+++ b/sinks/builtins.go
@@ -27,9 +27,9 @@ func RegisterBuiltins(r *Registry) {
 		return NewPostgresSinkFromConfig(cfg)
 	})

-	// RabbitMQ sink: publishes events to a broker for downstream consumers.
-	r.Register("rabbitmq", func(cfg config.SinkConfig) (Sink, error) {
-		return NewRabbitMQSinkFromConfig(cfg)
+	// NATS sink: publishes events to a broker for downstream consumers.
+	r.Register("nats", func(cfg config.SinkConfig) (Sink, error) {
+		return NewNATSSinkFromConfig(cfg)
 	})
 }

--- a/sinks/nats.go
+++ b/sinks/nats.go
@@ -0,0 +1,97 @@
+package sinks
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"sync"
+	"time"
+
+	"gitea.maximumdirect.net/ejr/feedkit/config"
+	"gitea.maximumdirect.net/ejr/feedkit/event"
+	"github.com/nats-io/nats.go"
+)
+
+type NATSSink struct {
+	name     string
+	url      string
+	exchange string
+
+	mu   sync.Mutex
+	conn *nats.Conn
+}
+
+func NewNATSSinkFromConfig(cfg config.SinkConfig) (Sink, error) {
+	url, err := requireStringParam(cfg, "url")
+	if err != nil {
+		return nil, err
+	}
+	ex, err := requireStringParam(cfg, "exchange")
+	if err != nil {
+		return nil, err
+	}
+	return &NATSSink{name: cfg.Name, url: url, exchange: ex}, nil
+}
+
+func (r *NATSSink) Name() string { return r.name }
+
+func (r *NATSSink) Consume(ctx context.Context, e event.Event) error {
+	// Boundary validation: if something upstream violated invariants,
+	// surface it loudly rather than publishing partial nonsense.
+	if err := e.Validate(); err != nil {
+		return fmt.Errorf("NATS sink: invalid event: %w", err)
+	}
+
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+
+	conn, err := r.connect(ctx)
+	if err != nil {
+		return fmt.Errorf("NATS sink: connect: %w", err)
+	}
+
+	b, err := json.Marshal(e)
+	if err != nil {
+		return fmt.Errorf("NATS sink: marshal event: %w", err)
+	}
+
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	if err := conn.Publish(r.exchange, b); err != nil {
+		return fmt.Errorf("NATS sink: publish: %w", err)
+	}
+	return nil
+}
+
+func (r *NATSSink) connect(ctx context.Context) (*nats.Conn, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.conn != nil && r.conn.Status() != nats.CLOSED {
+		return r.conn, nil
+	}
+
+	opts := []nats.Option{
+		nats.Name(fmt.Sprintf("feedkit sink %s", r.name)),
+	}
+	if deadline, ok := ctx.Deadline(); ok {
+		timeout := time.Until(deadline)
+		if timeout <= 0 {
+			return nil, ctx.Err()
+		}
+		opts = append(opts, nats.Timeout(timeout))
+	}
+
+	conn, err := nats.Connect(r.url, opts...)
+	if err != nil {
+		return nil, err
+	}
+	r.conn = conn
+	return conn, nil
+}
--- a/sinks/rabbitmq.go
+++ b/sinks/rabbitmq.go
@@ -1,42 +0,0 @@
-package sinks
-
-import (
-	"context"
-	"fmt"
-
-	"gitea.maximumdirect.net/ejr/feedkit/config"
-	"gitea.maximumdirect.net/ejr/feedkit/event"
-)
-
-type RabbitMQSink struct {
-	name     string
-	url      string
-	exchange string
-}
-
-func NewRabbitMQSinkFromConfig(cfg config.SinkConfig) (Sink, error) {
-	url, err := requireStringParam(cfg, "url")
-	if err != nil {
-		return nil, err
-	}
-	ex, err := requireStringParam(cfg, "exchange")
-	if err != nil {
-		return nil, err
-	}
-	return &RabbitMQSink{name: cfg.Name, url: url, exchange: ex}, nil
-}
-
-func (r *RabbitMQSink) Name() string { return r.name }
-
-func (r *RabbitMQSink) Consume(ctx context.Context, e event.Event) error {
-	_ = ctx
-
-	// Boundary validation: if something upstream violated invariants,
-	// surface it loudly rather than printing partial nonsense.
-	if err := e.Validate(); err != nil {
-		return fmt.Errorf("rabbitmq sink: invalid event: %w", err)
-	}
-
-	// TODO implement RabbitMQ publishing
-	return nil
-}
--- a/sources/registry.go
+++ b/sources/registry.go
@@ -13,13 +13,18 @@ import (
 // domain-specific source drivers (Open-Meteo, NWS, RSS, etc.) while feedkit
 // remains domain-agnostic.
 type Factory func(cfg config.SourceConfig) (Source, error)
+type StreamFactory func(cfg config.SourceConfig) (StreamSource, error) // StreamSource counterpart of Factory; registered via RegisterStream

 type Registry struct {
 	byDriver       map[string]Factory
+	byStreamDriver map[string]StreamFactory
 }

 func NewRegistry() *Registry {
-	return &Registry{byDriver: map[string]Factory{}}
+	return &Registry{
+		byDriver:       map[string]Factory{},
+		byStreamDriver: map[string]StreamFactory{},
+	}
 }

 // Register associates a driver name (e.g. "openmeteo_observation") with a factory.
@@ -35,10 +40,27 @@ func (r *Registry) Register(driver string, f Factory) {
 	if f == nil {
 		panic(fmt.Sprintf("sources.Registry.Register: factory cannot be nil (driver=%q)", driver))
 	}
-
+	if _, exists := r.byStreamDriver[driver]; exists {
+		panic(fmt.Sprintf("sources.Registry.Register: driver %q already registered as a stream source", driver))
+	}
 	r.byDriver[driver] = f
 }

+// RegisterStream is the StreamSource counterpart of Register: it panics on an empty driver, a nil factory, or a name already used by a polling source.
+func (r *Registry) RegisterStream(driver string, f StreamFactory) {
+	name := strings.TrimSpace(driver)
+	switch {
+	case name == "":
+		panic("sources.Registry.RegisterStream: driver cannot be empty")
+	case f == nil:
+		panic(fmt.Sprintf("sources.Registry.RegisterStream: factory cannot be nil (driver=%q)", name))
+	}
+	if _, polling := r.byDriver[name]; polling {
+		panic(fmt.Sprintf("sources.Registry.RegisterStream: driver %q already registered as a polling source", name))
+	}
+	r.byStreamDriver[name] = f
+}
+
 // Build constructs a Source from a SourceConfig by looking up cfg.Driver.
 func (r *Registry) Build(cfg config.SourceConfig) (Source, error) {
 	f, ok := r.byDriver[cfg.Driver]
@@ -47,3 +69,14 @@ func (r *Registry) Build(cfg config.SourceConfig) (Source, error) {
 	}
 	return f(cfg)
 }
+
+// BuildInput resolves cfg.Driver against stream factories first, then polling factories, and builds whichever matches.
+func (r *Registry) BuildInput(cfg config.SourceConfig) (Input, error) {
+	if build, isStream := r.byStreamDriver[cfg.Driver]; isStream {
+		return build(cfg)
+	}
+	if build, isPolling := r.byDriver[cfg.Driver]; isPolling {
+		return build(cfg)
+	}
+	return nil, fmt.Errorf("unknown source driver: %q", cfg.Driver)
+}
--- a/sources/source.go
+++ b/sources/source.go
@@ -6,6 +6,12 @@ import (
 	"gitea.maximumdirect.net/ejr/feedkit/event"
 )

+// Input is the common surface shared by all source types (polling Source and StreamSource alike).
+type Input interface {
+	Name() string     // identifies the input; presumably the configured source name (confirm against implementations)
+	Kind() event.Kind // the event.Kind tied to this input; exact semantics are not visible here (TODO confirm)
+}
+
 // Source is a configured polling job that emits 0..N events per poll.
 //
 // Source implementations live in domain modules (weatherfeeder/newsfeeder/...)
@@ -28,3 +34,12 @@ type Source interface {
 	// Implementations should honor ctx.Done() for network calls and other I/O.
 	Poll(ctx context.Context) ([]event.Event, error)
 }
+
+// StreamSource is an event-driven source (NATS/RabbitMQ/MQTT/etc).
+//
+// Run should block, sending events on out until ctx is cancelled or a fatal error occurs.
+// It MUST NOT close out (the scheduler/daemon owns the bus).
+type StreamSource interface {
+	Input // contributes Name and Kind
+	Run(ctx context.Context, out chan<- event.Event) error
+}
--- a/transport/http.go
+++ b/transport/http.go
@@ -0,0 +1,70 @@
+// FILE: ./transport/http.go
+package transport
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+)
+
+// maxResponseBodyBytes is a hard safety limit on HTTP response bodies.
+// API responses should be small, so this protects us from accidental
+// or malicious large responses. FetchBody rejects anything larger.
+const maxResponseBodyBytes = 2 << 21 // 4 MiB (4,194,304 bytes; same as 4 << 20)
+
+// DefaultHTTPTimeout is the timeout NewHTTPClient applies when given a non-positive value.
+// Individual drivers may override this if they have a specific need.
+const DefaultHTTPTimeout = 10 * time.Second
+
+// NewHTTPClient returns an http.Client that uses timeout, or DefaultHTTPTimeout when timeout <= 0.
+func NewHTTPClient(timeout time.Duration) *http.Client {
+	client := &http.Client{Timeout: DefaultHTTPTimeout}
+	if timeout > 0 {
+		client.Timeout = timeout
+	}
+	return client
+}
+
+func FetchBody(ctx context.Context, client *http.Client, url, userAgent, accept string) ([]byte, error) {
+	// GET url and return its body; non-2xx, empty, and oversized responses are errors.
+	// A nil client would panic in client.Do, so fall back to the default-timeout client.
+	if client == nil {
+		client = NewHTTPClient(0)
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, err
+	}
+	// Optional headers are sent only when the caller supplies a value.
+	if userAgent != "" {
+		req.Header.Set("User-Agent", userAgent)
+	}
+	if accept != "" {
+		req.Header.Set("Accept", accept)
+	}
+
+	res, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode < 200 || res.StatusCode >= 300 {
+		return nil, fmt.Errorf("HTTP %s", res.Status)
+	}
+
+	// Read at most maxResponseBodyBytes + 1 so we can detect overflow.
+	b, err := io.ReadAll(io.LimitReader(res.Body, maxResponseBodyBytes+1))
+	if err != nil {
+		return nil, err
+	}
+	if len(b) == 0 {
+		return nil, fmt.Errorf("empty response body")
+	}
+	if len(b) > maxResponseBodyBytes {
+		return nil, fmt.Errorf("response body too large (>%d bytes)", maxResponseBodyBytes)
+	}
+	return b, nil
+}
Author	SHA1	Message	Date
Eric Rakestraw	fafba0f01b	Refactored the scheduler and source interfaces to accommondate both polling (e.g., HTTP) sources and streaming (e.g., message queue) sources.	2026-02-08 15:03:46 -06:00
Eric Rakestraw	3c95fa97cd	Updated the README to reflect recent updates to default sinks.	2026-02-07 19:45:26 -06:00
Eric Rakestraw	dbca0548b1	Implemented a builtin NATS sink.	2026-02-07 11:35:55 -06:00
Eric Rakestraw	9b2c1e5ceb	transport: Moved transport/http.go upstream to feedkit (was previously in weatherfeeder).	2026-01-15 19:08:28 -06:00
Eric Rakestraw	1d43adcfa0	dispatch: allow empty route kinds (match all) + add routing tests - config: permit routes[].kinds to be omitted/empty; treat as "all kinds" - dispatch: compile empty kinds to Route{Kinds:nil} (match all kinds) - tests: add coverage for route compilation + config validation edge cases Files: - config/load.go - config/config.go - dispatch/routes.go - config/validate_test.go - dispatch/routes_test.go	2026-01-15 18:26:45 -06:00
Eric Rakestraw	a6c133319a	feat(feedkit): add optional normalization hook and document external API Introduce an optional normalization stage for feedkit pipelines via the new normalize package. This adds: - normalize.Normalizer interface with flexible Match() semantics - normalize.Registry for ordered normalizer selection (first match wins) - normalize.Processor adapter implementing pipeline.Processor - Pass-through behavior when no normalizer matches (normalization is optional) - Func helper for ergonomic normalizer definitions Update root doc.go to fully document the normalization model, its role in the pipeline, recommended conventions (Schema-based matching, raw vs normalized events), and concrete wiring examples. The documentation now serves as a complete external-facing API specification for downstream daemons such as weatherfeeder. This change preserves feedkit’s non-framework philosophy while enabling a clean separation between data collection and domain normalization.	2026-01-13 18:23:43 -06:00