feedkit: split the former maximumdirect.net/weatherd project in two.
feedkit now contains a reusable core, while weatherfeeder is a concrete implementation that includes weather-specific functions.
This commit is contained in:
27
.gitignore
vendored
Normal file
27
.gitignore
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# ---> Go
|
||||
# If you prefer the allow list template instead of the deny list, see community template:
|
||||
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
|
||||
#
|
||||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.exe~
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, built with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
# Dependency directories (remove the comment below to include it)
|
||||
# vendor/
|
||||
|
||||
# Go workspace file
|
||||
go.work
|
||||
go.work.sum
|
||||
|
||||
# env file
|
||||
.env
|
||||
|
||||
87
NAMING.md
Normal file
87
NAMING.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# Naming Conventions
|
||||
|
||||
This document defines naming conventions for the Feed-based ingestion daemons and shared libraries in the HomeOps ecosystem.
|
||||
|
||||
These conventions are intentionally simple and consistent, and are designed to make system behavior obvious from names alone.
|
||||
|
||||
---
|
||||
|
||||
## Core Shared Module
|
||||
|
||||
**Module / repository name**
|
||||
maximumdirect.net/feedkit
|
||||
|
||||
|
||||
**Purpose**
|
||||
|
||||
`feedkit` provides the domain-agnostic core used by all feed-processing daemons:
|
||||
polling, scheduling, filtering, routing, persistence fanout, and signaling.
|
||||
|
||||
It contains *infrastructure primitives*, not domain logic.
|
||||
|
||||
---
|
||||
|
||||
## Feed Daemons
|
||||
|
||||
### Naming Format
|
||||
<domain>feeder
|
||||
|
||||
|
||||
This name is used for **both**:
|
||||
- the repository / Go module name
|
||||
- the compiled binary name
|
||||
|
||||
### Rationale
|
||||
|
||||
A *feeder* is an active agent that:
|
||||
- consumes one or more upstream feeds
|
||||
- normalizes new items into events
|
||||
- emits facts and/or signals into the system
|
||||
|
||||
### Examples
|
||||
|
||||
| Domain | Repository / Binary |
|
||||
|---------------|---------------------|
|
||||
| Weather | `weatherfeeder` |
|
||||
| RSS | `rssfeeder` |
|
||||
| News | `newsfeeder` |
|
||||
| NBA | `nbafeeder` |
|
||||
| Earthquakes | `earthquakefeeder` |
|
||||
| Patreon | `patreonfeeder` |
|
||||
|
||||
---
|
||||
|
||||
## Non-Feed Daemons
|
||||
|
||||
Daemons that perform downstream work (downloads, archiving, transformation, etc.)
|
||||
**should not** use the `feeder` suffix.
|
||||
|
||||
Preferred naming reflects the primary action:
|
||||
|
||||
- `mediafetcher`
|
||||
- `videodownloader`
|
||||
- `archiver`
|
||||
- `syncd`
|
||||
|
||||
This preserves a clear semantic boundary between:
|
||||
- **feed detection** (feeders)
|
||||
- **work execution** (workers)
|
||||
|
||||
---
|
||||
|
||||
## Go Naming Conventions
|
||||
|
||||
- All repository, module, package, and binary names are **lowercase**
|
||||
- No dashes (`-`) or underscores (`_`) in Go package names
|
||||
- Repository name and binary name should match unless there is a compelling reason not to
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
- **Core library:** `feedkit`
|
||||
- **Feed daemons:** `<domain>feeder`
|
||||
- **Downstream workers:** action-based names
|
||||
- Names should communicate *role*, not implementation details
|
||||
|
||||
|
||||
139
config/config.go
Normal file
139
config/config.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Config is the top-level YAML configuration used by feedkit-style daemons.
//
// IMPORTANT: This package is intentionally domain-agnostic.
//   - We validate *shape* and internal consistency (required fields, uniqueness, etc.).
//   - We do NOT enforce domain rules (e.g., "observation|forecast|alert").
//     Domain modules (weatherfeeder/newsfeeder/...) can add their own validation layer.
type Config struct {
	Sources []SourceConfig `yaml:"sources"` // polling jobs; at least one is required (see Validate)
	Sinks   []SinkConfig   `yaml:"sinks"`   // output adapters; at least one is required (see Validate)
	Routes  []RouteConfig  `yaml:"routes"`  // optional routing rules from kinds to sinks
}

// SourceConfig describes one polling job.
//
// This is intentionally generic:
//   - driver-specific knobs belong in Params.
//   - "kind" is allowed (useful for safety checks / routing), but feedkit does not
//     restrict the allowed values.
type SourceConfig struct {
	// Name is the unique identifier for this source (uniqueness enforced by Validate).
	Name string `yaml:"name"`
	// Driver selects the polling implementation, e.g. "openmeteo_observation", "rss_feed", etc.
	Driver string `yaml:"driver"`

	// Every is the polling interval: "15m", "1m", etc. Must be > 0 (see Validate).
	Every Duration `yaml:"every"`

	// Kind is optional and domain-defined. If set, it should be a non-empty string.
	// Domains commonly use it to enforce "this source should only emit kind X".
	Kind string `yaml:"kind"`

	// Params are driver-specific settings (URL, headers, station IDs, API keys, etc.).
	// The driver implementation is responsible for reading/validating these.
	Params map[string]any `yaml:"params"`
}

// SinkConfig describes one output sink adapter.
type SinkConfig struct {
	// Name is the unique identifier for this sink (referenced by RouteConfig.Sink).
	Name string `yaml:"name"`
	// Driver selects the sink implementation: "stdout", "file", "postgres", "rabbitmq", ...
	Driver string `yaml:"driver"`
	// Params are sink-specific settings; may be nil.
	Params map[string]any `yaml:"params"`
}

// RouteConfig describes a routing rule: which sink receives which kinds.
type RouteConfig struct {
	// Sink is the name of a sink declared under "sinks:" (checked by Validate).
	Sink string `yaml:"sink"`

	// Kinds is domain-defined. feedkit only enforces that each entry is non-empty.
	// Whether a given daemon "recognizes" a kind is domain-specific validation.
	Kinds []string `yaml:"kinds"`
}
|
||||
|
||||
// Duration is a YAML-friendly duration wrapper.
|
||||
//
|
||||
// Supported YAML formats:
|
||||
//
|
||||
// every: 15m # string duration (recommended)
|
||||
// every: 30s
|
||||
// every: 1h
|
||||
//
|
||||
// Also supported for convenience (interpreted as minutes):
|
||||
//
|
||||
// every: 15 # integer minutes
|
||||
// every: "15" # string minutes
|
||||
type Duration struct {
|
||||
time.Duration
|
||||
}
|
||||
|
||||
func (d *Duration) UnmarshalYAML(value *yaml.Node) error {
|
||||
// We expect a scalar (string or int).
|
||||
if value.Kind != yaml.ScalarNode {
|
||||
return fmt.Errorf("duration must be a scalar (e.g. 15m), got %v", value.Kind)
|
||||
}
|
||||
|
||||
// Case 1: YAML integer -> interpret as minutes.
|
||||
if value.Tag == "!!int" {
|
||||
n, err := strconv.Atoi(value.Value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid integer duration %q: %w", value.Value, err)
|
||||
}
|
||||
if n <= 0 {
|
||||
return fmt.Errorf("duration must be > 0, got %d", n)
|
||||
}
|
||||
d.Duration = time.Duration(n) * time.Minute
|
||||
return nil
|
||||
}
|
||||
|
||||
// Case 2: YAML string.
|
||||
if value.Tag == "!!str" {
|
||||
s := strings.TrimSpace(value.Value)
|
||||
if s == "" {
|
||||
return fmt.Errorf("duration cannot be empty")
|
||||
}
|
||||
|
||||
// If it’s all digits, interpret as minutes (e.g. "15").
|
||||
if isAllDigits(s) {
|
||||
n, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid numeric duration %q: %w", s, err)
|
||||
}
|
||||
if n <= 0 {
|
||||
return fmt.Errorf("duration must be > 0, got %d", n)
|
||||
}
|
||||
d.Duration = time.Duration(n) * time.Minute
|
||||
return nil
|
||||
}
|
||||
|
||||
// Otherwise parse as Go duration string (15m, 30s, 1h, etc.).
|
||||
parsed, err := time.ParseDuration(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid duration %q (expected e.g. 15m, 30s, 1h): %w", s, err)
|
||||
}
|
||||
if parsed <= 0 {
|
||||
return fmt.Errorf("duration must be > 0, got %s", parsed)
|
||||
}
|
||||
d.Duration = parsed
|
||||
return nil
|
||||
}
|
||||
|
||||
// Anything else: reject.
|
||||
return fmt.Errorf("duration must be a string like 15m or an integer minutes, got tag %s", value.Tag)
|
||||
}
|
||||
|
||||
// isAllDigits reports whether s is non-empty and consists solely of the
// ASCII digits '0' through '9'.
func isAllDigits(s string) bool {
	if s == "" {
		return false
	}
	// Byte-wise scan is sufficient: any multi-byte UTF-8 rune contains bytes
	// outside the ASCII digit range, so it fails the check just like a rune scan.
	for i := 0; i < len(s); i++ {
		if c := s[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
||||
193
config/load.go
Normal file
193
config/load.go
Normal file
@@ -0,0 +1,193 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Load reads the YAML config file from disk, decodes it into Config,
|
||||
// and then validates it.
|
||||
//
|
||||
// Important behaviors:
|
||||
// - Uses yaml.Decoder with KnownFields(true) to catch typos like "srouces:".
|
||||
// - Returns a validation error that (usually) contains multiple issues at once.
|
||||
func Load(path string) (*Config, error) {
|
||||
if strings.TrimSpace(path) == "" {
|
||||
path = "config.yml"
|
||||
}
|
||||
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("config.Load: read %q: %w", path, err)
|
||||
}
|
||||
|
||||
var cfg Config
|
||||
|
||||
dec := yaml.NewDecoder(strings.NewReader(string(raw)))
|
||||
dec.KnownFields(true) // strict mode for struct fields
|
||||
|
||||
if err := dec.Decode(&cfg); err != nil {
|
||||
return nil, fmt.Errorf("config.Load: parse YAML %q: %w", path, err)
|
||||
}
|
||||
|
||||
// Optional: ensure there isn't a second YAML document accidentally appended.
|
||||
var extra any
|
||||
if err := dec.Decode(&extra); err == nil {
|
||||
return nil, fmt.Errorf("config.Load: %q contains multiple YAML documents; expected exactly one", path)
|
||||
}
|
||||
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// Validate checks whether the config is internally consistent and safe to run.
|
||||
//
|
||||
// This is intentionally DOMAIN-AGNOSTIC validation:
|
||||
// - required fields are present
|
||||
// - names are unique
|
||||
// - durations are > 0
|
||||
// - routes reference defined sinks
|
||||
//
|
||||
// We DO NOT enforce domain-specific constraints like "allowed kinds" or
|
||||
// "NWS requires a user-agent". Those belong in the domain module (weatherfeeder).
|
||||
func (c *Config) Validate() error {
|
||||
var m multiError
|
||||
|
||||
// ---------- sources ----------
|
||||
if len(c.Sources) == 0 {
|
||||
m.Add(fieldErr("sources", "must contain at least one source"))
|
||||
} else {
|
||||
seen := map[string]bool{}
|
||||
for i, s := range c.Sources {
|
||||
path := fmt.Sprintf("sources[%d]", i)
|
||||
|
||||
// Name
|
||||
if strings.TrimSpace(s.Name) == "" {
|
||||
m.Add(fieldErr(path+".name", "is required"))
|
||||
} else {
|
||||
if seen[s.Name] {
|
||||
m.Add(fieldErr(path+".name", fmt.Sprintf("duplicate source name %q (source names must be unique)", s.Name)))
|
||||
}
|
||||
seen[s.Name] = true
|
||||
}
|
||||
|
||||
// Driver
|
||||
if strings.TrimSpace(s.Driver) == "" {
|
||||
m.Add(fieldErr(path+".driver", "is required (e.g. openmeteo_observation, rss_feed, ...)"))
|
||||
}
|
||||
|
||||
// Every
|
||||
if s.Every.Duration <= 0 {
|
||||
m.Add(fieldErr(path+".every", "must be a positive duration (e.g. 15m, 1m, 30s)"))
|
||||
}
|
||||
|
||||
// Kind (optional but if present must be non-empty after trimming)
|
||||
if s.Kind != "" && strings.TrimSpace(s.Kind) == "" {
|
||||
m.Add(fieldErr(path+".kind", "cannot be blank (omit it entirely, or provide a non-empty string)"))
|
||||
}
|
||||
|
||||
// Params can be nil; that's fine.
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- sinks ----------
|
||||
sinkNames := map[string]bool{}
|
||||
if len(c.Sinks) == 0 {
|
||||
m.Add(fieldErr("sinks", "must contain at least one sink"))
|
||||
} else {
|
||||
for i, s := range c.Sinks {
|
||||
path := fmt.Sprintf("sinks[%d]", i)
|
||||
|
||||
if strings.TrimSpace(s.Name) == "" {
|
||||
m.Add(fieldErr(path+".name", "is required"))
|
||||
} else {
|
||||
if sinkNames[s.Name] {
|
||||
m.Add(fieldErr(path+".name", fmt.Sprintf("duplicate sink name %q (sink names must be unique)", s.Name)))
|
||||
}
|
||||
sinkNames[s.Name] = true
|
||||
}
|
||||
|
||||
if strings.TrimSpace(s.Driver) == "" {
|
||||
m.Add(fieldErr(path+".driver", "is required (stdout|file|postgres|rabbitmq|...)"))
|
||||
}
|
||||
|
||||
// Params can be nil; that's fine.
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- routes ----------
|
||||
// Routes are optional. If provided, validate shape + references.
|
||||
for i, r := range c.Routes {
|
||||
path := fmt.Sprintf("routes[%d]", i)
|
||||
|
||||
if strings.TrimSpace(r.Sink) == "" {
|
||||
m.Add(fieldErr(path+".sink", "is required"))
|
||||
} else if !sinkNames[r.Sink] {
|
||||
m.Add(fieldErr(path+".sink", fmt.Sprintf("references unknown sink %q (define it under sinks:)", r.Sink)))
|
||||
}
|
||||
|
||||
if len(r.Kinds) == 0 {
|
||||
// You could relax this later (e.g. empty == "all kinds"), but for now
|
||||
// keeping it strict prevents accidental "route does nothing".
|
||||
m.Add(fieldErr(path+".kinds", "must contain at least one kind"))
|
||||
} else {
|
||||
for j, k := range r.Kinds {
|
||||
kpath := fmt.Sprintf("%s.kinds[%d]", path, j)
|
||||
if strings.TrimSpace(k) == "" {
|
||||
m.Add(fieldErr(kpath, "kind cannot be empty"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return m.Err()
|
||||
}
|
||||
|
||||
// ---- error helpers ----

// fieldErr produces consistent "path: message" errors.
func fieldErr(path, msg string) error {
	return fmt.Errorf("%s: %s", path, msg)
}

// multiError collects many errors and returns them as one error.
// This makes config iteration much nicer: you fix several things per run.
type multiError struct {
	errs []error
}

// Add records err; nil errors are silently ignored.
func (m *multiError) Add(err error) {
	if err != nil {
		m.errs = append(m.errs, err)
	}
}

// Err returns nil when nothing was recorded, otherwise the collection itself,
// with entries sorted by message for stable output (useful in tests and when
// iterating on a config).
func (m *multiError) Err() error {
	if len(m.errs) == 0 {
		return nil
	}
	sort.Slice(m.errs, func(i, j int) bool {
		return m.errs[i].Error() < m.errs[j].Error()
	})
	return m
}

// Error renders all collected problems as a bullet list under one header,
// one problem per line, with no trailing newline.
func (m *multiError) Error() string {
	lines := make([]string, 0, len(m.errs)+1)
	lines = append(lines, "config validation failed:")
	for _, e := range m.errs {
		lines = append(lines, " - "+e.Error())
	}
	return strings.Join(lines, "\n")
}
|
||||
72
config/params.go
Normal file
72
config/params.go
Normal file
@@ -0,0 +1,72 @@
|
||||
// feedkit/config/params.go
|
||||
package config
|
||||
|
||||
import "strings"
|
||||
|
||||
// ParamString returns the first non-empty string found for any of the provided keys.
|
||||
// Values must actually be strings in the decoded config; other types are ignored.
|
||||
//
|
||||
// This keeps cfg.Params flexible (map[string]any) while letting callers stay type-safe.
|
||||
func (cfg SourceConfig) ParamString(keys ...string) (string, bool) {
|
||||
if cfg.Params == nil {
|
||||
return "", false
|
||||
}
|
||||
for _, k := range keys {
|
||||
v, ok := cfg.Params[k]
|
||||
if !ok || v == nil {
|
||||
continue
|
||||
}
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
continue
|
||||
}
|
||||
return s, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// ParamStringDefault returns ParamString(keys...) if present; otherwise it returns def.
|
||||
// This is the “polite default” helper used by drivers for optional fields like user-agent.
|
||||
func (cfg SourceConfig) ParamStringDefault(def string, keys ...string) string {
|
||||
if s, ok := cfg.ParamString(keys...); ok {
|
||||
return s
|
||||
}
|
||||
return strings.TrimSpace(def)
|
||||
}
|
||||
|
||||
// ParamString returns the first non-empty string found for any of the provided keys
|
||||
// in SinkConfig.Params. (Same rationale as SourceConfig.ParamString.)
|
||||
func (cfg SinkConfig) ParamString(keys ...string) (string, bool) {
|
||||
if cfg.Params == nil {
|
||||
return "", false
|
||||
}
|
||||
for _, k := range keys {
|
||||
v, ok := cfg.Params[k]
|
||||
if !ok || v == nil {
|
||||
continue
|
||||
}
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
continue
|
||||
}
|
||||
return s, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// ParamStringDefault returns ParamString(keys...) if present; otherwise it returns def.
|
||||
// Symmetric helper for sink implementations.
|
||||
func (cfg SinkConfig) ParamStringDefault(def string, keys ...string) string {
|
||||
if s, ok := cfg.ParamString(keys...); ok {
|
||||
return s
|
||||
}
|
||||
return strings.TrimSpace(def)
|
||||
}
|
||||
104
dispatch/dispatch.go
Normal file
104
dispatch/dispatch.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package dispatch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/pipeline"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/sinks"
|
||||
)
|
||||
|
||||
// Dispatcher consumes events from In, runs each through Pipeline, and fans
// survivors out to the configured Sinks according to Routes (see Run).
type Dispatcher struct {
	// In is the inbound event bus; closing it makes Run return cleanly.
	In <-chan event.Event

	// Pipeline may be nil; Run substitutes an empty pipeline in that case.
	Pipeline *pipeline.Pipeline

	// Sinks by name
	Sinks map[string]sinks.Sink

	// Routing rules (compiled from config)
	Routes []Route

	// Fanout knobs (global defaults for now; we can wire these from config later).
	//
	// These are intentionally simple: one queue size, one enqueue timeout,
	// one consume timeout for all sinks.
	SinkQueueSize      int
	SinkEnqueueTimeout time.Duration
	SinkConsumeTimeout time.Duration
}

// Route maps a sink name to the set of event kinds it accepts.
// An empty Kinds set matches every kind (see routeToSinks).
type Route struct {
	SinkName string
	Kinds    map[event.Kind]bool
}

// Logger is a printf-style logging callback; nil disables logging.
type Logger func(format string, args ...any)
|
||||
|
||||
func (d *Dispatcher) Run(ctx context.Context, logf Logger) error {
|
||||
if d.In == nil {
|
||||
return fmt.Errorf("dispatcher.Run: In channel is nil")
|
||||
}
|
||||
if d.Sinks == nil {
|
||||
return fmt.Errorf("dispatcher.Run: Sinks map is nil")
|
||||
}
|
||||
if d.Pipeline == nil {
|
||||
d.Pipeline = &pipeline.Pipeline{}
|
||||
}
|
||||
|
||||
// Build and start sink workers.
|
||||
fanout, err := NewFanout(ctx, d.Sinks, FanoutOptions{
|
||||
QueueSize: d.SinkQueueSize,
|
||||
EnqueueTimeout: d.SinkEnqueueTimeout,
|
||||
ConsumeTimeout: d.SinkConsumeTimeout,
|
||||
Logf: logf,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fanout.Close()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
|
||||
case e, ok := <-d.In:
|
||||
if !ok {
|
||||
// If someone closes the event bus, treat as clean shutdown.
|
||||
return nil
|
||||
}
|
||||
|
||||
out, err := d.Pipeline.Process(ctx, e)
|
||||
if err != nil {
|
||||
if logf != nil {
|
||||
logf("dispatcher: pipeline error: %v", err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if out == nil {
|
||||
// Dropped by policy.
|
||||
continue
|
||||
}
|
||||
|
||||
d.routeToSinks(ctx, fanout, *out, logf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Dispatcher) routeToSinks(ctx context.Context, fanout *Fanout, e event.Event, logf Logger) {
|
||||
for _, r := range d.Routes {
|
||||
if len(r.Kinds) > 0 && !r.Kinds[e.Kind] {
|
||||
continue
|
||||
}
|
||||
|
||||
// Publish is now the ONLY thing we do here.
|
||||
// It is bounded (if configured) and does not call sink adapters directly.
|
||||
if err := fanout.Publish(ctx, r.SinkName, e); err != nil && logf != nil {
|
||||
logf("dispatcher: failed to enqueue event for sink %q (id=%s kind=%s source=%s): %v",
|
||||
r.SinkName, e.ID, e.Kind, e.Source, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
222
dispatch/fanout.go
Normal file
222
dispatch/fanout.go
Normal file
@@ -0,0 +1,222 @@
|
||||
package dispatch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/sinks"
|
||||
)
|
||||
|
||||
// Fanout owns one buffered queue + one worker goroutine per sink.
//
// The goal is to decouple the dispatcher from slow sinks.
// The dispatcher enqueues quickly; each sink consumes at its own pace.
//
// Design notes:
//   - We use a global enqueue timeout to prevent unbounded blocking if a sink queue fills.
//   - We use a per-consume timeout (context.WithTimeout) to bound sink work.
//   - This timeout only works if the sink honors ctx.Done(). Sinks MUST be written to respect context.
//     (If a sink ignores context and blocks forever, no safe mechanism exists to "kill" that goroutine.)
type Fanout struct {
	// workers maps sink name -> its queue/worker pair.
	workers map[string]*sinkWorker

	// enqueueTimeout bounds Publish; <= 0 means block until ctx cancels.
	enqueueTimeout time.Duration
	// logger may be nil; logf guards against that.
	logger Logger

	// closeOnce makes Close idempotent; wg tracks worker goroutines.
	closeOnce sync.Once
	wg        sync.WaitGroup
}

// FanoutOptions configures queue sizing, timeouts, and logging for NewFanout.
type FanoutOptions struct {
	// QueueSize is the per-sink channel buffer size.
	// Larger buffers absorb bursts but can also hide a stuck sink longer.
	QueueSize int

	// EnqueueTimeout bounds how long Publish() will wait if a sink queue is full.
	// If this fires, the event is dropped for that sink and an error is returned.
	EnqueueTimeout time.Duration

	// ConsumeTimeout bounds how long we allow a single sink Consume() call to run.
	// If your sink respects context, it should return early with context deadline exceeded.
	ConsumeTimeout time.Duration

	// Logf receives operational log lines; nil disables logging.
	Logf Logger
}

// sinkWorker pairs one sink adapter with its buffered event queue.
type sinkWorker struct {
	name           string
	sink           sinks.Sink
	ch             chan event.Event
	consumeTimeout time.Duration
}

var (
	// ErrSinkQueueFull indicates that a sink queue could not accept the event within EnqueueTimeout.
	ErrSinkQueueFull = errors.New("sink queue full (enqueue timeout)")
)
|
||||
|
||||
// NewFanout builds workers for all provided sinks and starts their goroutines.
|
||||
func NewFanout(ctx context.Context, sinkMap map[string]sinks.Sink, opts FanoutOptions) (*Fanout, error) {
|
||||
if sinkMap == nil {
|
||||
return nil, fmt.Errorf("dispatch.NewFanout: sink map is nil")
|
||||
}
|
||||
|
||||
queueSize := opts.QueueSize
|
||||
if queueSize <= 0 {
|
||||
queueSize = 64 // conservative default; adjust later as needed
|
||||
}
|
||||
|
||||
f := &Fanout{
|
||||
workers: make(map[string]*sinkWorker, len(sinkMap)),
|
||||
enqueueTimeout: opts.EnqueueTimeout,
|
||||
logger: opts.Logf,
|
||||
}
|
||||
|
||||
// Create + start one worker per sink.
|
||||
for name, s := range sinkMap {
|
||||
if s == nil {
|
||||
return nil, fmt.Errorf("dispatch.NewFanout: sink %q is nil", name)
|
||||
}
|
||||
|
||||
w := &sinkWorker{
|
||||
name: name,
|
||||
sink: s,
|
||||
ch: make(chan event.Event, queueSize),
|
||||
consumeTimeout: opts.ConsumeTimeout,
|
||||
}
|
||||
|
||||
f.workers[name] = w
|
||||
|
||||
f.wg.Add(1)
|
||||
go func(w *sinkWorker) {
|
||||
defer f.wg.Done()
|
||||
f.runWorker(ctx, w)
|
||||
}(w)
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// Close stops all workers by closing their channels and waits for them to exit.
|
||||
// It is safe to call multiple times.
|
||||
func (f *Fanout) Close() {
|
||||
if f == nil {
|
||||
return
|
||||
}
|
||||
|
||||
f.closeOnce.Do(func() {
|
||||
// Closing the per-sink channels tells workers "no more events".
|
||||
for _, w := range f.workers {
|
||||
close(w.ch)
|
||||
}
|
||||
f.wg.Wait()
|
||||
})
|
||||
}
|
||||
|
||||
// Publish enqueues an event to a named sink's queue.
|
||||
//
|
||||
// If the sink queue is full, Publish will wait up to f.enqueueTimeout.
|
||||
// If enqueueTimeout is <= 0, Publish will block until it can enqueue or ctx cancels.
|
||||
//
|
||||
// If Publish returns an error, the event has NOT been enqueued for that sink.
|
||||
func (f *Fanout) Publish(ctx context.Context, sinkName string, e event.Event) error {
|
||||
w, ok := f.workers[sinkName]
|
||||
if !ok {
|
||||
return fmt.Errorf("dispatch.Fanout.Publish: unknown sink %q", sinkName)
|
||||
}
|
||||
|
||||
// Fast path: no timeout configured; block until enqueue or ctx cancels.
|
||||
if f.enqueueTimeout <= 0 {
|
||||
select {
|
||||
case w.ch <- e:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
// Bounded enqueue: wait up to enqueueTimeout for space in the buffer.
|
||||
timer := time.NewTimer(f.enqueueTimeout)
|
||||
defer timer.Stop()
|
||||
|
||||
select {
|
||||
case w.ch <- e:
|
||||
return nil
|
||||
|
||||
case <-timer.C:
|
||||
return fmt.Errorf("%w: sink=%q id=%s kind=%s source=%s",
|
||||
ErrSinkQueueFull, sinkName, e.ID, e.Kind, e.Source)
|
||||
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func (f *Fanout) runWorker(ctx context.Context, w *sinkWorker) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// Context cancellation means we're shutting down; drop queued work.
|
||||
return
|
||||
|
||||
case e, ok := <-w.ch:
|
||||
if !ok {
|
||||
// Channel closed: dispatcher is done publishing to this sink.
|
||||
return
|
||||
}
|
||||
|
||||
f.consumeOne(ctx, w, e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// consumeOne delivers a single event to one sink with optional timeout,
// panic isolation, and error logging. It never returns an error: failures
// are logged via f.logf and the worker moves on to the next queued event.
func (f *Fanout) consumeOne(parent context.Context, w *sinkWorker, e event.Event) {
	// Always give sinks a context they can observe.
	consumeCtx := parent
	cancel := func() {}
	if w.consumeTimeout > 0 {
		consumeCtx, cancel = context.WithTimeout(parent, w.consumeTimeout)
	}
	defer cancel()

	start := time.Now()

	// Sinks are adapters. We keep the worker alive even if a sink panics:
	// it's better to log loudly and continue than to silently lose a sink forever.
	// (Registered after the cancel defer, so by LIFO order recover runs first.)
	defer func() {
		if r := recover(); r != nil {
			f.logf("dispatch: sink %q PANIC while consuming event (id=%s kind=%s source=%s): %v",
				w.name, e.ID, e.Kind, e.Source, r)
		}
	}()

	err := w.sink.Consume(consumeCtx, e)
	elapsed := time.Since(start)

	if err == nil {
		return
	}

	// If the sink respects context, timeouts should surface as context deadline exceeded.
	// We log a distinct message because it's operationally useful.
	if errors.Is(err, context.DeadlineExceeded) {
		f.logf("dispatch: sink %q timed out after %s consuming event (id=%s kind=%s source=%s)",
			w.name, elapsed, e.ID, e.Kind, e.Source)
		return
	}

	// Normal errors.
	f.logf("dispatch: sink %q failed consuming event (id=%s kind=%s source=%s): %v",
		w.name, e.ID, e.Kind, e.Source, err)
}
|
||||
|
||||
func (f *Fanout) logf(format string, args ...any) {
|
||||
if f == nil || f.logger == nil {
|
||||
return
|
||||
}
|
||||
f.logger(format, args...)
|
||||
}
|
||||
8
event/doc.go
Normal file
8
event/doc.go
Normal file
@@ -0,0 +1,8 @@
|
||||
// Package event defines the domain-agnostic event envelope used by feedkit-style daemons.
|
||||
//
|
||||
// The core idea:
|
||||
// - feedkit infrastructure moves "events" around without knowing anything about the domain.
|
||||
// - domain-specific code (weatherfeeder, newsfeeder, etc.) provides a concrete Payload.
|
||||
//
|
||||
// This package should NOT import any domain packages (weather model types, etc.).
|
||||
package event
|
||||
101
event/event.go
Normal file
101
event/event.go
Normal file
@@ -0,0 +1,101 @@
|
||||
package event
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ErrInvalidEvent is a sentinel error so callers can do:
//
//	if errors.Is(err, event.ErrInvalidEvent) { ... }
var ErrInvalidEvent = errors.New("invalid event")

// ValidationError reports one or more problems with an Event.
//
// We keep this structured so you get ALL issues in one pass rather than fixing
// them one-by-one.
type ValidationError struct {
	Problems []string
}

// Error renders all problems as a bullet list under a single header,
// one problem per line, with no trailing newline. A nil or empty receiver
// degrades to the bare sentinel text.
func (e *ValidationError) Error() string {
	if e == nil || len(e.Problems) == 0 {
		return "invalid event"
	}
	lines := make([]string, 0, len(e.Problems)+1)
	lines = append(lines, "invalid event:")
	for _, p := range e.Problems {
		lines = append(lines, " - "+p)
	}
	return strings.Join(lines, "\n")
}

// Is lets errors.Is(err, ErrInvalidEvent) work.
func (e *ValidationError) Is(target error) bool {
	return target == ErrInvalidEvent
}
|
||||
|
||||
// Event is the domain-agnostic envelope moved through scheduler → pipeline → dispatcher → sinks.
//
// Payload is intentionally "any":
//   - domain code can set it to a struct (recommended), a map, or some other JSON-marshable type.
//   - feedkit infrastructure never type-asserts Payload.
//
// If you plan to persist/emit events as JSON, ensure Payload is JSON-marshable.
type Event struct {
	// ID should be stable for dedupe/storage purposes.
	// Often: "<provider>:<source>:<upstream-id-or-timestamp>".
	ID string `json:"id"`

	// Kind is used for routing/policy.
	Kind Kind `json:"kind"`

	// Source is the configured source name (e.g. "OpenMeteoObservation", "NWSAlertsSTL").
	Source string `json:"source"`

	// EmittedAt is when *your daemon* emitted this event (typically time.Now().UTC()).
	EmittedAt time.Time `json:"emitted_at"`

	// EffectiveAt is optional: the timestamp the payload is "about" (e.g. observation time).
	// nil is omitted from JSON output.
	EffectiveAt *time.Time `json:"effective_at,omitempty"`

	// Schema is optional but strongly recommended once multiple domains exist.
	// Examples:
	//   "weather.observation.v1"
	//   "news.article.v1"
	// It helps sinks and downstream consumers interpret Payload.
	Schema string `json:"schema,omitempty"`

	// Payload is domain-defined and must be non-nil (enforced by Validate).
	Payload any `json:"payload"`
}
|
||||
|
||||
// Validate enforces basic invariants that infrastructure depends on.
|
||||
// Domain-specific validation belongs in domain code (or domain processors).
|
||||
func (e Event) Validate() error {
|
||||
var problems []string
|
||||
|
||||
if strings.TrimSpace(e.ID) == "" {
|
||||
problems = append(problems, "ID is required")
|
||||
}
|
||||
if strings.TrimSpace(string(e.Kind)) == "" {
|
||||
problems = append(problems, "Kind is required")
|
||||
}
|
||||
if strings.TrimSpace(e.Source) == "" {
|
||||
problems = append(problems, "Source is required")
|
||||
}
|
||||
if e.EmittedAt.IsZero() {
|
||||
problems = append(problems, "EmittedAt must be set (non-zero)")
|
||||
}
|
||||
if e.Payload == nil {
|
||||
problems = append(problems, "Payload must be non-nil")
|
||||
}
|
||||
|
||||
if len(problems) > 0 {
|
||||
return &ValidationError{Problems: problems}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
30
event/kind.go
Normal file
30
event/kind.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package event
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Kind identifies the "type/category" of an event for routing and policy decisions.
//
// Kind is intentionally open-ended (stringly-typed), because different daemons will
// have different kinds:
//
//	weatherfeeder: "observation", "forecast", "alert"
//	newsfeeder:    "article", "breaking", ...
//	stockfeeder:   "quote", "bar", "news", ...
//
// Conventions (recommended, not required):
//   - lowercase
//   - words separated by underscores if needed
type Kind string

// ParseKind normalizes and validates a kind string.
// It trims whitespace and lowercases the result, rejecting values that are
// empty after trimming.
func ParseKind(s string) (Kind, error) {
	normalized := strings.TrimSpace(s)
	normalized = strings.ToLower(normalized)
	if len(normalized) == 0 {
		return "", fmt.Errorf("kind cannot be empty")
	}
	return Kind(normalized), nil
}
|
||||
5
go.mod
Normal file
5
go.mod
Normal file
@@ -0,0 +1,5 @@
|
||||
module gitea.maximumdirect.net/ejr/feedkit
|
||||
|
||||
go 1.22
|
||||
|
||||
require gopkg.in/yaml.v3 v3.0.1
|
||||
3
go.sum
Normal file
3
go.sum
Normal file
@@ -0,0 +1,3 @@
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
5
pipeline/dedupe.go
Normal file
5
pipeline/dedupe.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package pipeline
|
||||
|
||||
// Placeholder for dedupe processor:
|
||||
// - key by Event.ID or computed key
|
||||
// - in-memory store first; later optional Postgres-backed
|
||||
43
pipeline/pipeline.go
Normal file
43
pipeline/pipeline.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
)
|
||||
|
||||
// Processor can mutate/drop events (dedupe, rate-limit, normalization tweaks).
|
||||
type Processor interface {
|
||||
Process(ctx context.Context, in event.Event) (out *event.Event, err error)
|
||||
}
|
||||
|
||||
type Pipeline struct {
|
||||
Processors []Processor
|
||||
}
|
||||
|
||||
func (p *Pipeline) Process(ctx context.Context, e event.Event) (*event.Event, error) {
|
||||
if err := e.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("pipeline: invalid input event: %w", err)
|
||||
}
|
||||
|
||||
cur := &e
|
||||
|
||||
for _, proc := range p.Processors {
|
||||
out, err := proc.Process(ctx, *cur)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if out == nil {
|
||||
// Dropped by policy.
|
||||
return nil, nil
|
||||
}
|
||||
cur = out
|
||||
}
|
||||
|
||||
if err := cur.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("pipeline: invalid output event: %w", err)
|
||||
}
|
||||
|
||||
return cur, nil
|
||||
}
|
||||
5
pipeline/ratelimit.go
Normal file
5
pipeline/ratelimit.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package pipeline
|
||||
|
||||
// Placeholder for rate limit processor:
|
||||
// - per source/kind sink routing limits
|
||||
// - cooldown windows
|
||||
174
scheduler/scheduler.go
Normal file
174
scheduler/scheduler.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/sources"
|
||||
)
|
||||
|
||||
// Job pairs a Source with the cadence at which it should be polled.
type Job struct {
	// Source is the configured source to poll. Must be non-nil
	// (runJob logs and aborts otherwise).
	Source sources.Source

	// Every is the polling interval. Must be > 0.
	Every time.Duration

	// Jitter is the maximum additional delay added before each poll.
	// Example: if Every=15m and Jitter=30s, each poll will occur at:
	//   tick time + random(0..30s)
	//
	// If Jitter == 0, we compute a default jitter based on Every.
	Jitter time.Duration
}

// Logger is a printf-style logging hook. A nil Logger disables logging.
type Logger func(format string, args ...any)

// Scheduler polls each configured Job on its own goroutine and publishes
// the resulting events to Out.
type Scheduler struct {
	// Jobs lists the polling jobs; Run refuses to start if it is empty.
	Jobs []Job
	// Out receives every event emitted by every job; Run refuses to
	// start if it is nil.
	Out chan<- event.Event
	// Logf, if set, receives diagnostic messages from the workers.
	Logf Logger
}
|
||||
|
||||
// Run starts one polling goroutine per job.
|
||||
// Each job runs on its own interval and emits 0..N events per poll.
|
||||
func (s *Scheduler) Run(ctx context.Context) error {
|
||||
if s.Out == nil {
|
||||
return fmt.Errorf("scheduler.Run: Out channel is nil")
|
||||
}
|
||||
if len(s.Jobs) == 0 {
|
||||
return fmt.Errorf("scheduler.Run: no jobs configured")
|
||||
}
|
||||
|
||||
for _, job := range s.Jobs {
|
||||
job := job // capture loop variable
|
||||
go s.runJob(ctx, job)
|
||||
}
|
||||
|
||||
<-ctx.Done()
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
// runJob is the per-job worker loop. Lifecycle:
//  1. validate the job (nil source / non-positive interval log and abort),
//  2. sleep a random startup jitter so jobs don't all fire at daemon start,
//  3. poll once immediately,
//  4. then poll on every ticker tick, each preceded by a fresh random jitter,
//
// until ctx is cancelled.
func (s *Scheduler) runJob(ctx context.Context, job Job) {
	if job.Source == nil {
		s.logf("scheduler: job has nil source")
		return
	}
	if job.Every <= 0 {
		s.logf("scheduler: job %s has invalid interval", job.Source.Name())
		return
	}

	// Compute jitter: either configured per job, or a sensible default.
	jitter := effectiveJitter(job.Every, job.Jitter)

	// Each worker gets its own RNG (safe + no lock contention).
	// The seed mixes wall time with a hash of the source name so that
	// workers started in the same nanosecond still diverge.
	seed := time.Now().UnixNano() ^ int64(hashStringFNV32a(job.Source.Name()))
	rng := rand.New(rand.NewSource(seed))

	// Optional startup jitter: avoids all jobs firing at the exact moment the daemon starts.
	// sleepJitter returns false when ctx was cancelled mid-sleep.
	if !sleepJitter(ctx, rng, jitter) {
		return
	}

	// Immediate poll at startup (after startup jitter).
	s.pollOnce(ctx, job)

	t := time.NewTicker(job.Every)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			// Per-tick jitter: spreads calls out within the interval.
			// NOTE(review): the ticker keeps running while we sleep/poll, so if
			// jitter+poll ever exceeds Every, intermediate ticks are dropped
			// (the ticker channel buffers one) — presumably acceptable; confirm.
			if !sleepJitter(ctx, rng, jitter) {
				return
			}
			s.pollOnce(ctx, job)

		case <-ctx.Done():
			return
		}
	}
}
|
||||
|
||||
func (s *Scheduler) pollOnce(ctx context.Context, job Job) {
|
||||
events, err := job.Source.Poll(ctx)
|
||||
if err != nil {
|
||||
s.logf("scheduler: poll failed (%s): %v", job.Source.Name(), err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, e := range events {
|
||||
select {
|
||||
case s.Out <- e:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) logf(format string, args ...any) {
|
||||
if s.Logf == nil {
|
||||
return
|
||||
}
|
||||
s.Logf(format, args...)
|
||||
}
|
||||
|
||||
// effectiveJitter chooses a jitter value.
|
||||
// - If configuredMax > 0, use it (but clamp).
|
||||
// - Else default to min(every/10, 30s).
|
||||
// - Clamp to at most every/2 (so jitter can’t delay more than half the interval).
|
||||
func effectiveJitter(every time.Duration, configuredMax time.Duration) time.Duration {
|
||||
if every <= 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
j := configuredMax
|
||||
if j <= 0 {
|
||||
j = every / 10
|
||||
if j > 30*time.Second {
|
||||
j = 30 * time.Second
|
||||
}
|
||||
}
|
||||
|
||||
// Clamp jitter so it doesn’t dominate the schedule.
|
||||
maxAllowed := every / 2
|
||||
if j > maxAllowed {
|
||||
j = maxAllowed
|
||||
}
|
||||
if j < 0 {
|
||||
j = 0
|
||||
}
|
||||
return j
|
||||
}
|
||||
|
||||
// sleepJitter sleeps for a random duration in [0, max].
|
||||
// Returns false if the context is cancelled while waiting.
|
||||
func sleepJitter(ctx context.Context, rng *rand.Rand, max time.Duration) bool {
|
||||
if max <= 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Int63n requires a positive argument.
|
||||
// We add 1 so max itself is attainable.
|
||||
n := rng.Int63n(int64(max) + 1)
|
||||
d := time.Duration(n)
|
||||
|
||||
timer := time.NewTimer(d)
|
||||
defer timer.Stop()
|
||||
|
||||
select {
|
||||
case <-timer.C:
|
||||
return true
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func hashStringFNV32a(s string) uint32 {
|
||||
h := fnv.New32a()
|
||||
_, _ = h.Write([]byte(s))
|
||||
return h.Sum32()
|
||||
}
|
||||
7
scheduler/worker.go
Normal file
7
scheduler/worker.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package scheduler
|
||||
|
||||
// Placeholder for per-source worker logic:
|
||||
// - ticker loop
|
||||
// - jitter
|
||||
// - backoff on errors
|
||||
// - emits events into scheduler.Out
|
||||
55
sinks/builtins.go
Normal file
55
sinks/builtins.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package sinks
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/config"
|
||||
)
|
||||
|
||||
// RegisterBuiltins registers sink drivers included in this binary.
|
||||
//
|
||||
// In feedkit, these are "infrastructure primitives" — they are not domain-specific.
|
||||
// Individual daemons can choose to call this (or register their own custom sinks).
|
||||
func RegisterBuiltins(r *Registry) {
|
||||
// Stdout sink: great for debugging, piping to jq, etc.
|
||||
r.Register("stdout", func(cfg config.SinkConfig) (Sink, error) {
|
||||
return NewStdoutSink(cfg.Name), nil
|
||||
})
|
||||
|
||||
// File sink: writes/archives events somewhere on disk.
|
||||
r.Register("file", func(cfg config.SinkConfig) (Sink, error) {
|
||||
return NewFileSinkFromConfig(cfg)
|
||||
})
|
||||
|
||||
// Postgres sink: persists events durably.
|
||||
r.Register("postgres", func(cfg config.SinkConfig) (Sink, error) {
|
||||
return NewPostgresSinkFromConfig(cfg)
|
||||
})
|
||||
|
||||
// RabbitMQ sink: publishes events to a broker for downstream consumers.
|
||||
r.Register("rabbitmq", func(cfg config.SinkConfig) (Sink, error) {
|
||||
return NewRabbitMQSinkFromConfig(cfg)
|
||||
})
|
||||
}
|
||||
|
||||
// ---- helpers for validating sink params ----
|
||||
//
|
||||
// These helpers live in sinks (not config) on purpose:
|
||||
// - config is domain-agnostic and should not embed driver-specific validation helpers.
|
||||
// - sinks are adapters; validating their own params here keeps the logic near the driver.
|
||||
|
||||
func requireStringParam(cfg config.SinkConfig, key string) (string, error) {
|
||||
v, ok := cfg.Params[key]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("sink %q: params.%s is required", cfg.Name, key)
|
||||
}
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("sink %q: params.%s must be a string", cfg.Name, key)
|
||||
}
|
||||
if strings.TrimSpace(s) == "" {
|
||||
return "", fmt.Errorf("sink %q: params.%s cannot be empty", cfg.Name, key)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
30
sinks/file.go
Normal file
30
sinks/file.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package sinks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/config"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
)
|
||||
|
||||
type FileSink struct {
|
||||
name string
|
||||
path string
|
||||
}
|
||||
|
||||
func NewFileSinkFromConfig(cfg config.SinkConfig) (Sink, error) {
|
||||
path, err := requireStringParam(cfg, "path")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &FileSink{name: cfg.Name, path: path}, nil
|
||||
}
|
||||
|
||||
func (s *FileSink) Name() string { return s.name }
|
||||
|
||||
func (s *FileSink) Consume(ctx context.Context, e event.Event) error {
|
||||
_ = ctx
|
||||
_ = e
|
||||
return fmt.Errorf("file sink: TODO implement (path=%s)", s.path)
|
||||
}
|
||||
37
sinks/postgres.go
Normal file
37
sinks/postgres.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package sinks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/config"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
)
|
||||
|
||||
type PostgresSink struct {
|
||||
name string
|
||||
dsn string
|
||||
}
|
||||
|
||||
func NewPostgresSinkFromConfig(cfg config.SinkConfig) (Sink, error) {
|
||||
dsn, err := requireStringParam(cfg, "dsn")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &PostgresSink{name: cfg.Name, dsn: dsn}, nil
|
||||
}
|
||||
|
||||
func (p *PostgresSink) Name() string { return p.name }
|
||||
|
||||
func (p *PostgresSink) Consume(ctx context.Context, e event.Event) error {
|
||||
_ = ctx
|
||||
|
||||
// Boundary validation: if something upstream violated invariants,
|
||||
// surface it loudly rather than printing partial nonsense.
|
||||
if err := e.Validate(); err != nil {
|
||||
return fmt.Errorf("rabbitmq sink: invalid event: %w", err)
|
||||
}
|
||||
|
||||
// TODO implement Postgres transaction
|
||||
return nil
|
||||
}
|
||||
42
sinks/rabbitmq.go
Normal file
42
sinks/rabbitmq.go
Normal file
@@ -0,0 +1,42 @@
|
||||
package sinks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/config"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
)
|
||||
|
||||
type RabbitMQSink struct {
|
||||
name string
|
||||
url string
|
||||
exchange string
|
||||
}
|
||||
|
||||
func NewRabbitMQSinkFromConfig(cfg config.SinkConfig) (Sink, error) {
|
||||
url, err := requireStringParam(cfg, "url")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ex, err := requireStringParam(cfg, "exchange")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &RabbitMQSink{name: cfg.Name, url: url, exchange: ex}, nil
|
||||
}
|
||||
|
||||
func (r *RabbitMQSink) Name() string { return r.name }
|
||||
|
||||
func (r *RabbitMQSink) Consume(ctx context.Context, e event.Event) error {
|
||||
_ = ctx
|
||||
|
||||
// Boundary validation: if something upstream violated invariants,
|
||||
// surface it loudly rather than printing partial nonsense.
|
||||
if err := e.Validate(); err != nil {
|
||||
return fmt.Errorf("rabbitmq sink: invalid event: %w", err)
|
||||
}
|
||||
|
||||
// TODO implement RabbitMQ publishing
|
||||
return nil
|
||||
}
|
||||
33
sinks/registry.go
Normal file
33
sinks/registry.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package sinks
|
||||
|
||||
import (
	"fmt"
	"strings"

	"gitea.maximumdirect.net/ejr/feedkit/config"
)
|
||||
|
||||
// Factory constructs a sink instance from config.
|
||||
//
|
||||
// This is the mechanism that lets concrete daemons wire in whatever sinks they
|
||||
// want without main.go being full of switch statements.
|
||||
type Factory func(cfg config.SinkConfig) (Sink, error)
|
||||
|
||||
type Registry struct {
|
||||
byDriver map[string]Factory
|
||||
}
|
||||
|
||||
func NewRegistry() *Registry {
|
||||
return &Registry{byDriver: map[string]Factory{}}
|
||||
}
|
||||
|
||||
func (r *Registry) Register(driver string, f Factory) {
|
||||
r.byDriver[driver] = f
|
||||
}
|
||||
|
||||
func (r *Registry) Build(cfg config.SinkConfig) (Sink, error) {
|
||||
f, ok := r.byDriver[cfg.Driver]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown sink driver: %q", cfg.Driver)
|
||||
}
|
||||
return f(cfg)
|
||||
}
|
||||
16
sinks/sink.go
Normal file
16
sinks/sink.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package sinks
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
)
|
||||
|
||||
// Sink is an adapter that consumes a stream of domain-agnostic events.
//
// Sinks MUST respect ctx.Done() whenever they do I/O or blocking work.
// (Fanout timeouts only help if the sink cooperates with context cancellation.)
type Sink interface {
	// Name returns the configured sink name (used for logs and routing).
	Name() string
	// Consume handles a single event; it should return promptly once ctx
	// is done rather than blocking on I/O.
	Consume(ctx context.Context, e event.Event) error
}
|
||||
36
sinks/stdout.go
Normal file
36
sinks/stdout.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package sinks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
)
|
||||
|
||||
type StdoutSink struct{ name string }
|
||||
|
||||
func NewStdoutSink(name string) *StdoutSink {
|
||||
return &StdoutSink{name: name}
|
||||
}
|
||||
|
||||
func (s *StdoutSink) Name() string { return s.name }
|
||||
|
||||
func (s *StdoutSink) Consume(ctx context.Context, e event.Event) error {
|
||||
_ = ctx
|
||||
|
||||
// Boundary validation: if something upstream violated invariants,
|
||||
// surface it loudly rather than printing partial nonsense.
|
||||
if err := e.Validate(); err != nil {
|
||||
return fmt.Errorf("stdout sink: invalid event: %w", err)
|
||||
}
|
||||
|
||||
// Generic default: one JSON line per event.
|
||||
// This makes stdout useful across all domains and easy to pipe into jq / logs.
|
||||
b, err := json.Marshal(e)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stdout sink: marshal event: %w", err)
|
||||
}
|
||||
fmt.Println(string(b))
|
||||
return nil
|
||||
}
|
||||
49
sources/registry.go
Normal file
49
sources/registry.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/config"
|
||||
)
|
||||
|
||||
// Factory constructs a configured Source instance from config.
|
||||
//
|
||||
// This is how concrete daemons (weatherfeeder/newsfeeder/...) register their
|
||||
// domain-specific source drivers (Open-Meteo, NWS, RSS, etc.) while feedkit
|
||||
// remains domain-agnostic.
|
||||
type Factory func(cfg config.SourceConfig) (Source, error)
|
||||
|
||||
type Registry struct {
|
||||
byDriver map[string]Factory
|
||||
}
|
||||
|
||||
func NewRegistry() *Registry {
|
||||
return &Registry{byDriver: map[string]Factory{}}
|
||||
}
|
||||
|
||||
// Register associates a driver name (e.g. "openmeteo_observation") with a factory.
|
||||
//
|
||||
// The driver string is the "lookup key" used by config.sources[].driver.
|
||||
func (r *Registry) Register(driver string, f Factory) {
|
||||
driver = strings.TrimSpace(driver)
|
||||
if driver == "" {
|
||||
// Panic is appropriate here: registering an empty driver is always a programmer error,
|
||||
// and it will lead to extremely confusing runtime behavior if allowed.
|
||||
panic("sources.Registry.Register: driver cannot be empty")
|
||||
}
|
||||
if f == nil {
|
||||
panic(fmt.Sprintf("sources.Registry.Register: factory cannot be nil (driver=%q)", driver))
|
||||
}
|
||||
|
||||
r.byDriver[driver] = f
|
||||
}
|
||||
|
||||
// Build constructs a Source from a SourceConfig by looking up cfg.Driver.
|
||||
func (r *Registry) Build(cfg config.SourceConfig) (Source, error) {
|
||||
f, ok := r.byDriver[cfg.Driver]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown source driver: %q", cfg.Driver)
|
||||
}
|
||||
return f(cfg)
|
||||
}
|
||||
30
sources/source.go
Normal file
30
sources/source.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
)
|
||||
|
||||
// Source is a configured polling job that emits 0..N events per poll.
//
// Source implementations live in domain modules (weatherfeeder/newsfeeder/...)
// and are registered into a feedkit sources.Registry.
//
// feedkit infrastructure treats Source as opaque; it just calls Poll()
// on the configured cadence and publishes the resulting events.
type Source interface {
	// Name is the configured source name (used for logs and included in emitted events).
	Name() string

	// Kind is the "primary kind" emitted by this source.
	//
	// This is mainly useful as a *safety check* (e.g. config says kind=forecast but
	// driver emits observation). Some future sources may emit multiple kinds; if/when
	// that happens, we can evolve this interface (e.g., make Kind optional, or remove it).
	Kind() event.Kind

	// Poll fetches from upstream and returns 0..N events.
	// Implementations should honor ctx.Done() for network calls and other I/O.
	// A nil error with an empty slice is a valid "nothing new" result.
	Poll(ctx context.Context) ([]event.Event, error)
}
|
||||
Reference in New Issue
Block a user