sources: standardize Event.ID on Source:EffectiveAt; simplify raw event helper

- Adopt an opinionated Event.ID policy across sources:
  - use upstream-provided ID when available
  - otherwise derive a stable ID from Source:EffectiveAt (RFC3339Nano, UTC)
  - fall back to Source:EmittedAt when EffectiveAt is unavailable
- Add common/id helper to centralize ID selection logic and keep sources consistent
- Simplify common event construction by collapsing SingleRawEventAt/SingleRawEvent
  into a single explicit SingleRawEvent helper (emittedAt passed in)
- Update NWS/Open-Meteo/OpenWeather observation sources to:
  - compute EffectiveAt first
  - generate IDs via the shared helper
  - build envelopes via the unified SingleRawEvent helper
- Improve determinism and dedupe-friendliness without changing schemas or payloads
This commit is contained in:
2026-01-15 19:38:15 -06:00
parent d9474b5a5b
commit d8db58c004
5 changed files with 76 additions and 110 deletions

View File

@@ -9,17 +9,34 @@ import (
// SingleRawEvent constructs, validates, and returns a slice containing exactly one event.
//
// This removes the repetitive "event envelope ceremony" from individual sources.
// This removes repetitive "event envelope ceremony" from individual sources.
// Sources remain responsible for:
// - fetching bytes (raw payload)
// - choosing Schema (raw schema identifier)
// - computing a stable Event.ID and (optional) EffectiveAt
func SingleRawEvent(kind event.Kind, sourceName string, schema string, id string, effectiveAt *time.Time, payload any) ([]event.Event, error) {
// - computing Event.ID and (optional) EffectiveAt
//
// emittedAt is explicit so callers can compute IDs using the same timestamp (or
// so tests can provide a stable value).
func SingleRawEvent(
kind event.Kind,
sourceName string,
schema string,
id string,
emittedAt time.Time,
effectiveAt *time.Time,
payload any,
) ([]event.Event, error) {
if emittedAt.IsZero() {
emittedAt = time.Now().UTC()
} else {
emittedAt = emittedAt.UTC()
}
e := event.Event{
ID: id,
Kind: kind,
Source: sourceName,
EmittedAt: time.Now().UTC(),
EmittedAt: emittedAt,
EffectiveAt: effectiveAt,
// RAW schema (normalizer matches on this).

View File

@@ -0,0 +1,39 @@
// FILE: ./internal/sources/common/id.go
package common
import (
"fmt"
"strings"
"time"
)
// ChooseEventID applies weatherfeeder's opinionated Event.ID policy:
//
// - If upstream provides an ID, use it (trimmed).
// - Otherwise, ID is "<Source>:<EffectiveAt>" when available.
// - If EffectiveAt is unavailable, fall back to "<Source>:<EmittedAt>".
//
// Timestamps are encoded as RFC3339Nano in UTC.
func ChooseEventID(upstreamID, sourceName string, effectiveAt *time.Time, emittedAt time.Time) string {
if id := strings.TrimSpace(upstreamID); id != "" {
return id
}
src := strings.TrimSpace(sourceName)
if src == "" {
src = "UNKNOWN_SOURCE"
}
// Prefer EffectiveAt for dedupe friendliness.
if effectiveAt != nil && !effectiveAt.IsZero() {
return fmt.Sprintf("%s:%s", src, effectiveAt.UTC().Format(time.RFC3339Nano))
}
// Fall back to EmittedAt (still stable within a poll invocation).
t := emittedAt.UTC()
if t.IsZero() {
t = time.Now().UTC()
}
return fmt.Sprintf("%s:%s", src, t.Format(time.RFC3339Nano))
}

View File

@@ -4,7 +4,6 @@ package nws
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
@@ -15,10 +14,6 @@ import (
)
// ObservationSource polls an NWS station observation endpoint and emits a RAW observation Event.
//
// This corresponds to URLs like:
//
// https://api.weather.gov/stations/KSTL/observations/latest
type ObservationSource struct {
http *common.HTTPSource
}
@@ -36,35 +31,14 @@ func NewObservationSource(cfg config.SourceConfig) (*ObservationSource, error) {
func (s *ObservationSource) Name() string { return s.http.Name }
// Kind is used for routing/policy.
// We keep Kind canonical (observation) even for raw events; Schema differentiates raw vs canonical.
func (s *ObservationSource) Kind() event.Kind { return event.Kind("observation") }
// Poll fetches NWS "latest observation" and emits exactly one RAW Event.
// The RAW payload is json.RawMessage and Schema is standards.SchemaRawNWSObservationV1.
func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
raw, meta, err := s.fetchRaw(ctx)
if err != nil {
return nil, err
}
// Event.ID must be set BEFORE normalization (feedkit requires it).
// Prefer NWS-provided "id" (stable URL). Fallback to a stable computed key.
eventID := strings.TrimSpace(meta.ID)
if eventID == "" {
ts := meta.ParsedTimestamp
if ts.IsZero() {
ts = time.Now().UTC()
}
station := strings.TrimSpace(meta.StationID)
if station == "" {
station = "UNKNOWN"
}
eventID = fmt.Sprintf("nws:observation:%s:%s:%s", s.http.Name, station, ts.UTC().Format(time.RFC3339Nano))
}
// EffectiveAt is optional; for observations its naturally the observation timestamp.
var effectiveAt *time.Time
if !meta.ParsedTimestamp.IsZero() {
@@ -72,11 +46,15 @@ func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
effectiveAt = &t
}
emittedAt := time.Now().UTC()
eventID := common.ChooseEventID(meta.ID, s.http.Name, effectiveAt, emittedAt)
return common.SingleRawEvent(
s.Kind(),
s.http.Name,
standards.SchemaRawNWSObservationV1,
eventID,
emittedAt,
effectiveAt,
raw,
)
@@ -84,18 +62,13 @@ func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
// ---- RAW fetch + minimal metadata decode ----
// observationMeta is a *minimal* decode of the NWS payload used only to build
// a stable Event.ID and a useful EffectiveAt for the envelope.
type observationMeta struct {
ID string `json:"id"`
Properties struct {
StationID string `json:"stationId"`
Timestamp string `json:"timestamp"`
} `json:"properties"`
// Convenience fields populated after decode.
ParsedTimestamp time.Time `json:"-"`
StationID string `json:"-"`
}
func (s *ObservationSource) fetchRaw(ctx context.Context) (json.RawMessage, observationMeta, error) {
@@ -106,12 +79,10 @@ func (s *ObservationSource) fetchRaw(ctx context.Context) (json.RawMessage, obse
var meta observationMeta
if err := json.Unmarshal(raw, &meta); err != nil {
// If metadata decode fails, still return raw; envelope will fall back to computed ID.
// If metadata decode fails, still return raw; envelope will fall back to Source:EffectiveAt.
return raw, observationMeta{}, nil
}
meta.StationID = strings.TrimSpace(meta.Properties.StationID)
tsStr := strings.TrimSpace(meta.Properties.Timestamp)
if tsStr != "" {
if t, err := time.Parse(time.RFC3339, tsStr); err == nil {

View File

@@ -4,8 +4,6 @@ package openmeteo
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"gitea.maximumdirect.net/ejr/feedkit/config"
@@ -23,8 +21,6 @@ type ObservationSource struct {
func NewObservationSource(cfg config.SourceConfig) (*ObservationSource, error) {
const driver = "openmeteo_observation"
// We require params.user_agent for uniformity across sources (even though Open-Meteo
// itself does not strictly require a special User-Agent).
hs, err := common.NewHTTPSource(driver, cfg, "application/json")
if err != nil {
return nil, err
@@ -37,30 +33,27 @@ func (s *ObservationSource) Name() string { return s.http.Name }
func (s *ObservationSource) Kind() event.Kind { return event.Kind("observation") }
// Poll fetches Open-Meteo "current" and emits exactly one RAW Event.
func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
raw, meta, err := s.fetchRaw(ctx)
if err != nil {
return nil, err
}
eventID := buildEventID(s.http.Name, meta)
if strings.TrimSpace(eventID) == "" {
// Extremely defensive fallback: keep the envelope valid no matter what.
eventID = fmt.Sprintf("openmeteo:current:%s:%s", s.http.Name, time.Now().UTC().Format(time.RFC3339Nano))
}
var effectiveAt *time.Time
if !meta.ParsedTimestamp.IsZero() {
t := meta.ParsedTimestamp.UTC()
effectiveAt = &t
}
emittedAt := time.Now().UTC()
eventID := common.ChooseEventID("", s.http.Name, effectiveAt, emittedAt)
return common.SingleRawEvent(
s.Kind(),
s.http.Name,
standards.SchemaRawOpenMeteoCurrentV1,
eventID,
emittedAt,
effectiveAt,
raw,
)
@@ -69,8 +62,6 @@ func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
// ---- RAW fetch + minimal metadata decode ----
type openMeteoMeta struct {
Latitude float64 `json:"latitude"`
Longitude float64 `json:"longitude"`
Timezone string `json:"timezone"`
UTCOffsetSeconds int `json:"utc_offset_seconds"`
@@ -89,31 +80,13 @@ func (s *ObservationSource) fetchRaw(ctx context.Context) (json.RawMessage, open
var meta openMeteoMeta
if err := json.Unmarshal(raw, &meta); err != nil {
// If metadata decode fails, still return raw; envelope will fall back to computed ID without EffectiveAt.
// If metadata decode fails, still return raw; envelope will omit EffectiveAt.
return raw, openMeteoMeta{}, nil
}
// Best effort: compute a stable EffectiveAt + event ID component.
// If parsing fails, we simply omit EffectiveAt and fall back to time.Now() in buildEventID.
if t, err := openmeteo.ParseTime(meta.Current.Time, meta.Timezone, meta.UTCOffsetSeconds); err == nil {
meta.ParsedTimestamp = t.UTC()
}
return raw, meta, nil
}
func buildEventID(sourceName string, meta openMeteoMeta) string {
locKey := ""
if meta.Latitude != 0 || meta.Longitude != 0 {
locKey = fmt.Sprintf("coord:%.5f,%.5f", meta.Latitude, meta.Longitude)
} else {
locKey = "loc:unknown"
}
ts := meta.ParsedTimestamp
if ts.IsZero() {
ts = time.Now().UTC()
}
return fmt.Sprintf("openmeteo:current:%s:%s:%s", sourceName, locKey, ts.Format(time.RFC3339Nano))
}

View File

@@ -15,12 +15,6 @@ import (
"gitea.maximumdirect.net/ejr/weatherfeeder/internal/standards"
)
// ObservationSource polls the OpenWeatherMap "Current weather" endpoint and emits a RAW observation Event.
//
// IMPORTANT UNIT POLICY (weatherfeeder convention):
// OpenWeather changes units based on the `units` query parameter but does NOT include the unit
// system in the response body. To keep normalization deterministic, this driver *requires*
// `units=metric`. If absent (or non-metric), the driver returns an error.
type ObservationSource struct {
http *common.HTTPSource
}
@@ -45,7 +39,6 @@ func (s *ObservationSource) Name() string { return s.http.Name }
func (s *ObservationSource) Kind() event.Kind { return event.Kind("observation") }
func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
// Re-check policy defensively (in case the URL is mutated after construction).
if err := requireMetricUnits(s.http.URL); err != nil {
return nil, fmt.Errorf("%s %q: %w", s.http.Driver, s.http.Name, err)
}
@@ -55,22 +48,21 @@ func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
return nil, err
}
eventID := buildEventID(s.http.Name, meta)
if strings.TrimSpace(eventID) == "" {
eventID = fmt.Sprintf("openweather:current:%s:%s", s.http.Name, time.Now().UTC().Format(time.RFC3339Nano))
}
var effectiveAt *time.Time
if !meta.ParsedTimestamp.IsZero() {
t := meta.ParsedTimestamp.UTC()
effectiveAt = &t
}
emittedAt := time.Now().UTC()
eventID := common.ChooseEventID("", s.http.Name, effectiveAt, emittedAt)
return common.SingleRawEvent(
s.Kind(),
s.http.Name,
standards.SchemaRawOpenWeatherCurrentV1,
eventID,
emittedAt,
effectiveAt,
raw,
)
@@ -81,14 +73,6 @@ func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
type openWeatherMeta struct {
Dt int64 `json:"dt"` // unix seconds, UTC
ID int64 `json:"id"`
Name string `json:"name"`
Coord struct {
Lon float64 `json:"lon"`
Lat float64 `json:"lat"`
} `json:"coord"`
ParsedTimestamp time.Time `json:"-"`
}
@@ -110,24 +94,6 @@ func (s *ObservationSource) fetchRaw(ctx context.Context) (json.RawMessage, open
return raw, meta, nil
}
func buildEventID(sourceName string, meta openWeatherMeta) string {
locKey := ""
if meta.ID != 0 {
locKey = fmt.Sprintf("city:%d", meta.ID)
} else if meta.Coord.Lat != 0 || meta.Coord.Lon != 0 {
locKey = fmt.Sprintf("coord:%.5f,%.5f", meta.Coord.Lat, meta.Coord.Lon)
} else {
locKey = "loc:unknown"
}
ts := meta.ParsedTimestamp
if ts.IsZero() {
ts = time.Now().UTC()
}
return fmt.Sprintf("openweather:current:%s:%s:%s", sourceName, locKey, ts.Format(time.RFC3339Nano))
}
func requireMetricUnits(rawURL string) error {
u, err := url.Parse(strings.TrimSpace(rawURL))
if err != nil {