sources: standardize Event.ID on Source:EffectiveAt; simplify raw event helper

- Adopt an opinionated Event.ID policy across sources:
  - use upstream-provided ID when available
  - otherwise derive a stable ID from Source:EffectiveAt (RFC3339Nano, UTC)
  - fall back to Source:EmittedAt when EffectiveAt is unavailable
- Add common/id helper to centralize ID selection logic and keep sources consistent
- Simplify common event construction by collapsing SingleRawEventAt/SingleRawEvent
  into a single explicit SingleRawEvent helper (emittedAt passed in)
- Update NWS/Open-Meteo/OpenWeather observation sources to:
  - compute EffectiveAt first
  - generate IDs via the shared helper
  - build envelopes via the unified SingleRawEvent helper
- Improve determinism and dedupe-friendliness without changing schemas or payloads
This commit is contained in:
2026-01-15 19:38:15 -06:00
parent d9474b5a5b
commit d8db58c004
5 changed files with 76 additions and 110 deletions

View File

@@ -4,7 +4,6 @@ package nws
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
@@ -15,10 +14,6 @@ import (
)
// ObservationSource polls an NWS station observation endpoint and emits a RAW observation Event.
//
// This corresponds to URLs like:
//
// https://api.weather.gov/stations/KSTL/observations/latest
type ObservationSource struct {
// http is the shared HTTP source helper; its Name field is what Name() reports
// and what is passed as the source name when building events.
http *common.HTTPSource
}
@@ -36,35 +31,14 @@ func NewObservationSource(cfg config.SourceConfig) (*ObservationSource, error) {
// Name reports the name of the underlying HTTP source.
func (s *ObservationSource) Name() string {
	return s.http.Name
}
// Kind reports the event kind used for routing/policy decisions.
// The kind stays canonical ("observation") even though this source emits raw
// events; the Schema is what distinguishes raw from canonical payloads.
func (s *ObservationSource) Kind() event.Kind {
	return event.Kind("observation")
}
// Poll fetches NWS "latest observation" and emits exactly one RAW Event.
// The RAW payload is json.RawMessage and Schema is standards.SchemaRawNWSObservationV1.
func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
raw, meta, err := s.fetchRaw(ctx)
if err != nil {
return nil, err
}
// Event.ID must be set BEFORE normalization (feedkit requires it).
// Prefer NWS-provided "id" (stable URL). Fallback to a stable computed key.
eventID := strings.TrimSpace(meta.ID)
if eventID == "" {
ts := meta.ParsedTimestamp
if ts.IsZero() {
ts = time.Now().UTC()
}
station := strings.TrimSpace(meta.StationID)
if station == "" {
station = "UNKNOWN"
}
eventID = fmt.Sprintf("nws:observation:%s:%s:%s", s.http.Name, station, ts.UTC().Format(time.RFC3339Nano))
}
// EffectiveAt is optional; for observations it's naturally the observation timestamp.
var effectiveAt *time.Time
if !meta.ParsedTimestamp.IsZero() {
@@ -72,11 +46,15 @@ func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
effectiveAt = &t
}
emittedAt := time.Now().UTC()
eventID := common.ChooseEventID(meta.ID, s.http.Name, effectiveAt, emittedAt)
return common.SingleRawEvent(
s.Kind(),
s.http.Name,
standards.SchemaRawNWSObservationV1,
eventID,
emittedAt,
effectiveAt,
raw,
)
@@ -84,18 +62,13 @@ func (s *ObservationSource) Poll(ctx context.Context) ([]event.Event, error) {
// ---- RAW fetch + minimal metadata decode ----
// observationMeta is a *minimal* decode of the NWS payload used only to build
// a stable Event.ID and a useful EffectiveAt for the envelope.
//
// It is filled by json.Unmarshal in fetchRaw; the full payload is returned
// separately as json.RawMessage and is not reconstructed from this struct.
type observationMeta struct {
// ID is the top-level "id" member of the payload. Poll uses it as the
// preferred Event.ID when it is non-empty after trimming.
ID string `json:"id"`
// Properties holds the subset of the "properties" object that is decoded.
Properties struct {
// StationID is the "stationId" string as it appears in the payload.
StationID string `json:"stationId"`
// Timestamp is the "timestamp" string as it appears in the payload;
// fetchRaw trims it and attempts to parse it as RFC 3339.
Timestamp string `json:"timestamp"`
} `json:"properties"`
// Convenience fields populated after decode.
// Both are tagged `json:"-"`, so they are never read from the payload.

// ParsedTimestamp is the parsed observation time; Poll treats the zero value
// as "no timestamp available".
// NOTE(review): presumably set from Properties.Timestamp when it parses — the
// assignment is outside the visible part of fetchRaw; confirm.
ParsedTimestamp time.Time `json:"-"`
// StationID is the whitespace-trimmed copy of Properties.StationID set by fetchRaw.
StationID string `json:"-"`
}
func (s *ObservationSource) fetchRaw(ctx context.Context) (json.RawMessage, observationMeta, error) {
@@ -106,12 +79,10 @@ func (s *ObservationSource) fetchRaw(ctx context.Context) (json.RawMessage, obse
var meta observationMeta
if err := json.Unmarshal(raw, &meta); err != nil {
// If metadata decode fails, still return raw; envelope will fall back to computed ID.
// If metadata decode fails, still return raw; envelope will fall back to Source:EffectiveAt.
return raw, observationMeta{}, nil
}
meta.StationID = strings.TrimSpace(meta.Properties.StationID)
tsStr := strings.TrimSpace(meta.Properties.Timestamp)
if tsStr != "" {
if t, err := time.Parse(time.RFC3339, tsStr); err == nil {