sources: standardize Event.ID on Source:EffectiveAt; simplify raw event helper

- Adopt an opinionated Event.ID policy across sources:
  - use upstream-provided ID when available
  - otherwise derive a stable ID from Source:EffectiveAt (RFC3339Nano, UTC)
  - fall back to Source:EmittedAt when EffectiveAt is unavailable
- Add common/id helper to centralize ID selection logic and keep sources consistent
- Simplify common event construction by collapsing SingleRawEventAt/SingleRawEvent
  into a single explicit SingleRawEvent helper (emittedAt passed in)
- Update NWS/Open-Meteo/OpenWeather observation sources to:
  - compute EffectiveAt first
  - generate IDs via the shared helper
  - build envelopes via the unified SingleRawEvent helper
- Improve determinism and dedupe-friendliness without changing schemas or payloads
This commit is contained in:
2026-01-15 19:38:15 -06:00
parent d9474b5a5b
commit d8db58c004
5 changed files with 76 additions and 110 deletions

View File

@@ -9,17 +9,34 @@ import (
// SingleRawEvent constructs, validates, and returns a slice containing exactly one event.
//
// This removes the repetitive "event envelope ceremony" from individual sources.
// This removes repetitive "event envelope ceremony" from individual sources.
// Sources remain responsible for:
// - fetching bytes (raw payload)
// - choosing Schema (raw schema identifier)
// - computing a stable Event.ID and (optional) EffectiveAt
func SingleRawEvent(kind event.Kind, sourceName string, schema string, id string, effectiveAt *time.Time, payload any) ([]event.Event, error) {
// - computing Event.ID and (optional) EffectiveAt
//
// emittedAt is explicit so callers can compute IDs using the same timestamp (or
// so tests can provide a stable value).
func SingleRawEvent(
kind event.Kind,
sourceName string,
schema string,
id string,
emittedAt time.Time,
effectiveAt *time.Time,
payload any,
) ([]event.Event, error) {
if emittedAt.IsZero() {
emittedAt = time.Now().UTC()
} else {
emittedAt = emittedAt.UTC()
}
e := event.Event{
ID: id,
Kind: kind,
Source: sourceName,
EmittedAt: time.Now().UTC(),
EmittedAt: emittedAt,
EffectiveAt: effectiveAt,
// RAW schema (normalizer matches on this).

View File

@@ -0,0 +1,39 @@
// FILE: ./internal/sources/common/id.go
package common
import (
"fmt"
"strings"
"time"
)
// ChooseEventID applies weatherfeeder's opinionated Event.ID policy:
//
// - If upstream provides an ID, use it (trimmed).
// - Otherwise, ID is "<Source>:<EffectiveAt>" when available.
// - If EffectiveAt is unavailable, fall back to "<Source>:<EmittedAt>".
//
// Timestamps are encoded as RFC3339Nano in UTC.
func ChooseEventID(upstreamID, sourceName string, effectiveAt *time.Time, emittedAt time.Time) string {
if id := strings.TrimSpace(upstreamID); id != "" {
return id
}
src := strings.TrimSpace(sourceName)
if src == "" {
src = "UNKNOWN_SOURCE"
}
// Prefer EffectiveAt for dedupe friendliness.
if effectiveAt != nil && !effectiveAt.IsZero() {
return fmt.Sprintf("%s:%s", src, effectiveAt.UTC().Format(time.RFC3339Nano))
}
// Fall back to EmittedAt (still stable within a poll invocation).
t := emittedAt.UTC()
if t.IsZero() {
t = time.Now().UTC()
}
return fmt.Sprintf("%s:%s", src, t.Format(time.RFC3339Nano))
}