From 9ddcf5e0df66ecf5e5a63c12a35c014568366940 Mon Sep 17 00:00:00 2001 From: Eric Rakestraw Date: Tue, 17 Mar 2026 09:33:07 -0500 Subject: [PATCH] Document the PostgreSQL schema contract in doc.go --- internal/sinks/postgres/doc.go | 208 +++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 internal/sinks/postgres/doc.go diff --git a/internal/sinks/postgres/doc.go b/internal/sinks/postgres/doc.go new file mode 100644 index 0000000..a1603a5 --- /dev/null +++ b/internal/sinks/postgres/doc.go @@ -0,0 +1,208 @@ +// Package postgres documents weatherfeeder's PostgreSQL sink contract for +// downstream SQL consumers. +// +// This package wires weatherfeeder canonical events into normalized relational +// tables. Downstream consumers can reconstruct the same canonical JSON objects +// that were written by joining parent/child tables as described below. +// +// Canonical input schemas: +// - weather.observation.v1 -> model.WeatherObservation +// - weather.forecast.v1 -> model.WeatherForecastRun +// - weather.alert.v1 -> model.WeatherAlertRun +// +// Parent/child relationships: +// - observations.event_id -> observation_cloud_layers.event_id +// - observations.event_id -> observation_present_weather.event_id +// - forecasts.event_id -> forecast_periods.run_event_id +// - alert_runs.event_id -> alerts.run_event_id +// - alerts.(run_event_id, alert_index) -> alert_references.(run_event_id, alert_index) +// +// Dedupe and retention behavior: +// - Parent primary keys (event_id): observations, forecasts, alert_runs. +// - Child primary keys use positional indexes to preserve payload order. +// - Prune columns: +// - observations.observed_at +// - observation_cloud_layers.observed_at +// - observation_present_weather.observed_at +// - forecasts.issued_at +// - forecast_periods.issued_at +// - alert_runs.as_of +// - alerts.as_of +// - alert_references.as_of +// +// Envelope field mapping (shared parent columns) +// +// These columns exist on observations, forecasts, and alert_runs: +// - event_id TEXT -> event.id +// - event_kind TEXT -> event.kind +// - event_source TEXT -> event.source +// - event_schema TEXT -> event.schema +// - event_emitted_at TIMESTAMPTZ -> event.emitted_at +// - event_effective_at TIMESTAMPTZ NULL -> event.effective_at +// +// Table contract +// +// 1. observations (PK: event_id) +// +// - event_id TEXT -> event.id +// - event_kind TEXT -> event.kind +// - event_source TEXT -> event.source +// - event_schema TEXT -> event.schema +// - event_emitted_at TIMESTAMPTZ -> event.emitted_at +// - event_effective_at TIMESTAMPTZ NULL -> event.effective_at +// - station_id TEXT NULL -> payload.stationId +// - station_name TEXT NULL -> payload.stationName +// - observed_at TIMESTAMPTZ -> payload.timestamp +// - condition_code INTEGER -> payload.conditionCode +// - condition_text TEXT NULL -> payload.conditionText +// - is_day BOOLEAN NULL -> payload.isDay +// - provider_raw_description TEXT NULL -> payload.providerRawDescription +// - text_description TEXT NULL -> payload.textDescription +// - icon_url TEXT NULL -> payload.iconUrl +// - temperature_c DOUBLE PRECISION NULL -> payload.temperatureC +// - dewpoint_c DOUBLE PRECISION NULL -> payload.dewpointC +// - wind_direction_degrees DOUBLE PRECISION NULL -> payload.windDirectionDegrees +// - wind_speed_kmh DOUBLE PRECISION NULL -> payload.windSpeedKmh +// - wind_gust_kmh DOUBLE PRECISION NULL -> payload.windGustKmh +// - barometric_pressure_pa DOUBLE PRECISION NULL -> payload.barometricPressurePa +// - sea_level_pressure_pa DOUBLE PRECISION NULL -> payload.seaLevelPressurePa +// - visibility_meters DOUBLE PRECISION NULL -> payload.visibilityMeters +// - relative_humidity_percent DOUBLE PRECISION NULL -> payload.relativeHumidityPercent +// - apparent_temperature_c DOUBLE PRECISION NULL -> payload.apparentTemperatureC +// - elevation_meters DOUBLE PRECISION NULL -> payload.elevationMeters +// - raw_message TEXT NULL -> payload.rawMessage +// +// 2. observation_cloud_layers (PK: event_id, layer_index) +// +// - event_id TEXT -> observations.event_id / payload.cloudLayers[i] +// - layer_index INTEGER -> i (array position in payload.cloudLayers) +// - observed_at TIMESTAMPTZ -> payload.timestamp +// - base_meters DOUBLE PRECISION NULL -> payload.cloudLayers[i].baseMeters +// - amount TEXT NULL -> payload.cloudLayers[i].amount +// +// 3. observation_present_weather (PK: event_id, weather_index) +// +// - event_id TEXT -> observations.event_id / payload.presentWeather[i] +// - weather_index INTEGER -> i (array position in payload.presentWeather) +// - observed_at TIMESTAMPTZ -> payload.timestamp +// - raw_text TEXT NULL -> JSON-encoded payload.presentWeather[i].raw +// +// Note: raw_text stores compact JSON text. Consumers that need the original +// object should parse raw_text as JSON. +// +// 4. forecasts (PK: event_id) +// +// - event_id TEXT -> event.id +// - event_kind TEXT -> event.kind +// - event_source TEXT -> event.source +// - event_schema TEXT -> event.schema +// - event_emitted_at TIMESTAMPTZ -> event.emitted_at +// - event_effective_at TIMESTAMPTZ NULL -> event.effective_at +// - location_id TEXT NULL -> payload.locationId +// - location_name TEXT NULL -> payload.locationName +// - issued_at TIMESTAMPTZ -> payload.issuedAt +// - updated_at TIMESTAMPTZ NULL -> payload.updatedAt +// - product TEXT -> payload.product +// - latitude DOUBLE PRECISION NULL -> payload.latitude +// - longitude DOUBLE PRECISION NULL -> payload.longitude +// - elevation_meters DOUBLE PRECISION NULL -> payload.elevationMeters +// - period_count INTEGER -> len(payload.periods) +// +// 5. forecast_periods (PK: run_event_id, period_index) +// +// - run_event_id TEXT -> forecasts.event_id / payload.periods[i] +// - period_index INTEGER -> i (array position in payload.periods) +// - issued_at TIMESTAMPTZ -> payload.issuedAt (copied from parent) +// - start_time TIMESTAMPTZ -> payload.periods[i].startTime +// - end_time TIMESTAMPTZ -> payload.periods[i].endTime +// - name TEXT NULL -> payload.periods[i].name +// - is_day BOOLEAN NULL -> payload.periods[i].isDay +// - condition_code INTEGER -> payload.periods[i].conditionCode +// - condition_text TEXT NULL -> payload.periods[i].conditionText +// - provider_raw_description TEXT NULL -> payload.periods[i].providerRawDescription +// - text_description TEXT NULL -> payload.periods[i].textDescription +// - detailed_text TEXT NULL -> payload.periods[i].detailedText +// - icon_url TEXT NULL -> payload.periods[i].iconUrl +// - temperature_c DOUBLE PRECISION NULL -> payload.periods[i].temperatureC +// - temperature_c_min DOUBLE PRECISION NULL -> payload.periods[i].temperatureCMin +// - temperature_c_max DOUBLE PRECISION NULL -> payload.periods[i].temperatureCMax +// - dewpoint_c DOUBLE PRECISION NULL -> payload.periods[i].dewpointC +// - relative_humidity_percent DOUBLE PRECISION NULL -> payload.periods[i].relativeHumidityPercent +// - wind_direction_degrees DOUBLE PRECISION NULL -> payload.periods[i].windDirectionDegrees +// - wind_speed_kmh DOUBLE PRECISION NULL -> payload.periods[i].windSpeedKmh +// - wind_gust_kmh DOUBLE PRECISION NULL -> payload.periods[i].windGustKmh +// - barometric_pressure_pa DOUBLE PRECISION NULL -> payload.periods[i].barometricPressurePa +// - visibility_meters DOUBLE PRECISION NULL -> payload.periods[i].visibilityMeters +// - apparent_temperature_c DOUBLE PRECISION NULL -> payload.periods[i].apparentTemperatureC +// - cloud_cover_percent DOUBLE PRECISION NULL -> payload.periods[i].cloudCoverPercent +// - probability_of_precipitation_percent DOUBLE PRECISION NULL -> payload.periods[i].probabilityOfPrecipitationPercent +// - precipitation_amount_mm DOUBLE PRECISION NULL -> payload.periods[i].precipitationAmountMm +// - snowfall_depth_mm DOUBLE PRECISION NULL -> payload.periods[i].snowfallDepthMm +// - uv_index DOUBLE PRECISION NULL -> payload.periods[i].uvIndex +// +// 6. alert_runs (PK: event_id) +// +// - event_id TEXT -> event.id +// - event_kind TEXT -> event.kind +// - event_source TEXT -> event.source +// - event_schema TEXT -> event.schema +// - event_emitted_at TIMESTAMPTZ -> event.emitted_at +// - event_effective_at TIMESTAMPTZ NULL -> event.effective_at +// - location_id TEXT NULL -> payload.locationId +// - location_name TEXT NULL -> payload.locationName +// - as_of TIMESTAMPTZ -> payload.asOf +// - latitude DOUBLE PRECISION NULL -> payload.latitude +// - longitude DOUBLE PRECISION NULL -> payload.longitude +// - alert_count INTEGER -> len(payload.alerts) +// +// 7. alerts (PK: run_event_id, alert_index) +// +// - run_event_id TEXT -> alert_runs.event_id / payload.alerts[i] +// - alert_index INTEGER -> i (array position in payload.alerts) +// - as_of TIMESTAMPTZ -> payload.asOf (copied from parent) +// - alert_id TEXT -> payload.alerts[i].id +// - event TEXT NULL -> payload.alerts[i].event +// - headline TEXT NULL -> payload.alerts[i].headline +// - severity TEXT NULL -> payload.alerts[i].severity +// - urgency TEXT NULL -> payload.alerts[i].urgency +// - certainty TEXT NULL -> payload.alerts[i].certainty +// - status TEXT NULL -> payload.alerts[i].status +// - message_type TEXT NULL -> payload.alerts[i].messageType +// - category TEXT NULL -> payload.alerts[i].category +// - response TEXT NULL -> payload.alerts[i].response +// - description TEXT NULL -> payload.alerts[i].description +// - instruction TEXT NULL -> payload.alerts[i].instruction +// - sent TIMESTAMPTZ NULL -> payload.alerts[i].sent +// - effective TIMESTAMPTZ NULL -> payload.alerts[i].effective +// - onset TIMESTAMPTZ NULL -> payload.alerts[i].onset +// - expires TIMESTAMPTZ NULL -> payload.alerts[i].expires +// - area_description TEXT NULL -> payload.alerts[i].areaDescription +// - sender_name TEXT NULL -> payload.alerts[i].senderName +// - reference_count INTEGER -> len(payload.alerts[i].references) +// +// 8. alert_references (PK: run_event_id, alert_index, reference_index) +// +// - run_event_id TEXT -> alert_runs.event_id / payload.alerts[i].references[j] +// - alert_index INTEGER -> i (array position in payload.alerts) +// - reference_index INTEGER -> j (array position in payload.alerts[i].references) +// - as_of TIMESTAMPTZ -> payload.asOf (copied from parent) +// - id TEXT NULL -> payload.alerts[i].references[j].id +// - identifier TEXT NULL -> payload.alerts[i].references[j].identifier +// - sender TEXT NULL -> payload.alerts[i].references[j].sender +// - sent TIMESTAMPTZ NULL -> payload.alerts[i].references[j].sent +// +// Reconstructing canonical JSON payloads +// +// - WeatherObservation: +// read one row from observations, then join child rows by event_id ordered by +// layer_index / weather_index to rebuild cloudLayers and presentWeather arrays. +// +// - WeatherForecastRun: +// read one row from forecasts, then join forecast_periods by run_event_id +// ordered by period_index to rebuild periods. +// +// - WeatherAlertRun: +// read one row from alert_runs, join alerts by run_event_id ordered by +// alert_index, then join alert_references by (run_event_id, alert_index) +// ordered by reference_index to rebuild references per alert. +package postgres