Added support for Postgres polling sources

This commit is contained in:
2026-03-29 10:53:13 -05:00
parent 247937b65e
commit 5c1b28ee0a
7 changed files with 752 additions and 52 deletions

View File

@@ -9,6 +9,8 @@
// stream exit classification helpers
// - Registry / NewRegistry: source driver registry and builders
// - HTTPSource / NewHTTPSource: reusable HTTP polling helper
// - PostgresQuerySource / NewPostgresQuerySource: reusable Postgres polling
// helper
//
// Source drivers are domain-specific and registered into Registry by driver name.
// Registry can then build configured sources from config.SourceConfig.
@@ -34,4 +36,17 @@
// When validators are available, NewHTTPSource prefers ETag/If-None-Match and
// falls back to Last-Modified/If-Modified-Since. A 304 Not Modified response is
// treated as a successful unchanged poll.
//
// Postgres-backed polling sources can share NewPostgresQuerySource for generic
// DB config parsing and query execution. The helper understands:
// - params.uri (required)
// - params.username (required)
// - params.password (required)
// - params.query (required)
// - params.query_timeout (optional, default 30s)
//
// feedkit does not register a built-in postgres poll driver. Downstream daemons
// should register domain-specific driver names that call
// NewPostgresQuerySource, then keep SQL semantics, row scanning, ordering,
// watermark policy, and event construction in their own source types.
package sources

117
sources/postgres.go Normal file
View File

@@ -0,0 +1,117 @@
package sources
import (
"context"
"database/sql"
"fmt"
"strings"
"time"
"gitea.maximumdirect.net/ejr/feedkit/config"
pgconn "gitea.maximumdirect.net/ejr/feedkit/internal/postgres"
)
// defaultPostgresQueryTimeout is the query timeout applied when
// params.query_timeout is not present in the source config.
const defaultPostgresQueryTimeout = 30 * time.Second
// postgresQueryDB is the only database capability the helper relies on:
// running a query under a context and getting rows back. Keeping the interface
// this narrow lets tests substitute a fake for the real database handle.
type postgresQueryDB interface {
QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
}
// openPostgresQueryDB opens the database handle used by NewPostgresQuerySource.
// It is a package-level variable so tests can swap in a stub.
//
// The error is checked before the handle is converted to postgresQueryDB so
// that a failed open always yields a nil interface value, never an interface
// wrapping a nil handle.
var openPostgresQueryDB = func(ctx context.Context, cfg pgconn.ConnConfig) (postgresQueryDB, error) {
	db, err := pgconn.Open(ctx, cfg)
	if err != nil {
		return nil, err
	}
	return db, nil
}
// PostgresQuerySource is a reusable helper for polling Postgres-backed sources.
//
// It centralizes generic source config parsing and query execution. Concrete
// daemon sources remain responsible for SQL semantics, row scanning, cursoring,
// and event construction.
type PostgresQuerySource struct {
// Driver is the driver name passed to NewPostgresQuerySource; Query uses it
// as the prefix of wrapped errors.
Driver string
// Name is the source name; NewPostgresQuerySource sets it to cfg.Name with
// surrounding whitespace removed.
Name string
// SQL is the statement executed by Query (taken from params.query).
SQL string
// QueryTimeout bounds each Query call when positive; see Query for how it
// interacts with a deadline already present on the caller's context.
QueryTimeout time.Duration
// db is the handle queries are executed against.
db postgresQueryDB
}
// NewPostgresQuerySource builds a generic Postgres polling helper from
// SourceConfig.
//
// driver is stored on the returned source and used as the prefix of every
// error message. The source name is cfg.Name with surrounding whitespace
// removed and must not be empty.
//
// Required params:
//   - params.uri
//   - params.username
//   - params.password
//   - params.query
//
// Optional params:
//   - params.query_timeout (default 30s; must be a positive duration when set)
//
// All params are validated before the database is opened, so a configuration
// error never triggers a connection attempt. The database is opened with
// context.Background(), i.e. without a caller-supplied deadline.
func NewPostgresQuerySource(driver string, cfg config.SourceConfig) (*PostgresQuerySource, error) {
	name := strings.TrimSpace(cfg.Name)
	if name == "" {
		return nil, fmt.Errorf("%s: name is required", driver)
	}
	if cfg.Params == nil {
		return nil, fmt.Errorf("%s %q: params are required (need params.uri, params.username, params.password, and params.query)", driver, cfg.Name)
	}

	uri, ok := cfg.ParamString("uri")
	if !ok {
		return nil, fmt.Errorf("%s %q: params.uri is required", driver, cfg.Name)
	}
	username, ok := cfg.ParamString("username")
	if !ok {
		return nil, fmt.Errorf("%s %q: params.username is required", driver, cfg.Name)
	}
	password, ok := cfg.ParamString("password")
	if !ok {
		return nil, fmt.Errorf("%s %q: params.password is required", driver, cfg.Name)
	}
	query, ok := cfg.ParamString("query")
	if !ok {
		return nil, fmt.Errorf("%s %q: params.query is required", driver, cfg.Name)
	}

	// Only parse query_timeout when the key is present, so an absent key keeps
	// the default while a present-but-invalid value is reported as an error.
	queryTimeout := defaultPostgresQueryTimeout
	if _, exists := cfg.Params["query_timeout"]; exists {
		queryTimeout, ok = cfg.ParamDuration("query_timeout")
		if !ok || queryTimeout <= 0 {
			// Use the same "<driver> <name>:" prefix as every other error here.
			return nil, fmt.Errorf("%s %q: params.query_timeout must be a positive duration", driver, cfg.Name)
		}
	}

	db, err := openPostgresQueryDB(context.Background(), pgconn.ConnConfig{
		URI:      uri,
		Username: username,
		Password: password,
	})
	if err != nil {
		return nil, fmt.Errorf("%s %q: open db: %w", driver, cfg.Name, err)
	}

	return &PostgresQuerySource{
		Driver:       driver,
		Name:         name,
		SQL:          query,
		QueryTimeout: queryTimeout,
		db:           db,
	}, nil
}
// Query runs the configured SQL with args and returns the resulting rows.
//
// When QueryTimeout is positive and ctx either has no deadline or has one
// further away than QueryTimeout, the query runs under a derived context that
// expires after QueryTimeout. Otherwise ctx is passed through unchanged, so an
// earlier caller deadline always wins.
//
// On success the caller owns the returned rows and should close them. Errors
// from the database are wrapped with the driver and source name.
func (s *PostgresQuerySource) Query(ctx context.Context, args ...any) (*sql.Rows, error) {
	queryCtx := ctx
	var cancel context.CancelFunc
	if s.QueryTimeout > 0 {
		if deadline, ok := ctx.Deadline(); !ok || time.Until(deadline) > s.QueryTimeout {
			queryCtx, cancel = context.WithTimeout(ctx, s.QueryTimeout)
		}
	}

	rows, err := s.db.QueryContext(queryCtx, s.SQL, args...)
	if err != nil {
		// No rows were handed out, so nothing depends on the derived context
		// any more; release it now instead of waiting for the timeout.
		if cancel != nil {
			cancel()
		}
		return nil, fmt.Errorf("%s %q: query: %w", s.Driver, s.Name, err)
	}

	// On success the derived context is intentionally left uncancelled: the
	// returned rows may still be reading from the database under it. It is
	// released when the timeout fires or when ctx itself is cancelled.
	return rows, nil
}

352
sources/postgres_test.go Normal file
View File

@@ -0,0 +1,352 @@
package sources
import (
"context"
"database/sql"
"database/sql/driver"
"errors"
"io"
"strings"
"sync"
"testing"
"time"
"gitea.maximumdirect.net/ejr/feedkit/config"
"gitea.maximumdirect.net/ejr/feedkit/event"
pgconn "gitea.maximumdirect.net/ejr/feedkit/internal/postgres"
)
// fakePostgresQueryDB is a test double that records the arguments of the most
// recent QueryContext call and replies with a preconfigured result.
type fakePostgresQueryDB struct {
	queryErr   error           // returned from QueryContext when non-nil
	lastCtx    context.Context // context of the most recent call
	lastQuery  string          // query text of the most recent call
	lastArgs   []any           // copy of the most recent call's args
	returnRows *sql.Rows       // returned when queryErr is nil
}

// QueryContext records its inputs, then returns queryErr if set and returnRows
// otherwise.
func (db *fakePostgresQueryDB) QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error) {
	// Snapshot the args so later mutation by the caller cannot change what was
	// recorded; no args are recorded as a nil slice.
	var recorded []any
	if len(args) > 0 {
		recorded = make([]any, len(args))
		copy(recorded, args)
	}
	db.lastCtx, db.lastQuery, db.lastArgs = ctx, query, recorded

	if failure := db.queryErr; failure != nil {
		return nil, failure
	}
	return db.returnRows, nil
}
// withPostgresQuerySourceTestState snapshots openPostgresQueryDB and registers
// a cleanup that puts the snapshot back, so a test may overwrite the hook
// without affecting later tests.
func withPostgresQuerySourceTestState(t *testing.T) {
	t.Helper()

	saved := openPostgresQueryDB
	restore := func() { openPostgresQueryDB = saved }
	t.Cleanup(restore)
}
// TestNewPostgresQuerySourceMissingParams checks that leaving out any single
// required param makes NewPostgresQuerySource fail with an error that names
// the missing param.
func TestNewPostgresQuerySourceMissingParams(t *testing.T) {
	withPostgresQuerySourceTestState(t)

	// complete holds every required param; each subtest drops exactly one key.
	complete := map[string]any{
		"uri":      "postgres://localhost/db",
		"username": "u",
		"password": "p",
		"query":    "SELECT 1",
	}

	for _, missing := range []string{"uri", "username", "password", "query"} {
		params := make(map[string]any, len(complete)-1)
		for key, value := range complete {
			if key != missing {
				params[key] = value
			}
		}
		want := "params." + missing

		t.Run("missing "+missing, func(t *testing.T) {
			sourceCfg := config.SourceConfig{
				Name:   "pg-source",
				Driver: "test_driver",
				Params: params,
			}
			_, err := NewPostgresQuerySource("test_driver", sourceCfg)
			switch {
			case err == nil:
				t.Fatalf("NewPostgresQuerySource() error = nil, want error")
			case !strings.Contains(err.Error(), want):
				t.Fatalf("NewPostgresQuerySource() error = %q, want substring %q", err, want)
			}
		})
	}
}
func TestNewPostgresQuerySourceRejectsInvalidQueryTimeout(t *testing.T) {
_, err := NewPostgresQuerySource("test_driver", config.SourceConfig{
Name: "pg-source",
Driver: "test_driver",
Params: map[string]any{
"uri": "postgres://localhost/db",
"username": "u",
"password": "p",
"query": "SELECT 1",
"query_timeout": "soon",
},
})
if err == nil {
t.Fatalf("NewPostgresQuerySource() error = nil, want error")
}
if !strings.Contains(err.Error(), "params.query_timeout must be a positive duration") {
t.Fatalf("NewPostgresQuerySource() error = %q", err)
}
}
// TestNewPostgresQuerySourceSuccessfulConstruction checks that a fully
// populated config produces a source whose exported fields mirror the config,
// and that the connection settings handed to the open hook carry the
// configured URI and credentials.
func TestNewPostgresQuerySourceSuccessfulConstruction(t *testing.T) {
	withPostgresQuerySourceTestState(t)

	db := &fakePostgresQueryDB{}
	var gotCfg pgconn.ConnConfig
	// Capture the connection config instead of opening a real database.
	openPostgresQueryDB = func(_ context.Context, cfg pgconn.ConnConfig) (postgresQueryDB, error) {
		gotCfg = cfg
		return db, nil
	}

	src, err := NewPostgresQuerySource("test_driver", config.SourceConfig{
		Name:   "pg-source",
		Driver: "test_driver",
		Params: map[string]any{
			"uri":           "postgres://db.example.local/feedkit",
			"username":      "app_user",
			"password":      "app_pass",
			"query":         "SELECT * FROM observations",
			"query_timeout": "45s",
		},
	})
	if err != nil {
		t.Fatalf("NewPostgresQuerySource() error = %v", err)
	}

	if src.Driver != "test_driver" {
		t.Fatalf("Driver = %q, want test_driver", src.Driver)
	}
	if src.Name != "pg-source" {
		t.Fatalf("Name = %q, want pg-source", src.Name)
	}
	if src.QueryTimeout != 45*time.Second {
		t.Fatalf("QueryTimeout = %s, want 45s", src.QueryTimeout)
	}
	if src.SQL != "SELECT * FROM observations" {
		t.Fatalf("SQL = %q", src.SQL)
	}

	// The URI must reach the open hook alongside the credentials.
	if gotCfg.URI != "postgres://db.example.local/feedkit" {
		t.Fatalf("ConnConfig.URI = %q", gotCfg.URI)
	}
	if gotCfg.Username != "app_user" || gotCfg.Password != "app_pass" {
		t.Fatalf("ConnConfig = %+v", gotCfg)
	}
}
// TestNewPostgresQuerySourceOpenFailure checks that an error from the open
// hook is returned wrapped with the driver name, source name, and "open db".
func TestNewPostgresQuerySourceOpenFailure(t *testing.T) {
	withPostgresQuerySourceTestState(t)

	const wantSubstring = `test_driver "pg-source": open db: db unavailable`

	// Make every open attempt fail.
	openPostgresQueryDB = func(context.Context, pgconn.ConnConfig) (postgresQueryDB, error) {
		return nil, errors.New("db unavailable")
	}

	sourceCfg := config.SourceConfig{
		Name:   "pg-source",
		Driver: "test_driver",
		Params: map[string]any{
			"uri":      "postgres://localhost/db",
			"username": "u",
			"password": "p",
			"query":    "SELECT 1",
		},
	}

	_, err := NewPostgresQuerySource("test_driver", sourceCfg)
	switch {
	case err == nil:
		t.Fatalf("NewPostgresQuerySource() error = nil, want error")
	case !strings.Contains(err.Error(), wantSubstring):
		t.Fatalf("NewPostgresQuerySource() error = %q", err)
	}
}
// TestPostgresQuerySourceQueryAppliesTimeoutAndWrapsError checks that Query,
// given a context without a deadline, runs the statement under a context whose
// deadline is no further away than QueryTimeout, forwards the SQL and args
// unchanged, and wraps the database error so it remains discoverable with
// errors.Is.
func TestPostgresQuerySourceQueryAppliesTimeoutAndWrapsError(t *testing.T) {
	queryFailure := errors.New("query failed")
	db := &fakePostgresQueryDB{queryErr: queryFailure}
	src := &PostgresQuerySource{
		Driver:       "test_driver",
		Name:         "pg-source",
		SQL:          "SELECT 1",
		QueryTimeout: 30 * time.Second,
		db:           db,
	}

	ctx := context.Background()
	_, err := src.Query(ctx, "arg1")
	if err == nil {
		t.Fatalf("Query() error = nil, want error")
	}
	if !strings.Contains(err.Error(), `test_driver "pg-source": query: query failed`) {
		t.Fatalf("Query() error = %q", err)
	}
	// The message check alone does not prove wrapping; the original error must
	// still be reachable through the chain.
	if !errors.Is(err, queryFailure) {
		t.Fatalf("Query() error = %v, want it to wrap the database error", err)
	}

	if db.lastCtx == nil {
		t.Fatalf("lastCtx = nil")
	}
	deadline, ok := db.lastCtx.Deadline()
	if !ok {
		t.Fatalf("expected derived deadline on query context")
	}
	// The derived deadline must lie in the future but not beyond QueryTimeout.
	if remaining := time.Until(deadline); remaining <= 0 || remaining > src.QueryTimeout {
		t.Fatalf("derived deadline is %s away, want within (0, %s]", remaining, src.QueryTimeout)
	}

	if db.lastQuery != "SELECT 1" {
		t.Fatalf("lastQuery = %q", db.lastQuery)
	}
	if len(db.lastArgs) != 1 || db.lastArgs[0] != "arg1" {
		t.Fatalf("lastArgs = %#v", db.lastArgs)
	}
}
// TestPostgresQuerySourceQueryUsesEarlierCallerDeadline checks that when the
// caller's context already expires sooner than QueryTimeout, Query hands that
// exact context to the database instead of deriving a new one.
func TestPostgresQuerySourceQueryUsesEarlierCallerDeadline(t *testing.T) {
	recorder := &fakePostgresQueryDB{queryErr: errors.New("query failed")}
	src := &PostgresQuerySource{
		Driver:       "test_driver",
		Name:         "pg-source",
		SQL:          "SELECT 1",
		QueryTimeout: 30 * time.Second,
		db:           recorder,
	}

	// 10ms is far shorter than the 30s QueryTimeout above.
	callerCtx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
	defer cancel()

	// The query result is irrelevant here; only the forwarded context matters.
	_, _ = src.Query(callerCtx)

	if recorder.lastCtx == callerCtx {
		return
	}
	t.Fatalf("expected source to reuse earlier caller deadline")
}
// TestPostgresQuerySourceSupportsDownstreamPollingPattern exercises the helper
// the way a downstream source would: build it from config, run Query, scan the
// rows, and turn each row into an event.
func TestPostgresQuerySourceSupportsDownstreamPollingPattern(t *testing.T) {
	withPostgresQuerySourceTestState(t)

	// Back the helper with an in-process database/sql driver that serves a
	// single one-column row.
	stubDB, closeStubDB := openRowsTestDB(t, "feedkit_sources_pg_rows", []string{"event_id"}, [][]driver.Value{{"evt-1"}})
	defer closeStubDB()
	openPostgresQueryDB = func(context.Context, pgconn.ConnConfig) (postgresQueryDB, error) {
		return stubDB, nil
	}

	// fakeDownstreamSource stands in for a daemon-specific source type that
	// embeds the shared helper.
	type fakeDownstreamSource struct {
		pg *PostgresQuerySource
	}

	poll := func(s fakeDownstreamSource, ctx context.Context) ([]event.Event, error) {
		rows, queryErr := s.pg.Query(ctx)
		if queryErr != nil {
			return nil, queryErr
		}
		defer rows.Close()

		var collected []event.Event
		for rows.Next() {
			var id string
			if scanErr := rows.Scan(&id); scanErr != nil {
				return nil, scanErr
			}
			evt := event.Event{
				ID:        id,
				Kind:      event.Kind("observation"),
				Source:    s.pg.Name,
				Schema:    "raw.test.v1",
				EmittedAt: time.Now().UTC(),
				Payload:   map[string]any{"event_id": id},
			}
			collected = append(collected, evt)
		}
		if iterErr := rows.Err(); iterErr != nil {
			return nil, iterErr
		}
		return collected, nil
	}

	sourceCfg := config.SourceConfig{
		Name:   "pg-source",
		Driver: "test_driver",
		Params: map[string]any{
			"uri":      "postgres://localhost/db",
			"username": "u",
			"password": "p",
			"query":    "SELECT event_id FROM events",
		},
	}
	pg, err := NewPostgresQuerySource("test_driver", sourceCfg)
	if err != nil {
		t.Fatalf("NewPostgresQuerySource() error = %v", err)
	}

	events, err := poll(fakeDownstreamSource{pg: pg}, context.Background())
	if err != nil {
		t.Fatalf("poll() error = %v", err)
	}
	if got := len(events); got != 1 {
		t.Fatalf("len(events) = %d, want 1", got)
	}
	if first := events[0]; first.ID != "evt-1" {
		t.Fatalf("events[0].ID = %q, want evt-1", first.ID)
	}
}
// rowsDriverSeen records which driver names openRowsTestDB has already passed
// to sql.Register; rowsDriverMu guards it.
var (
rowsDriverMu sync.Mutex
rowsDriverSeen = map[string]bool{}
)
// openRowsTestDB returns a *sql.DB backed by an in-process test driver that
// serves the given columns and rows, plus a function that closes the DB.
//
// The driver is registered under driverName only the first time that name is
// seen. A later call with the same name reuses the already-registered driver,
// so the columns and rows from the first call are the ones served.
func openRowsTestDB(t *testing.T, driverName string, columns []string, rows [][]driver.Value) (*sql.DB, func()) {
	t.Helper()

	registerOnce := func() {
		rowsDriverMu.Lock()
		defer rowsDriverMu.Unlock()

		if rowsDriverSeen[driverName] {
			return
		}
		// Hand the driver its own copies so callers cannot mutate its data.
		drv := &rowsTestDriver{
			columns: append([]string(nil), columns...),
			rows:    cloneDriverRows(rows),
		}
		sql.Register(driverName, drv)
		rowsDriverSeen[driverName] = true
	}
	registerOnce()

	db, err := sql.Open(driverName, "")
	if err != nil {
		t.Fatalf("sql.Open() error = %v", err)
	}

	closeDB := func() { _ = db.Close() }
	return db, closeDB
}
// cloneDriverRows returns a copy of in whose outer slice and every row slice
// are newly allocated, so changes to the copy do not affect in. The result is
// always non-nil; an empty row is copied as a nil row.
func cloneDriverRows(in [][]driver.Value) [][]driver.Value {
	cloned := make([][]driver.Value, len(in))
	for i := range in {
		cloned[i] = append([]driver.Value(nil), in[i]...)
	}
	return cloned
}
// rowsTestDriver is a minimal database/sql driver whose connections all serve
// the same fixed columns and rows.
type rowsTestDriver struct {
	columns []string
	rows    [][]driver.Value
}

// Open ignores the data source name and returns a connection holding its own
// copy of the driver's columns and rows.
func (d *rowsTestDriver) Open(string) (driver.Conn, error) {
	conn := &rowsTestConn{
		columns: append([]string(nil), d.columns...),
		rows:    cloneDriverRows(d.rows),
	}
	return conn, nil
}
// rowsTestConn is a test connection that answers every query with the same
// fixed result set. Prepared statements and transactions are not supported.
type rowsTestConn struct {
	columns []string
	rows    [][]driver.Value
}

// Prepare is not supported by this test connection.
func (c *rowsTestConn) Prepare(string) (driver.Stmt, error) {
	var noStmt driver.Stmt
	return noStmt, errors.New("not implemented")
}

// Close is a no-op.
func (c *rowsTestConn) Close() error {
	return nil
}

// Begin is not supported by this test connection.
func (c *rowsTestConn) Begin() (driver.Tx, error) {
	var noTx driver.Tx
	return noTx, errors.New("not implemented")
}

// QueryContext ignores the context, query text, and arguments, and returns a
// fresh row iterator over a copy of the connection's columns and rows.
func (c *rowsTestConn) QueryContext(_ context.Context, _ string, _ []driver.NamedValue) (driver.Rows, error) {
	result := &rowsTestRows{
		columns: append([]string(nil), c.columns...),
		rows:    cloneDriverRows(c.rows),
	}
	return result, nil
}
// rowsTestRows iterates over a fixed in-memory result set.
type rowsTestRows struct {
	columns []string
	rows    [][]driver.Value
	idx     int // index of the next row to hand out
}

// Columns returns a copy of the column names (nil when there are none).
func (r *rowsTestRows) Columns() []string {
	if len(r.columns) == 0 {
		return nil
	}
	names := make([]string, len(r.columns))
	copy(names, r.columns)
	return names
}

// Close is a no-op.
func (r *rowsTestRows) Close() error {
	return nil
}

// Next copies the next row into dest and advances, or returns io.EOF once all
// rows have been handed out.
func (r *rowsTestRows) Next(dest []driver.Value) error {
	if r.idx < len(r.rows) {
		current := r.rows[r.idx]
		r.idx++
		copy(dest, current)
		return nil
	}
	return io.EOF
}