Refactor feedkit boundaries ahead of v1

Remove global Postgres schema registration in favor of explicit schema-aware sink factory wiring, and update weatherfeeder to register the Postgres sink explicitly. Add optional per-source HTTP timeout and response body limit overrides while keeping feedkit defaults. Remove remaining legacy source/config compatibility surfaces, including singular kind support and old source registry/type aliases, and migrate weatherfeeder sources to plural `Kinds()` metadata. Clean up related docs, tests, and sample config to match the new Postgres, HTTP, and NATS configuration model.
This commit is contained in:
2026-03-28 13:52:48 -05:00
parent 3281368922
commit eb9a7cb349
22 changed files with 342 additions and 349 deletions

View File

@@ -10,10 +10,10 @@ import (
"time"
)
// maxResponseBodyBytes is a hard safety limit on HTTP response bodies.
// DefaultHTTPResponseBodyLimitBytes is the default safety limit on HTTP response
// bodies, applied whenever a caller does not supply a positive limit of its own.
// API responses should be small, so this protects us from accidental
// or malicious large responses.
const maxResponseBodyBytes = 2 << 21 // 4 MiB
const DefaultHTTPResponseBodyLimitBytes int64 = 2 << 21 // 4 MiB
// DefaultHTTPTimeout is the standard timeout used by HTTP sources.
// Individual drivers may override this if they have a specific need.
@@ -29,6 +29,10 @@ func NewHTTPClient(timeout time.Duration) *http.Client {
}
// FetchBody is a convenience wrapper around FetchBodyWithLimit that passes
// DefaultHTTPResponseBodyLimitBytes as the response body limit. All other
// arguments are forwarded unchanged, and the result is returned as-is.
func FetchBody(ctx context.Context, client *http.Client, url, userAgent, accept string) ([]byte, error) {
return FetchBodyWithLimit(ctx, client, url, userAgent, accept, DefaultHTTPResponseBodyLimitBytes)
}
func FetchBodyWithLimit(ctx context.Context, client *http.Client, url, userAgent, accept string, bodyLimitBytes int64) ([]byte, error) {
res, err := doRequest(ctx, client, http.MethodGet, url, userAgent, accept, "", "")
if err != nil {
return nil, err
@@ -39,7 +43,7 @@ func FetchBody(ctx context.Context, client *http.Client, url, userAgent, accept
return nil, fmt.Errorf("HTTP %s", res.Status)
}
return readValidatedBody(res.Body)
return readValidatedBody(res.Body, bodyLimitBytes)
}
// HTTPValidators are cache validators learned from prior successful GET responses.
@@ -68,6 +72,17 @@ func FetchBodyIfChanged(
url, userAgent, accept string,
conditional bool,
validators HTTPValidators,
) ([]byte, bool, HTTPValidators, error) {
return FetchBodyIfChangedWithLimit(ctx, client, url, userAgent, accept, conditional, validators, DefaultHTTPResponseBodyLimitBytes)
}
func FetchBodyIfChangedWithLimit(
ctx context.Context,
client *http.Client,
url, userAgent, accept string,
conditional bool,
validators HTTPValidators,
bodyLimitBytes int64,
) ([]byte, bool, HTTPValidators, error) {
headerName, headerValue := conditionalHeader(conditional, validators)
@@ -89,7 +104,7 @@ func FetchBodyIfChanged(
}
}
b, err := readValidatedBody(res.Body)
b, err := readValidatedBody(res.Body, bodyLimitBytes)
if err != nil {
return nil, false, validators, err
}
@@ -150,9 +165,13 @@ func refreshValidators(current HTTPValidators, header http.Header) HTTPValidator
return current
}
func readValidatedBody(r io.Reader) ([]byte, error) {
// Read at most maxResponseBodyBytes + 1 so we can detect overflow.
limited := io.LimitReader(r, maxResponseBodyBytes+1)
func readValidatedBody(r io.Reader, bodyLimitBytes int64) ([]byte, error) {
if bodyLimitBytes <= 0 {
bodyLimitBytes = DefaultHTTPResponseBodyLimitBytes
}
// Read at most bodyLimitBytes + 1 so we can detect overflow.
limited := io.LimitReader(r, bodyLimitBytes+1)
b, err := io.ReadAll(limited)
if err != nil {
@@ -163,8 +182,8 @@ func readValidatedBody(r io.Reader) ([]byte, error) {
return nil, fmt.Errorf("empty response body")
}
if len(b) > maxResponseBodyBytes {
return nil, fmt.Errorf("response body too large (>%d bytes)", maxResponseBodyBytes)
if int64(len(b)) > bodyLimitBytes {
return nil, fmt.Errorf("response body too large (>%d bytes)", bodyLimitBytes)
}
return b, nil