v0.x: add reusable HTTP source spine; fix routing; upstream HTTP transport helper
- fix dispatch route compilation so empty Kinds matches all (nil), not none
- introduce internal/sources/common/HTTPSource to centralize HTTP polling boilerplate:
  - standard cfg parsing (url + user_agent)
  - default HTTP client + Accept/User-Agent headers
  - consistent error wrapping
- refactor observation sources (nws/openmeteo/openweather) to use HTTPSource
- upstream generic HTTP fetch/limits/timeout helper from weatherfeeder to feedkit:
  - move internal/sources/common/http.go -> feedkit/transport/http.go
  - keep behavior: status checks, max-body limit, default timeout
This commit is contained in:
@@ -1,70 +1,76 @@
|
||||
// FILE: ./internal/sources/common/http.go
|
||||
// FILE: ./internal/sources/common/http_source.go
|
||||
package common
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/config"
|
||||
"gitea.maximumdirect.net/ejr/feedkit/transport"
|
||||
)
|
||||
|
||||
// Shared limits for weatherfeeder HTTP sources.
const (
	// maxResponseBodyBytes caps how many bytes of an HTTP response body are
	// accepted. API payloads are expected to be small, so the cap guards
	// against accidentally or maliciously oversized responses.
	maxResponseBodyBytes = 4 << 20 // 4 MiB

	// DefaultHTTPTimeout is the timeout weatherfeeder HTTP sources use unless
	// a driver has a specific reason to pick a different one.
	DefaultHTTPTimeout = 10 * time.Second
)
|
||||
|
||||
// NewHTTPClient returns a simple http.Client configured with a timeout.
|
||||
// If timeout <= 0, DefaultHTTPTimeout is used.
|
||||
func NewHTTPClient(timeout time.Duration) *http.Client {
|
||||
if timeout <= 0 {
|
||||
timeout = DefaultHTTPTimeout
|
||||
}
|
||||
return &http.Client{Timeout: timeout}
|
||||
// HTTPSource is a small, reusable "HTTP polling spine" shared by weatherfeeder
// sources.
//
// It owns the repetitive plumbing so drivers do not have to:
//   - the standard config shape (url + user_agent), via RequireHTTPSourceConfig
//   - a default http.Client with a timeout
//   - fetching the body with headers and a max-body safety limit
//   - uniform error wrapping (driver + source name)
//
// Drivers still do the source-specific work themselves:
//   - decoding the minimal metadata needed for Event.ID / EffectiveAt
//   - building the event envelope (kind/schema/payload)
type HTTPSource struct {
	// Driver and Name identify the source; FetchBytes includes both in the
	// error it returns when a fetch fails.
	Driver, Name string

	// URL is the endpoint FetchBytes requests.
	URL string

	// UserAgent and Accept are handed to the transport fetch helper together
	// with URL (presumably sent as the matching request headers).
	UserAgent, Accept string

	// Client performs the request. When it is nil, FetchBytes falls back to a
	// client built with the default timeout.
	Client *http.Client
}
|
||||
|
||||
func FetchBody(ctx context.Context, client *http.Client, url, userAgent, accept string) ([]byte, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
// NewHTTPSource builds an HTTPSource using weatherfeeder's standard HTTP source
|
||||
// config (params.url + params.user_agent) and a default HTTP client.
|
||||
func NewHTTPSource(driver string, cfg config.SourceConfig, accept string) (*HTTPSource, error) {
|
||||
c, err := RequireHTTPSourceConfig(driver, cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if userAgent != "" {
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
}
|
||||
if accept != "" {
|
||||
req.Header.Set("Accept", accept)
|
||||
return &HTTPSource{
|
||||
Driver: driver,
|
||||
Name: c.Name,
|
||||
URL: c.URL,
|
||||
UserAgent: c.UserAgent,
|
||||
Accept: accept,
|
||||
Client: transport.NewHTTPClient(transport.DefaultHTTPTimeout),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// FetchBytes fetches the URL and returns the raw response body bytes.
|
||||
func (s *HTTPSource) FetchBytes(ctx context.Context) ([]byte, error) {
|
||||
client := s.Client
|
||||
if client == nil {
|
||||
// Defensive: allow tests or callers to nil out Client; keep behavior sane.
|
||||
client = transport.NewHTTPClient(transport.DefaultHTTPTimeout)
|
||||
}
|
||||
|
||||
res, err := client.Do(req)
|
||||
b, err := transport.FetchBody(ctx, client, s.URL, s.UserAgent, s.Accept)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("%s %q: %w", s.Driver, s.Name, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode < 200 || res.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("HTTP %s", res.Status)
|
||||
}
|
||||
|
||||
// Read at most maxResponseBodyBytes + 1 so we can detect overflow.
|
||||
limited := io.LimitReader(res.Body, maxResponseBodyBytes+1)
|
||||
|
||||
b, err := io.ReadAll(limited)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(b) == 0 {
|
||||
return nil, fmt.Errorf("empty response body")
|
||||
}
|
||||
|
||||
if len(b) > maxResponseBodyBytes {
|
||||
return nil, fmt.Errorf("response body too large (>%d bytes)", maxResponseBodyBytes)
|
||||
}
|
||||
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// FetchJSON fetches the URL and returns the raw body as json.RawMessage.
|
||||
// json.Unmarshal accepts json.RawMessage directly, so callers can decode minimal
|
||||
// metadata without keeping both []byte and RawMessage in their own structs.
|
||||
func (s *HTTPSource) FetchJSON(ctx context.Context) (json.RawMessage, error) {
|
||||
b, err := s.FetchBytes(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return json.RawMessage(b), nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user