Moved common HTTP polling helpers into feedkit and implemented support for ETag and Last-Modified

This commit is contained in:
2026-03-28 09:59:58 -05:00
parent 3b92c2284d
commit 4910440756
6 changed files with 610 additions and 11 deletions

147
sources/http.go Normal file
View File

@@ -0,0 +1,147 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"sync"
"gitea.maximumdirect.net/ejr/feedkit/config"
"gitea.maximumdirect.net/ejr/feedkit/transport"
)
// HTTPSource is a reusable helper for polling HTTP-backed sources.
//
// It centralizes generic source config parsing (`params.url`,
// `params.user_agent`, and optional `params.conditional`), default HTTP client
// setup, and conditional GET validator handling. Concrete daemon sources remain
// responsible for decoding the response body and constructing events.
type HTTPSource struct {
Driver string
Name string
URL string
UserAgent string
Accept string
Conditional bool
Client *http.Client
mu sync.Mutex
validators transport.HTTPValidators
}
// NewHTTPSource builds a generic HTTP polling helper from SourceConfig.
//
// Required params:
// - params.url
// - params.user_agent
//
// Optional params:
// - params.conditional (default true): enable conditional GET using cached
// ETag / Last-Modified validators
func NewHTTPSource(driver string, cfg config.SourceConfig, accept string) (*HTTPSource, error) {
name := strings.TrimSpace(cfg.Name)
if name == "" {
return nil, fmt.Errorf("%s: name is required", driver)
}
if cfg.Params == nil {
return nil, fmt.Errorf("%s %q: params are required (need params.url and params.user_agent)", driver, cfg.Name)
}
url, ok := cfg.ParamString("url", "URL")
if !ok {
return nil, fmt.Errorf("%s %q: params.url is required", driver, cfg.Name)
}
userAgent, ok := cfg.ParamString("user_agent", "userAgent")
if !ok {
return nil, fmt.Errorf("%s %q: params.user_agent is required", driver, cfg.Name)
}
conditional, err := parseConditionalParam(cfg)
if err != nil {
return nil, err
}
return &HTTPSource{
Driver: driver,
Name: name,
URL: url,
UserAgent: userAgent,
Accept: accept,
Conditional: conditional,
Client: transport.NewHTTPClient(transport.DefaultHTTPTimeout),
}, nil
}
// FetchBytesIfChanged fetches the configured URL and reports whether the
// upstream content changed. An unchanged 304 response returns changed=false
// with no body and no error.
func (s *HTTPSource) FetchBytesIfChanged(ctx context.Context) ([]byte, bool, error) {
client := s.Client
if client == nil {
client = transport.NewHTTPClient(transport.DefaultHTTPTimeout)
}
s.mu.Lock()
validators := s.validators
s.mu.Unlock()
body, changed, next, err := transport.FetchBodyIfChanged(
ctx,
client,
s.URL,
s.UserAgent,
s.Accept,
s.Conditional,
validators,
)
if err != nil {
return nil, false, fmt.Errorf("%s %q: %w", s.Driver, s.Name, err)
}
if s.Conditional {
s.mu.Lock()
s.validators = next
s.mu.Unlock()
}
return body, changed, nil
}
// FetchJSONIfChanged fetches the configured URL and returns the raw response
// body as json.RawMessage when content changed. An unchanged 304 response
// returns changed=false with a nil body and no error.
func (s *HTTPSource) FetchJSONIfChanged(ctx context.Context) (json.RawMessage, bool, error) {
body, changed, err := s.FetchBytesIfChanged(ctx)
if err != nil || !changed {
return nil, changed, err
}
return json.RawMessage(body), true, nil
}
func parseConditionalParam(cfg config.SourceConfig) (bool, error) {
raw, ok := cfg.Params["conditional"]
if !ok || raw == nil {
return true, nil
}
switch v := raw.(type) {
case bool:
return v, nil
case string:
s := strings.TrimSpace(v)
if s == "" {
return false, fmt.Errorf("source %q: params.conditional must be a boolean", cfg.Name)
}
parsed, err := strconv.ParseBool(s)
if err != nil {
return false, fmt.Errorf("source %q: params.conditional must be a boolean", cfg.Name)
}
return parsed, nil
default:
return false, fmt.Errorf("source %q: params.conditional must be a boolean", cfg.Name)
}
}