Files
feedkit/transport/http.go

172 lines
4.8 KiB
Go

// FILE: ./transport/http.go
package transport
import (
"context"
"fmt"
"io"
"net/http"
"strings"
"time"
)
const (
	// maxResponseBodyBytes caps the size of an HTTP response body we are
	// willing to accept. API responses are expected to be small, so the cap
	// guards against accidentally or maliciously oversized payloads.
	maxResponseBodyBytes = 4 << 20 // 4 MiB

	// DefaultHTTPTimeout is the timeout HTTP sources use by default.
	// A driver with a specific need may choose a different value.
	DefaultHTTPTimeout = 10 * time.Second
)
// NewHTTPClient builds an http.Client whose only customization is its timeout.
// A zero or negative timeout selects DefaultHTTPTimeout.
func NewHTTPClient(timeout time.Duration) *http.Client {
	client := &http.Client{Timeout: DefaultHTTPTimeout}
	if timeout > 0 {
		client.Timeout = timeout
	}
	return client
}
// FetchBody issues a GET for url through client and returns the response body.
//
// userAgent and accept are forwarded to doRequest, which sends them as the
// User-Agent and Accept headers when non-empty. Any status outside the 2xx
// range is reported as an error of the form "HTTP <status>". A 2xx body is
// returned only if it passes the checks in readValidatedBody.
func FetchBody(ctx context.Context, client *http.Client, url, userAgent, accept string) ([]byte, error) {
	resp, err := doRequest(ctx, client, http.MethodGet, url, userAgent, accept, "", "")
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code < 200 || code > 299 {
		return nil, fmt.Errorf("HTTP %s", resp.Status)
	}
	return readValidatedBody(resp.Body)
}
// HTTPValidators holds the cache validators captured from an earlier
// successful GET response.
//
// When both are available the ETag takes precedence; LastModified serves as
// the fallback validator when no ETag is available.
type HTTPValidators struct {
	// ETag is the value taken from a response's ETag header.
	ETag string
	// LastModified is the value taken from a response's Last-Modified header.
	LastModified string
}
// FetchBodyIfChanged performs an HTTP GET and opportunistically uses conditional
// request headers based on the provided validators.
//
// Behavior:
//   - if conditional is false, this behaves like a normal GET and leaves validators unchanged
//   - if validators.ETag is set, sends If-None-Match
//   - else if validators.LastModified is set, sends If-Modified-Since
//   - 304 Not Modified in reply to a conditional header we sent is treated as
//     success with changed=false and no body; validators are refreshed from any
//     ETag / Last-Modified headers carried by the 304
//   - 304 Not Modified when no conditional header was sent (conditional is
//     false, or no usable validator was available) is reported as an error,
//     the same way FetchBody reports it, because the caller has nothing cached
//     that the 304 could refer to
//   - other 2xx responses are treated as changed=true and still enforce the
//     normal body checks; when conditional is true the validators are replaced
//     by those found in the response headers
//   - any other status is reported as an error of the form "HTTP <status>"
//
// It returns the body, whether the resource changed, the validators to use
// for the next call, and an error. On error the validators passed in are
// returned unchanged.
func FetchBodyIfChanged(
	ctx context.Context,
	client *http.Client,
	url, userAgent, accept string,
	conditional bool,
	validators HTTPValidators,
) ([]byte, bool, HTTPValidators, error) {
	headerName, headerValue := conditionalHeader(conditional, validators)
	res, err := doRequest(ctx, client, http.MethodGet, url, userAgent, accept, headerName, headerValue)
	if err != nil {
		return nil, false, validators, err
	}
	defer res.Body.Close()

	// A 304 is only meaningful as the answer to a conditional header we sent.
	// Without one there is no cached representation for it to refer to, so it
	// must not be reported as "unchanged"; it falls through to the non-2xx
	// error below instead.
	sentConditional := headerName != "" && headerValue != ""
	if res.StatusCode == http.StatusNotModified && sentConditional {
		return nil, false, refreshValidators(validators, res.Header), nil
	}
	if res.StatusCode < 200 || res.StatusCode >= 300 {
		return nil, false, validators, fmt.Errorf("HTTP %s", res.Status)
	}

	b, err := readValidatedBody(res.Body)
	if err != nil {
		return nil, false, validators, err
	}
	if conditional {
		// A fresh body supersedes whatever validators we held before.
		validators = replaceValidators(res.Header)
	}
	return b, true, validators, nil
}
// doRequest builds a body-less request for method and url, bound to ctx, and
// executes it with client.
//
// User-Agent and Accept are set only when the corresponding argument is
// non-empty. One additional header is attached when both headerName and
// headerValue are non-empty. It is applied last, so it overrides an earlier
// header of the same name. The response is returned as-is; callers in this
// file close its body.
func doRequest(ctx context.Context, client *http.Client, method, url, userAgent, accept, headerName, headerValue string) (*http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, method, url, nil)
	if err != nil {
		return nil, err
	}

	headers := [...][2]string{
		{"User-Agent", userAgent},
		{"Accept", accept},
		{headerName, headerValue},
	}
	for _, h := range headers {
		if name, value := h[0], h[1]; name != "" && value != "" {
			req.Header.Set(name, value)
		}
	}
	return client.Do(req)
}
// conditionalHeader picks the conditional request header implied by validators.
//
// It returns an empty name and value when enabled is false or when neither
// validator holds anything but whitespace. A non-blank ETag yields
// If-None-Match. Otherwise a non-blank LastModified yields If-Modified-Since.
// The returned value is the validator with surrounding whitespace trimmed.
func conditionalHeader(enabled bool, validators HTTPValidators) (string, string) {
	if !enabled {
		return "", ""
	}

	etag := strings.TrimSpace(validators.ETag)
	lastModified := strings.TrimSpace(validators.LastModified)
	switch {
	case etag != "":
		return "If-None-Match", etag
	case lastModified != "":
		return "If-Modified-Since", lastModified
	default:
		return "", ""
	}
}
// replaceValidators builds a fresh set of validators from header alone.
// Each field is the whitespace-trimmed value of the ETag or Last-Modified
// header, and is empty when that header is absent.
func replaceValidators(header http.Header) HTTPValidators {
	var fresh HTTPValidators
	fresh.ETag = strings.TrimSpace(header.Get("ETag"))
	fresh.LastModified = strings.TrimSpace(header.Get("Last-Modified"))
	return fresh
}
// refreshValidators returns current with each validator overwritten by the
// matching header value, but only where that header is present and not blank.
// A validator whose header is missing or whitespace-only keeps its current
// value. current is received by value, so the caller's copy is not modified.
func refreshValidators(current HTTPValidators, header http.Header) HTTPValidators {
	targets := [...]struct {
		key string
		dst *string
	}{
		{"ETag", &current.ETag},
		{"Last-Modified", &current.LastModified},
	}
	for _, target := range targets {
		if value := strings.TrimSpace(header.Get(target.key)); value != "" {
			*target.dst = value
		}
	}
	return current
}
// readValidatedBody reads r in full and returns its contents, rejecting
// bodies that are empty or larger than maxResponseBodyBytes.
//
// A read error is returned unchanged.
func readValidatedBody(r io.Reader) ([]byte, error) {
	// Allow one byte beyond the cap so that an oversized body can be told
	// apart from one that is exactly at the limit.
	data, err := io.ReadAll(io.LimitReader(r, maxResponseBodyBytes+1))
	switch {
	case err != nil:
		return nil, err
	case len(data) == 0:
		return nil, fmt.Errorf("empty response body")
	case len(data) > maxResponseBodyBytes:
		return nil, fmt.Errorf("response body too large (>%d bytes)", maxResponseBodyBytes)
	}
	return data, nil
}