// FILE: ./transport/http.go package transport import ( "context" "fmt" "io" "net/http" "strings" "time" ) // DefaultHTTPResponseBodyLimitBytes is a hard safety limit on HTTP response bodies. // API responses should be small, so this protects us from accidental // or malicious large responses. const DefaultHTTPResponseBodyLimitBytes int64 = 2 << 21 // 4 MiB // DefaultHTTPTimeout is the standard timeout used by HTTP sources. // Individual drivers may override this if they have a specific need. const DefaultHTTPTimeout = 10 * time.Second // NewHTTPClient returns a simple http.Client configured with a timeout. // If timeout <= 0, DefaultHTTPTimeout is used. func NewHTTPClient(timeout time.Duration) *http.Client { if timeout <= 0 { timeout = DefaultHTTPTimeout } return &http.Client{Timeout: timeout} } func FetchBody(ctx context.Context, client *http.Client, url, userAgent, accept string) ([]byte, error) { return FetchBodyWithLimit(ctx, client, url, userAgent, accept, DefaultHTTPResponseBodyLimitBytes) } func FetchBodyWithLimit(ctx context.Context, client *http.Client, url, userAgent, accept string, bodyLimitBytes int64) ([]byte, error) { res, err := doRequest(ctx, client, http.MethodGet, url, userAgent, accept, "", "") if err != nil { return nil, err } defer res.Body.Close() if res.StatusCode < 200 || res.StatusCode >= 300 { return nil, fmt.Errorf("HTTP %s", res.Status) } return readValidatedBody(res.Body, bodyLimitBytes) } // HTTPValidators are cache validators learned from prior successful GET responses. // // ETag is preferred when present. LastModified is used as a fallback validator // when ETag is unavailable. type HTTPValidators struct { ETag string LastModified string } // FetchBodyIfChanged performs an HTTP GET and opportunistically uses conditional // request headers based on the provided validators. // // Behavior: // - if conditional is false, this behaves like a normal GET and leaves validators unchanged // - if validators.ETag is set, sends If-None-Match // - else if validators.LastModified is set, sends If-Modified-Since // - 304 Not Modified is treated as success with changed=false and no body // - 200 responses are treated as changed=true and still enforce the normal body checks // // Returned validators reflect any updates learned from the response headers. func FetchBodyIfChanged( ctx context.Context, client *http.Client, url, userAgent, accept string, conditional bool, validators HTTPValidators, ) ([]byte, bool, HTTPValidators, error) { return FetchBodyIfChangedWithLimit(ctx, client, url, userAgent, accept, conditional, validators, DefaultHTTPResponseBodyLimitBytes) } func FetchBodyIfChangedWithLimit( ctx context.Context, client *http.Client, url, userAgent, accept string, conditional bool, validators HTTPValidators, bodyLimitBytes int64, ) ([]byte, bool, HTTPValidators, error) { headerName, headerValue := conditionalHeader(conditional, validators) res, err := doRequest(ctx, client, http.MethodGet, url, userAgent, accept, headerName, headerValue) if err != nil { return nil, false, validators, err } defer res.Body.Close() switch res.StatusCode { case http.StatusNotModified: if conditional { validators = refreshValidators(validators, res.Header) } return nil, false, validators, nil default: if res.StatusCode < 200 || res.StatusCode >= 300 { return nil, false, validators, fmt.Errorf("HTTP %s", res.Status) } } b, err := readValidatedBody(res.Body, bodyLimitBytes) if err != nil { return nil, false, validators, err } if conditional { validators = replaceValidators(res.Header) } return b, true, validators, nil } func doRequest(ctx context.Context, client *http.Client, method, url, userAgent, accept, headerName, headerValue string) (*http.Response, error) { req, err := http.NewRequestWithContext(ctx, method, url, nil) if err != nil { return nil, err } if userAgent != "" { req.Header.Set("User-Agent", userAgent) } if accept != "" { req.Header.Set("Accept", accept) } if headerName != "" && headerValue != "" { req.Header.Set(headerName, headerValue) } return client.Do(req) } func conditionalHeader(enabled bool, validators HTTPValidators) (string, string) { if !enabled { return "", "" } if etag := strings.TrimSpace(validators.ETag); etag != "" { return "If-None-Match", etag } if lastModified := strings.TrimSpace(validators.LastModified); lastModified != "" { return "If-Modified-Since", lastModified } return "", "" } func replaceValidators(header http.Header) HTTPValidators { return HTTPValidators{ ETag: strings.TrimSpace(header.Get("ETag")), LastModified: strings.TrimSpace(header.Get("Last-Modified")), } } func refreshValidators(current HTTPValidators, header http.Header) HTTPValidators { if etag := strings.TrimSpace(header.Get("ETag")); etag != "" { current.ETag = etag } if lastModified := strings.TrimSpace(header.Get("Last-Modified")); lastModified != "" { current.LastModified = lastModified } return current } func readValidatedBody(r io.Reader, bodyLimitBytes int64) ([]byte, error) { if bodyLimitBytes <= 0 { bodyLimitBytes = DefaultHTTPResponseBodyLimitBytes } // Read at most bodyLimitBytes + 1 so we can detect overflow. limited := io.LimitReader(r, bodyLimitBytes+1) b, err := io.ReadAll(limited) if err != nil { return nil, err } if len(b) == 0 { return nil, fmt.Errorf("empty response body") } if int64(len(b)) > bodyLimitBytes { return nil, fmt.Errorf("response body too large (>%d bytes)", bodyLimitBytes) } return b, nil }