Upgraded feedkit's handling of stream sources
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitea.maximumdirect.net/ejr/feedkit/event"
|
||||
@@ -28,8 +29,10 @@ type Logger = logging.Logf
|
||||
// - For stream sources: Jitter is applied once at startup only (optional; useful to avoid
|
||||
// reconnect storms when many instances start together).
|
||||
type Job struct {
|
||||
Source sources.Input
|
||||
Every time.Duration
|
||||
Source sources.Input
|
||||
Every time.Duration
|
||||
StreamExitPolicy StreamExitPolicy
|
||||
StreamBackoff StreamBackoff
|
||||
|
||||
// Jitter is the maximum additional delay added before each poll.
|
||||
// Example: if Every=15m and Jitter=30s, each poll will occur at:
|
||||
@@ -41,12 +44,37 @@ type Job struct {
|
||||
Jitter time.Duration
|
||||
}
|
||||
|
||||
// StreamExitPolicy controls how the scheduler handles non-fatal stream exits.
//
// An exit that the source itself reports as fatal always aborts the scheduler,
// regardless of the policy configured here.
type StreamExitPolicy string

const (
	// StreamExitPolicyRestart restarts the stream after a backoff delay.
	// It is also the effective policy for the zero value and for any
	// unrecognized policy string.
	StreamExitPolicyRestart StreamExitPolicy = "restart"

	// StreamExitPolicyStop logs the exit and stops supervising that stream;
	// the stream is not restarted.
	StreamExitPolicyStop StreamExitPolicy = "stop"

	// StreamExitPolicyFatal reports the exit as a fatal scheduler error.
	StreamExitPolicyFatal StreamExitPolicy = "fatal"
)
|
||||
|
||||
// StreamBackoff controls restart pacing for stream supervision.
type StreamBackoff struct {
	// Initial is the delay before the first restart. Non-positive values
	// are replaced by the package default.
	Initial time.Duration

	// Max caps the restart delay as it doubles after each restart.
	// Non-positive values are replaced by the package default, and a Max
	// below Initial is raised to Initial.
	Max time.Duration

	// Jitter is the maximum random extra delay added to each restart delay.
	// Negative values are treated as zero.
	Jitter time.Duration
}
|
||||
|
||||
type Scheduler struct {
|
||||
Jobs []Job
|
||||
Out chan<- event.Event
|
||||
Logf Logger
|
||||
}
|
||||
|
||||
// Defaults and thresholds used by stream supervision.
const (
	// defaultStreamBackoffInitial is the restart delay used when none is configured.
	defaultStreamBackoffInitial = 1 * time.Second

	// defaultStreamBackoffMax is the restart delay ceiling used when none is configured.
	defaultStreamBackoffMax = 1 * time.Minute

	// defaultStreamBackoffJitter is the default restart jitter.
	// NOTE(review): no code visible in this section references it — confirm
	// where (or whether) it is applied.
	defaultStreamBackoffJitter = 250 * time.Millisecond

	// streamBackoffResetAfter is the minimum duration a stream run must last
	// to be considered stable, which resets the restart delay to its initial value.
	streamBackoffResetAfter = 5 * time.Minute
)

// timeNow is the clock used by the scheduler. It is a variable rather than a
// direct call to time.Now, presumably so tests can substitute a fake clock.
var timeNow = time.Now
|
||||
|
||||
// Run starts one goroutine per job.
|
||||
// Poll jobs run on their own interval and emit 0..N events per poll.
|
||||
// Stream jobs run continuously and emit events as they arrive.
|
||||
@@ -58,16 +86,38 @@ func (s *Scheduler) Run(ctx context.Context) error {
|
||||
return fmt.Errorf("scheduler.Run: no jobs configured")
|
||||
}
|
||||
|
||||
runCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
fatalErrCh := make(chan error, 1)
|
||||
var wg sync.WaitGroup
|
||||
for _, job := range s.Jobs {
|
||||
job := job // capture loop variable
|
||||
go s.runJob(ctx, job)
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
s.runJob(runCtx, job, fatalErrCh)
|
||||
}()
|
||||
}
|
||||
|
||||
<-ctx.Done()
|
||||
return ctx.Err()
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-fatalErrCh:
|
||||
cancel()
|
||||
<-done
|
||||
return err
|
||||
case <-runCtx.Done():
|
||||
<-done
|
||||
return runCtx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) runJob(ctx context.Context, job Job) {
|
||||
func (s *Scheduler) runJob(ctx context.Context, job Job, fatalErrCh chan<- error) {
|
||||
if job.Source == nil {
|
||||
s.logf("scheduler: job has nil source")
|
||||
return
|
||||
@@ -75,7 +125,7 @@ func (s *Scheduler) runJob(ctx context.Context, job Job) {
|
||||
|
||||
// Stream sources: event-driven.
|
||||
if ss, ok := job.Source.(sources.StreamSource); ok {
|
||||
s.runStream(ctx, job, ss)
|
||||
s.runStream(ctx, job, ss, fatalErrCh)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -93,18 +143,51 @@ func (s *Scheduler) runJob(ctx context.Context, job Job) {
|
||||
s.runPoller(ctx, job, ps)
|
||||
}
|
||||
|
||||
func (s *Scheduler) runStream(ctx context.Context, job Job, src sources.StreamSource) {
|
||||
func (s *Scheduler) runStream(ctx context.Context, job Job, src sources.StreamSource, fatalErrCh chan<- error) {
|
||||
policy := effectiveStreamExitPolicy(job.StreamExitPolicy)
|
||||
backoff := effectiveStreamBackoff(job.StreamBackoff)
|
||||
rng := seededRNG(src.Name())
|
||||
|
||||
// Optional startup jitter: helps avoid reconnect storms if many daemons start at once.
|
||||
if job.Jitter > 0 {
|
||||
rng := seededRNG(src.Name())
|
||||
if !sleepJitter(ctx, rng, job.Jitter) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Stream sources should block until ctx cancel or fatal error.
|
||||
if err := src.Run(ctx, s.Out); err != nil && ctx.Err() == nil {
|
||||
s.logf("scheduler: stream source %q exited with error: %v", src.Name(), err)
|
||||
nextDelay := backoff.Initial
|
||||
for {
|
||||
startedAt := timeNow()
|
||||
err := src.Run(ctx, s.Out)
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
normalizedErr := normalizeStreamExitError(src.Name(), err)
|
||||
if sources.IsStreamFatal(normalizedErr) {
|
||||
s.reportFatal(fatalErrCh, fmt.Errorf("scheduler: stream source %q exited fatally: %w", src.Name(), normalizedErr))
|
||||
return
|
||||
}
|
||||
|
||||
switch policy {
|
||||
case StreamExitPolicyStop:
|
||||
s.logf("scheduler: stream source %q stopped after exit: %v", src.Name(), normalizedErr)
|
||||
return
|
||||
case StreamExitPolicyFatal:
|
||||
s.reportFatal(fatalErrCh, fmt.Errorf("scheduler: stream source %q exited under fatal policy: %w", src.Name(), normalizedErr))
|
||||
return
|
||||
}
|
||||
|
||||
if streamRunWasStable(startedAt, timeNow()) {
|
||||
nextDelay = backoff.Initial
|
||||
}
|
||||
|
||||
delay := nextDelay + randomDuration(rng, backoff.Jitter)
|
||||
s.logf("scheduler: stream source %q exited; restarting in %s: %v", src.Name(), delay, normalizedErr)
|
||||
if !sleepDuration(ctx, delay) {
|
||||
return
|
||||
}
|
||||
nextDelay = nextStreamBackoff(nextDelay, backoff.Max)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,10 +247,77 @@ func (s *Scheduler) logf(format string, args ...any) {
|
||||
s.Logf(format, args...)
|
||||
}
|
||||
|
||||
func (s *Scheduler) reportFatal(ch chan<- error, err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case ch <- err:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// ---- helpers ----
|
||||
|
||||
func effectiveStreamExitPolicy(policy StreamExitPolicy) StreamExitPolicy {
|
||||
switch policy {
|
||||
case StreamExitPolicyStop, StreamExitPolicyFatal:
|
||||
return policy
|
||||
default:
|
||||
return StreamExitPolicyRestart
|
||||
}
|
||||
}
|
||||
|
||||
func effectiveStreamBackoff(cfg StreamBackoff) StreamBackoff {
|
||||
out := cfg
|
||||
if out.Initial <= 0 {
|
||||
out.Initial = defaultStreamBackoffInitial
|
||||
}
|
||||
if out.Max <= 0 {
|
||||
out.Max = defaultStreamBackoffMax
|
||||
}
|
||||
if out.Max < out.Initial {
|
||||
out.Max = out.Initial
|
||||
}
|
||||
if out.Jitter < 0 {
|
||||
out.Jitter = 0
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func normalizeStreamExitError(sourceName string, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return sources.StreamRetryable(fmt.Errorf("stream source %q exited unexpectedly without error", sourceName))
|
||||
}
|
||||
|
||||
func nextStreamBackoff(current, max time.Duration) time.Duration {
|
||||
if current <= 0 {
|
||||
current = defaultStreamBackoffInitial
|
||||
}
|
||||
if max <= 0 {
|
||||
max = defaultStreamBackoffMax
|
||||
}
|
||||
if current >= max {
|
||||
return max
|
||||
}
|
||||
next := current * 2
|
||||
if next < current || next > max {
|
||||
return max
|
||||
}
|
||||
return next
|
||||
}
|
||||
|
||||
func streamRunWasStable(startedAt, endedAt time.Time) bool {
|
||||
if startedAt.IsZero() || endedAt.IsZero() {
|
||||
return false
|
||||
}
|
||||
return endedAt.Sub(startedAt) >= streamBackoffResetAfter
|
||||
}
|
||||
|
||||
func seededRNG(name string) *rand.Rand {
|
||||
seed := time.Now().UnixNano() ^ int64(hashStringFNV32a(name))
|
||||
seed := timeNow().UnixNano() ^ int64(hashStringFNV32a(name))
|
||||
return rand.New(rand.NewSource(seed))
|
||||
}
|
||||
|
||||
@@ -206,11 +356,23 @@ func sleepJitter(ctx context.Context, rng *rand.Rand, max time.Duration) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
return sleepDuration(ctx, randomDuration(rng, max))
|
||||
}
|
||||
|
||||
// randomDuration returns a random duration in the inclusive range [0, max]
// drawn from rng. A non-positive max yields 0.
func randomDuration(rng *rand.Rand, max time.Duration) time.Duration {
	if max <= 0 {
		return 0
	}
	// Int63n requires a positive argument.
	// We add 1 so max itself is attainable.
	limit := int64(max) + 1
	if limit <= 0 {
		// max is the largest int64, so max+1 overflowed. Int63 already
		// covers exactly [0, max] in that case.
		return time.Duration(rng.Int63())
	}
	return time.Duration(rng.Int63n(limit))
}
|
||||
|
||||
func sleepDuration(ctx context.Context, d time.Duration) bool {
|
||||
if d <= 0 {
|
||||
return true
|
||||
}
|
||||
timer := time.NewTimer(d)
|
||||
defer timer.Stop()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user