-
Notifications
You must be signed in to change notification settings - Fork 3
feat(redis): survive transient Redis outages with bounded reconnects #76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,9 @@ | |
| class Redis implements Publisher, Consumer | ||
| { | ||
| private const int POP_TIMEOUT = 2; | ||
| private const int RECONNECT_BASE_BACKOFF_MS = 100; | ||
| private const int RECONNECT_MAX_BACKOFF_MS = 5_000; | ||
| private const int RECONNECT_BACKOFF_CAP_SHIFT = 10; | ||
|
|
||
| private bool $closed = false; | ||
|
|
||
|
|
@@ -20,18 +23,34 @@ public function __construct(private readonly Connection $connection) | |
|
|
||
| public function consume(Queue $queue, callable $messageCallback, callable $successCallback, callable $errorCallback): void | ||
| { | ||
| $reconnectAttempts = 0; | ||
|
|
||
| while (!$this->closed) { | ||
| /** | ||
| * Waiting for next Job. | ||
| */ | ||
| try { | ||
| $nextMessage = $this->connection->rightPopArray("{$queue->namespace}.queue.{$queue->name}", self::POP_TIMEOUT); | ||
| $reconnectAttempts = 0; | ||
| } catch (\RedisException $e) { | ||
| if ($this->closed) { | ||
| break; | ||
| } | ||
|
|
||
| throw $e; | ||
| // Drop the stale connection so the next pop opens a fresh one, then | ||
| // back off with full jitter before retrying. Keeps the worker alive | ||
| // across transient Redis outages instead of crash-looping. | ||
| $this->connection->close(); | ||
|
|
||
| $reconnectAttempts++; | ||
| $shift = \min(self::RECONNECT_BACKOFF_CAP_SHIFT, $reconnectAttempts - 1); | ||
| $backoffMs = \min( | ||
| self::RECONNECT_MAX_BACKOFF_MS, | ||
| self::RECONNECT_BASE_BACKOFF_MS * (2 ** $shift), | ||
| ); | ||
|
Comment on lines
+46
to
+50
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The constant is described as guarding against integer overflow, but |
||
| \usleep(\mt_rand(0, $backoffMs) * 1000); | ||
|
|
||
| continue; | ||
| } | ||
|
|
||
| if (!$nextMessage) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,10 @@ | |
|
|
||
| class Redis implements Connection | ||
| { | ||
| protected const int CONNECT_MAX_ATTEMPTS = 5; | ||
| protected const int CONNECT_BASE_BACKOFF_MS = 100; | ||
| protected const int CONNECT_MAX_BACKOFF_MS = 3_000; | ||
|
|
||
| protected string $host; | ||
| protected int $port; | ||
| protected ?string $user; | ||
|
|
@@ -178,25 +182,49 @@ public function ping(): bool | |
|
|
||
| public function close(): void | ||
| { | ||
| $this->redis?->close(); | ||
| $this->redis = null; | ||
| try { | ||
| $this->redis?->close(); | ||
| } catch (\Throwable) { | ||
| // best-effort: underlying socket may already be dead | ||
| } finally { | ||
| $this->redis = null; | ||
| } | ||
| } | ||
|
|
||
| protected function getRedis(): \Redis | ||
| { | ||
| if ($this->redis) { | ||
| if ($this->redis instanceof \Redis) { | ||
| return $this->redis; | ||
| } | ||
|
|
||
| $this->redis = new \Redis(); | ||
|
|
||
| $connectTimeout = $this->connectTimeout < 0 ? 0 : $this->connectTimeout; | ||
| $this->redis->connect($this->host, $this->port, $connectTimeout); | ||
|
|
||
| if ($this->readTimeout >= 0) { | ||
| $this->redis->setOption(\Redis::OPT_READ_TIMEOUT, $this->readTimeout); | ||
| for ($attempt = 1; $attempt <= self::CONNECT_MAX_ATTEMPTS; $attempt++) { | ||
| $redis = new \Redis(); | ||
|
|
||
| try { | ||
| $redis->connect($this->host, $this->port, $connectTimeout); | ||
|
|
||
| if ($this->readTimeout >= 0) { | ||
| $redis->setOption(\Redis::OPT_READ_TIMEOUT, $this->readTimeout); | ||
| } | ||
|
|
||
| $this->redis = $redis; | ||
| return $this->redis; | ||
|
Comment on lines
+205
to
+213
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| } catch (\RedisException $e) { | ||
| if ($attempt === self::CONNECT_MAX_ATTEMPTS) { | ||
| throw $e; | ||
| } | ||
|
|
||
| // Exponential backoff with full jitter to avoid thundering herd on recovery. | ||
| $backoffMs = \min( | ||
| self::CONNECT_MAX_BACKOFF_MS, | ||
| self::CONNECT_BASE_BACKOFF_MS * (2 ** ($attempt - 1)), | ||
| ); | ||
| \usleep(\mt_rand(0, $backoffMs) * 1000); | ||
| } | ||
| } | ||
|
|
||
| return $this->redis; | ||
| throw new \RedisException('Unreachable: connect loop exited without success or exception.'); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`RedisCluster` outages not caught
The broker catches `\RedisException`, but when the connection layer is `Connection/RedisCluster`, phpredis throws `\RedisClusterException` (a sibling of `\RedisException` under `RuntimeException`, not a subclass). Any cluster outage during `brPop` will propagate uncaught and crash the worker just as before this PR — the new reconnect logic silently doesn't apply to the cluster path.
To cover both adapters, widen the catch: