-
Notifications
You must be signed in to change notification settings - Fork 383
fix(chain): add rpc retry logic #5341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,8 +9,11 @@ import ( | |
| "encoding/hex" | ||
| "errors" | ||
| "fmt" | ||
| "io" | ||
| "math/big" | ||
| "net/http" | ||
| "strings" | ||
| "syscall" | ||
| "time" | ||
|
|
||
| "github.com/ethereum/go-ethereum/common" | ||
|
|
@@ -37,8 +40,92 @@ const ( | |
| maxDelay = 1 * time.Minute | ||
| cancellationDepth = 12 | ||
| additionalConfirmations = 2 | ||
| maxRetries = 3 | ||
| retryBackoff = 100 * time.Millisecond | ||
| ) | ||
|
|
||
| // retryRoundTripper implements http.RoundTripper with retry logic for transient network errors. | ||
| type retryRoundTripper struct { | ||
| transport http.RoundTripper | ||
| maxRetries int | ||
| backoff time.Duration | ||
| } | ||
|
|
||
| // RoundTrip executes a single HTTP transaction, returning a Response for the Request. | ||
| // It retries on transient errors like EOF, connection reset, and connection refused. | ||
| func (r *retryRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { | ||
| var lastErr error | ||
|
|
||
| for attempt := 0; attempt <= r.maxRetries; attempt++ { | ||
| if attempt > 0 { | ||
| backoffDuration := min(r.backoff*time.Duration(1<<uint(attempt-1)), 5*time.Second) | ||
| time.Sleep(backoffDuration) | ||
|
|
||
| // Reset request body for retry if possible | ||
| if req.Body != nil && req.GetBody != nil { | ||
| var err error | ||
| req.Body, err = req.GetBody() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may be problematic if the Body is already read or closed. There is a check below for non-rewritable body, so it should be ok, I suppose. In general, only idempotent requests should be retried, but in case of rpc, identifying idempotent requests are not easy. Getting information is ok, but posting information may present a problem by duplicating them.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe it is the simpliest to increase the values for the timeouts on the RPC side? func newHTTPClient() *http.Client {
return &http.Client{
Transport: &http.Transport{
DialContext: (&net.Dialer{
Timeout: 9 * time.Second, // < HAProxy timeout connect (10s)
KeepAlive: 5 * time.Second, // Keep connections alive but close before idle timeout
}).DialContext,
IdleConnTimeout: 8 * time.Second, // < HAProxy timeout client (10s)
TLSHandshakeTimeout: 9 * time.Second, // < HAProxy timeout connect (10s)
ResponseHeaderTimeout: 9 * time.Second, // < HAProxy timeout server (10s)
},
}
}
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, even reties would make the rpc endpoint do more work in this case. Increasing the timeout would be better in this case. |
||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to reset request body for retry: %w", err) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| resp, err := r.transport.RoundTrip(req) | ||
| if err == nil { | ||
| return resp, nil | ||
| } | ||
|
|
||
| lastErr = err | ||
|
|
||
| if !isRetryableError(err) { | ||
| return nil, err | ||
| } | ||
|
|
||
| if req.Body != nil && req.GetBody == nil { | ||
| return nil, fmt.Errorf("cannot retry request with non-rewindable body: %w", err) | ||
| } | ||
| } | ||
|
|
||
| return nil, fmt.Errorf("request failed after %d retries: %w", r.maxRetries, lastErr) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The loop above runs for attempt in 0 to maxRetries, for example with maxRetries = 3, there are 4 attempts, but the error says 3 retries. |
||
| } | ||
|
|
||
| // isRetryableError determines if an error should trigger a retry. | ||
| func isRetryableError(err error) bool { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these checks for error codes compatible with windows ? |
||
| // Check for EOF | ||
| if errors.Is(err, io.EOF) { | ||
| return true | ||
| } | ||
|
|
||
| // Check for connection reset | ||
| if errors.Is(err, syscall.ECONNRESET) { | ||
| return true | ||
| } | ||
|
|
||
| // Check for connection refused | ||
| if errors.Is(err, syscall.ECONNREFUSED) { | ||
| return true | ||
| } | ||
|
|
||
| // Check for broken pipe | ||
| if errors.Is(err, syscall.EPIPE) { | ||
| return true | ||
| } | ||
|
|
||
| return false | ||
| } | ||
|
|
||
| // newHTTPClient creates an HTTP client configured for blockchain RPC communication. | ||
| func newHTTPClient() *http.Client { | ||
| return &http.Client{ | ||
| Transport: &retryRoundTripper{ | ||
| transport: http.DefaultTransport, | ||
| maxRetries: maxRetries, | ||
| backoff: retryBackoff, | ||
| }, | ||
| } | ||
| } | ||
|
|
||
| // InitChain will initialize the Ethereum backend at the given endpoint and | ||
| // set up the Transaction Service to interact with it using the provided signer. | ||
| func InitChain( | ||
|
|
@@ -55,7 +142,7 @@ func InitChain( | |
| backend := backendnoop.New(chainID) | ||
|
|
||
| if chainEnabled { | ||
| rpcClient, err := rpc.DialContext(ctx, endpoint) | ||
| rpcClient, err := rpc.DialOptions(ctx, endpoint, rpc.WithHTTPClient(newHTTPClient())) | ||
| if err != nil { | ||
| return nil, common.Address{}, 0, nil, nil, fmt.Errorf("dial blockchain client: %w", err) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of sleep, use time.After and req.Context in select to return as soon as possible if context is done.