Skip to content

Commit a9748a9

Browse files
committed
rfq: add client-side keepalive to price oracle connections
In this commit, we add client-side gRPC keepalive parameters to the price oracle client connections. This addresses the root cause of issue #1814 where connections to price oracle servers were being silently closed after idle periods, resulting in "connection reset by peer" errors during RFQ operations. The key change is adding PermitWithoutStream set to true, which allows the client to send keepalive pings even when there are no active RPC calls. This is essential for long-lived connections that may experience extended idle periods between price queries. Without this setting, idle connections would be closed by intermediaries or the server itself, leaving the client unaware of the broken connection until the next RPC attempt. We configure the client to ping the server every 30 seconds of inactivity and wait 20 seconds for a response. These values are conservative enough to detect connection issues quickly while avoiding excessive network traffic. Note that the price oracle server's keepalive enforcement policy must permit pings at this interval without active streams; otherwise the server may treat them as excessive and close the connection. The same keepalive parameters are applied to both TLS and insecure (testing-only) connection modes to ensure consistent behavior. Fixes #1814
1 parent f387c63 commit a9748a9

File tree

1 file changed

+26
-1
lines changed

1 file changed

+26
-1
lines changed

rfq/oracle.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"google.golang.org/grpc"
1919
"google.golang.org/grpc/credentials"
2020
"google.golang.org/grpc/credentials/insecure"
21+
"google.golang.org/grpc/keepalive"
2122
)
2223

2324
// PriceQueryIntent is an enum that represents the intent of a price rate
@@ -186,16 +187,38 @@ type RpcPriceOracle struct {
186187
rawConn *grpc.ClientConn
187188
}
188189

190+
// clientKeepaliveDialOption configures client-side keepalive pings to prevent
191+
// idle RFQ connections from being silently terminated by network intermediaries
192+
// (NATs, load balancers) or aggressive server timeouts. Without active
193+
// keepalive, the first price query after an idle period would fail with
194+
// "connection reset by peer" and require a retry.
195+
var clientKeepaliveDialOption = grpc.WithKeepaliveParams(
196+
keepalive.ClientParameters{
197+
// Ping server after 30 seconds of inactivity.
198+
Time: 30 * time.Second,
199+
200+
// Wait 20 seconds for ping response.
201+
Timeout: 20 * time.Second,
202+
203+
// Permit pings when no streams are active; needed for long-lived
204+
// connections with infrequent RFQ requests. NOTE(review): server
205+
// keepalive enforcement must allow such pings; confirm.
206+
PermitWithoutStream: true,
207+
},
208+
)
209+
189210
// serverDialOpts returns the set of dial options needed to connect to the
190211
// price oracle RPC server over TLS with certificate verification skipped.
191212
func serverDialOpts() ([]grpc.DialOption, error) {
192213
var opts []grpc.DialOption
193214

194-
// Skip TLS certificate verification.
195215
tlsConfig := tls.Config{InsecureSkipVerify: true}
196216
transportCredentials := credentials.NewTLS(&tlsConfig)
217+
197218
opts = append(opts, grpc.WithTransportCredentials(transportCredentials))
198219

220+
opts = append(opts, clientKeepaliveDialOption)
221+
199222
return opts, nil
200223
}
201224

@@ -209,6 +232,8 @@ func insecureServerDialOpts() ([]grpc.DialOption, error) {
209232
insecure.NewCredentials(),
210233
))
211234

235+
opts = append(opts, clientKeepaliveDialOption)
236+
212237
return opts, nil
213238
}
214239

0 commit comments

Comments
 (0)