/*
* Copyright (c) 2012-2019 Snowflake Computing Inc. All rights reserved.
*/
package net.snowflake.client.jdbc;
import net.snowflake.client.core.Event;
import net.snowflake.client.core.EventUtil;
import net.snowflake.client.core.HttpUtil;
import net.snowflake.client.jdbc.telemetryOOB.TelemetryService;
import net.snowflake.client.log.SFLogger;
import net.snowflake.client.log.SFLoggerFactory;
import net.snowflake.client.util.DecorrelatedJitterBackoff;
import net.snowflake.common.core.SqlState;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* This is an abstraction on top of http client.
*
* Currently it only has one method, which retries http request execution,
* so that the same retry logic doesn't have to be replicated at the
* different places where retry is needed.
*
* @author jhuang
*/
public class RestRequest
{
static final SFLogger logger = SFLoggerFactory.getLogger(RestRequest.class);
// request GUID attached to each HTTP request for tracing
private static final String SF_REQUEST_GUID = "request_guid";
// minimum backoff in milliseconds before we retry due to transient issues
private static long minBackoffInMilli = 1000;
// maximum backoff in milliseconds before we retry due to transient issues;
// the backoff grows via DecorrelatedJitterBackoff after each retry until
// it reaches this cap
private static long maxBackoffInMilli = 16000;
// retry at least once even if the timeout limit has been reached
private static int MIN_RETRY_COUNT = 1;
/**
* Execute an http request with retry logic.
*
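* A minimal calling sketch (the endpoint URL is hypothetical, and obtaining
* the shared client via {@code HttpUtil.getHttpClient()} is an assumption
* about the surrounding driver code, not something this method requires):
* <pre>{@code
* CloseableHttpClient client = HttpUtil.getHttpClient();
* HttpGet request = new HttpGet("https://example.snowflakecomputing.com/ping");
* CloseableHttpResponse response = RestRequest.execute(
*     client,
*     request,
*     300,   // retryTimeout: stop retrying after ~300 seconds
*     0,     // injectSocketTimeout: 0 = no injected socket timeout
*     null,  // canceling: no cancellation flag
*     false, // withoutCookies: keep the default cookie spec
*     true,  // includeRetryParameters
*     true); // includeRequestGuid
* }</pre>
*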
* @param httpClient client object used to communicate with other machine
* @param httpRequest request object contains all the request information
* @param retryTimeout retry timeout in seconds
* @param injectSocketTimeout socket timeout in milliseconds to inject on
* the first attempt to simulate a socket timeout (0 disables injection)
* @param canceling canceling flag
* @param withoutCookies whether the cookie spec should be set to IGNORE
* or not
* @param includeRetryParameters whether to include retry parameters in
* retried requests
* @param includeRequestGuid whether to include the request_guid parameter
* @return the HttpResponse received from the server
* @throws net.snowflake.client.jdbc.SnowflakeSQLException if the request
* times out or the client is in an illegal state, e.g. the
* connection has already been shut down
*/
public static CloseableHttpResponse execute(
CloseableHttpClient httpClient,
HttpRequestBase httpRequest,
long retryTimeout,
int injectSocketTimeout,
AtomicBoolean canceling,
boolean withoutCookies,
boolean includeRetryParameters,
boolean includeRequestGuid) throws SnowflakeSQLException
{
CloseableHttpResponse response = null;
// time the client started attempting to submit request
final long startTime = System.currentTimeMillis();
// start time for each request, used to keep track of how much time we
// have spent due to network issues, so that we can compare against the
// user-specified network timeout and make sure we do not retry
// infinitely when there are transient network/GS issues.
long startTimePerRequest = startTime;
// total elapsed time due to transient issues.
long elapsedMilliForTransientIssues = 0;
// retry timeout (ms)
long retryTimeoutInMilliseconds = retryTimeout * 1000;
// amount of time to wait for backing off before retry
long backoffInMilli = minBackoffInMilli;
DecorrelatedJitterBackoff backoff = new DecorrelatedJitterBackoff(
backoffInMilli, maxBackoffInMilli);
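// Illustrative backoff progression (a sketch only; the jitter is random,
// and the exact formula lives in DecorrelatedJitterBackoff, assumed here
// to follow the common decorrelated-jitter scheme
// next = min(max, rand(min, 3 * prev))):
//   retry 1: sleep 1000 ms
//   retry 2: sleep somewhere in [1000, 3000] ms
//   retry 3: sleep somewhere in [1000, 9000] ms, capped at 16000 ms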
int retryCount = 0;
// socket timeout used to restore the request config after an
// injected-timeout first attempt
int origSocketTimeout = 0;
Exception savedEx = null;
// records the reason for breaking out of the retry loop (used in telemetry)
String breakRetryReason = "";
// try request till we get a good response or retry timeout
while (true)
{
logger.debug("Retry count: {}", retryCount);
try
{
// update start time
startTimePerRequest = System.currentTimeMillis();
if (withoutCookies)
{
httpRequest.setConfig(HttpUtil.getRequestConfigWithoutcookies());
}
// for first call, simulate a socket timeout by setting socket timeout
// to the injected socket timeout value
if (injectSocketTimeout != 0 && retryCount == 0)
{
logger.debug("Injecting socket timeout by setting " +
"socket timeout to {} millisecond ", injectSocketTimeout);
httpRequest.setConfig(
HttpUtil.getDefaultRequestConfigWithSocketTimeout(
injectSocketTimeout, withoutCookies));
}
/*
* Add retryCount if the first request failed.
* GS can use the parameter for optimization. Specifically, GS
* will only check the metadata database to see if a query has been running
* for a retry request. This way, for the majority of query requests,
* which are not part of a retry, we don't have to pay the performance
* overhead of looking up the metadata database.
*/
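// A retried request URI might then look like (hypothetical host and path;
// request_guid appears only when includeRequestGuid is set):
//   https://example.snowflakecomputing.com/queries/v1/query-request
//       ?retryCount=2&clientStartTime=1565000000000&request_guid=<uuid>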
URIBuilder builder = new URIBuilder(httpRequest.getURI());
if (retryCount > 0)
{
builder.setParameter(
"retryCount", String.valueOf(retryCount));
if (includeRetryParameters)
{
builder.setParameter(
"clientStartTime", String.valueOf(startTime));
}
}
if (includeRequestGuid)
{
// Add request_guid for better tracing
builder.setParameter(SF_REQUEST_GUID, UUID.randomUUID().toString());
}
httpRequest.setURI(builder.build());
response = httpClient.execute(httpRequest);
}
catch (Exception ex)
{
// if the exception is caused by an illegal state, e.g. shutdown of the
// http client because the connection was closed, stop retrying
if (ex instanceof IllegalStateException)
{
throw new SnowflakeSQLException(ex,
ErrorCode.INVALID_STATE.getSqlState(),
ErrorCode.INVALID_STATE.getMessageCode(),
ex.getMessage());
}
savedEx = ex;
// if the request took more than 5 min (socket timeout) log an error
if ((System.currentTimeMillis() - startTimePerRequest) > 300000)
{
logger.error("HTTP request took longer than 5 min: {} sec",
(System.currentTimeMillis() - startTimePerRequest) / 1000);
}
StringWriter sw = new StringWriter();
savedEx.printStackTrace(new PrintWriter(sw));
logger.debug("Exception encountered for: {}, {}, {}",
httpRequest.toString(), ex.getLocalizedMessage(), sw.toString());
}
finally
{
// If a socket timeout was injected on the first attempt, restore the
// original socket timeout so that retries use the normal value.
if ((injectSocketTimeout != 0) && retryCount == 0)
{
httpRequest.setConfig(
HttpUtil.getDefaultRequestConfigWithSocketTimeout(
origSocketTimeout, withoutCookies));
}
}
/*
* If we got a response and the status code is not one of the
* transient failures, no more retry.
*
* SNOW-16385: retry for any 5xx error, plus 408 (request timeout)
* and 403 (observed as an intermittent AWS access issue); any other
* status, including 200 and the remaining 4xx codes, ends the loop.
*/
if (response != null &&
(response.getStatusLine().getStatusCode() < 500 ||  // not a 5xx ...
response.getStatusLine().getStatusCode() >= 600) && // ... server error
response.getStatusLine().getStatusCode() != 408 &&  // request timeout
response.getStatusLine().getStatusCode() != 403)    // AWS access issue
{
logger.debug("HTTP response code: {}",
response.getStatusLine().getStatusCode());
if (response.getStatusLine().getStatusCode() != 200)
{
logger.debug("Error response not retriable, " +
"HTTP Response Code={}, request={}",
response.getStatusLine().getStatusCode(),
httpRequest);
EventUtil.triggerBasicEvent(
Event.EventType.NETWORK_ERROR,
"StatusCode: " + response.getStatusLine().getStatusCode() +
", Reason: " + response.getStatusLine().getReasonPhrase() +
", Request: " + httpRequest.toString(),
false);
}
breakRetryReason = "status code does not need retry";
break;
}
else
{
if (response != null)
{
logger.debug(
"HTTP response not ok: status code={}, request={}",
response.getStatusLine().getStatusCode(),
httpRequest);
}
else
{
logger.debug("Null response for request={}", httpRequest);
}
// elapsed time in milliseconds for the last call; used both to
// accumulate the total time lost to transient issues and to decide
// whether the backoff sleep is still worth taking
long elapsedMilliForLastCall =
System.currentTimeMillis() - startTimePerRequest;
// check canceling flag
if (canceling != null && canceling.get())
{
logger.debug(
"Stop retrying since canceling is requested");
breakRetryReason = "canceling is requested";
break;
}
if (retryTimeoutInMilliseconds > 0)
{
// increment total elapsed due to transient issues
elapsedMilliForTransientIssues += elapsedMilliForLastCall;
// stop retrying if the total elapsed time due to transient issues
// has exceeded the retry timeout and we have retried at least
// MIN_RETRY_COUNT times
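// e.g. with retryTimeout = 300 the budget is 300000 ms: once failed
// attempts plus backoff sleeps total more than that, and at least one
// retry has run, the loop ends here (numbers are illustrative)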
if (elapsedMilliForTransientIssues > retryTimeoutInMilliseconds &&
retryCount >= MIN_RETRY_COUNT)
{
logger.error(
"Stop retrying since elapsed time due to network " +
"issues has reached timeout. " +
"Elapsed={}(ms), timeout={}(ms)",
elapsedMilliForTransientIssues, retryTimeoutInMilliseconds);
breakRetryReason = "retry timeout";
TelemetryService.getInstance().logHttpRequestTelemetryEvent(
"HttpRequestRetryTimeout",
httpRequest,
injectSocketTimeout,
canceling,
withoutCookies,
includeRetryParameters,
includeRequestGuid,
response,
savedEx,
breakRetryReason,
retryTimeout,
retryCount,
SqlState.IO_ERROR,
ErrorCode.NETWORK_ERROR.getMessageCode()
);
if (savedEx != null)
{
// try to upload events in the queue
// before throwing the exception
if (TelemetryService.getInstance().runFlushBeforeException())
{
TelemetryService.getInstance().flush();
}
}
// wrap the last exception in a SnowflakeSQLException and throw it
if (response == null && savedEx != null)
{
throw new SnowflakeSQLException(SqlState.IO_ERROR,
ErrorCode.NETWORK_ERROR.getMessageCode(),
"Exception encountered for HTTP request: " +
savedEx.getMessage());
}
// no more retry
break;
}
}
logger.debug("Retrying request: {}", httpRequest);
// sleep for the backoff period, but only if the failed call itself
// took less time than the backoff (e.g. if the call already burned
// 5 seconds and the backoff is only 1 second, retry immediately)
if (backoffInMilli > elapsedMilliForLastCall)
{
try
{
logger.debug("sleeping in {}(ms)", backoffInMilli);
Thread.sleep(backoffInMilli);
elapsedMilliForTransientIssues += backoffInMilli;
backoffInMilli = backoff.nextSleepTime(backoffInMilli);
}
catch (InterruptedException ex1)
{
logger.debug(
"Backoff sleep before retrying the request was interrupted");
}
}
retryCount++;
int numOfRetryToTriggerTelemetry = TelemetryService.getInstance().getNumOfRetryToTriggerTelemetry();
if (retryCount == numOfRetryToTriggerTelemetry)
{
TelemetryService.getInstance().logHttpRequestTelemetryEvent(
String.format("HttpRequestRetry%dTimes", numOfRetryToTriggerTelemetry),
httpRequest,
injectSocketTimeout,
canceling,
withoutCookies,
includeRetryParameters,
includeRequestGuid,
response,
savedEx,
breakRetryReason,
retryTimeout,
retryCount,
SqlState.IO_ERROR,
ErrorCode.NETWORK_ERROR.getMessageCode()
);
}
// release connection before retry
httpRequest.releaseConnection();
}
}
if (response == null)
{
logger.error("Returning null response for request: {}",
httpRequest);
}
else if (response.getStatusLine().getStatusCode() != 200)
{
logger.error(
"Error response: HTTP Response code={}, request={}",
response.getStatusLine().getStatusCode(),
httpRequest);
}
if (response == null ||
response.getStatusLine().getStatusCode() != 200)
{
String eventName;
if (response == null)
{
eventName = "NullResponseHttpError";
}
else
{
if (response.getStatusLine() == null)
{
eventName = "NullResponseStatusLine";
}
else
{
eventName = String.format("HttpError%d", response.getStatusLine().getStatusCode());
}
}
TelemetryService.getInstance().logHttpRequestTelemetryEvent(
eventName,
httpRequest,
injectSocketTimeout,
canceling,
withoutCookies,
includeRetryParameters,
includeRequestGuid,
response,
savedEx, breakRetryReason,
retryTimeout,
retryCount,
null,
0);
}
return response;
}
}