// com.databricks.jdbc.api.impl.volume.VolumeOperationProcessor Maven / Gradle / Ivy
package com.databricks.jdbc.api.impl.volume;
import com.databricks.jdbc.dbclient.IDatabricksHttpClient;
import com.databricks.jdbc.exception.DatabricksHttpException;
import com.databricks.jdbc.log.JdbcLogger;
import com.databricks.jdbc.log.JdbcLoggerFactory;
import java.io.*;
import java.util.*;
import java.util.function.Consumer;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.FileEntity;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.util.EntityUtils;
/**
 * Executor for volume operations.
 *
 * <p>Runs a single GET, PUT or REMOVE request against the supplied operation URL. The outcome is
 * reported through {@link #getStatus()} and {@link #getErrorMessage()}; the failure paths handled
 * in this class set those two fields instead of throwing.
 *
 * <p>GET and PUT work either on a local file (which must then live under one of the configured
 * allowed ingestion paths) or, when input streams are allowed, on a stream supplied by / handed
 * to the caller.
 */
class VolumeOperationProcessor {
  private static final JdbcLogger LOGGER =
      JdbcLoggerFactory.getLogger(VolumeOperationProcessor.class);
  private static final String COMMA_SEPARATOR = ",";
  private static final String PARENT_DIRECTORY_REF = "..";
  private static final String GET_OPERATION = "get";
  private static final String PUT_OPERATION = "put";
  private static final String REMOVE_OPERATION = "remove";
  // Primitive on purpose: avoids unboxing on every size comparison.
  private static final long PUT_SIZE_LIMITS = 5 * 1024 * 1024 * 1024L; // 5GB

  private final String operationType;
  private final String operationUrl;
  private final String localFilePath;
  private final Map<String, String> headers;
  private final Set<String> allowedVolumeIngestionPaths;
  private final boolean isAllowedInputStreamForVolumeOperation;
  private final IDatabricksHttpClient databricksHttpClient;
  private final InputStreamEntity inputStream;
  private final Consumer<? super HttpEntity> getStreamReceiver;
  private VolumeOperationStatus status;
  private String errorMessage;

  /**
   * Creates a processor in the {@link VolumeOperationStatus#PENDING} state.
   *
   * @param operationType operation name; matched case-insensitively against get/put/remove
   * @param operationUrl URL the HTTP request is sent to
   * @param headers headers added to the request; {@code null} is treated as "no headers"
   * @param localFilePath local source (PUT) or destination (GET) file; unused for stream mode
   * @param allowedVolumeIngestionPathString comma separated list of allowed local path prefixes
   * @param isAllowedInputStreamForVolumeOperation when {@code true}, streams are used instead of
   *     local files and local path validation is skipped
   * @param inputStream request body for a PUT in stream mode
   * @param databricksHttpClient client used to execute the request
   * @param getStreamReceiver receives the response entity of a GET in stream mode
   */
  VolumeOperationProcessor(
      String operationType,
      String operationUrl,
      Map<String, String> headers,
      String localFilePath,
      String allowedVolumeIngestionPathString,
      boolean isAllowedInputStreamForVolumeOperation,
      InputStreamEntity inputStream,
      IDatabricksHttpClient databricksHttpClient,
      Consumer<? super HttpEntity> getStreamReceiver) {
    this.operationType = operationType;
    this.operationUrl = operationUrl;
    this.localFilePath = localFilePath;
    // A missing header map must not turn into an NPE when the request is built.
    this.headers = headers == null ? Collections.emptyMap() : headers;
    this.allowedVolumeIngestionPaths = getAllowedPaths(allowedVolumeIngestionPathString);
    this.isAllowedInputStreamForVolumeOperation = isAllowedInputStreamForVolumeOperation;
    this.inputStream = inputStream;
    this.getStreamReceiver = getStreamReceiver;
    this.databricksHttpClient = databricksHttpClient;
    this.status = VolumeOperationStatus.PENDING;
    this.errorMessage = null;
  }

  /**
   * Parses the comma separated allow-list into a set of path prefixes.
   *
   * <p>Entries are trimmed and blank entries are dropped. Dropping blanks matters: an empty
   * prefix would match every local path in {@code startsWith} and silently disable the
   * allow-list (e.g. for a value such as {@code ",/tmp"}).
   *
   * @param allowedVolumeIngestionPathString raw configuration value, may be {@code null}
   * @return the non-blank entries; empty when nothing usable is configured
   */
  private static Set<String> getAllowedPaths(String allowedVolumeIngestionPathString) {
    if (allowedVolumeIngestionPathString == null || allowedVolumeIngestionPathString.isEmpty()) {
      return Collections.emptySet();
    }
    Set<String> allowedPaths = new HashSet<>();
    for (String candidate : allowedVolumeIngestionPathString.split(COMMA_SEPARATOR)) {
      String trimmed = candidate.trim();
      if (!trimmed.isEmpty()) {
        allowedPaths.add(trimmed);
      }
    }
    return allowedPaths;
  }

  /**
   * Validates the inputs and runs the configured operation.
   *
   * <p>On return the status is one of ABORTED (rejected before any request was sent), SUCCEEDED
   * or FAILED.
   */
  void process() {
    LOGGER.debug(
        String.format(
            "Running volume operation {%s} on local file {%s}",
            operationType, localFilePath == null ? "" : localFilePath));
    if (operationUrl == null || operationUrl.isEmpty()) {
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Volume operation URL is not set";
      LOGGER.error(errorMessage);
      return;
    }
    validateLocalFilePath();
    if (status == VolumeOperationStatus.ABORTED) {
      return;
    }
    status = VolumeOperationStatus.RUNNING;
    // A null operation type falls through to the "invalid" branch instead of throwing.
    String normalizedOperation =
        operationType == null ? "" : operationType.toLowerCase(Locale.ROOT);
    switch (normalizedOperation) {
      case GET_OPERATION:
        executeGetOperation();
        break;
      case PUT_OPERATION:
        executePutOperation();
        break;
      case REMOVE_OPERATION:
        executeDeleteOperation();
        break;
      default:
        status = VolumeOperationStatus.ABORTED;
        errorMessage = "Invalid operation type";
        LOGGER.error(errorMessage);
    }
  }

  /** Returns the current status of the operation. */
  VolumeOperationStatus getStatus() {
    return status;
  }

  /** Returns the error message of an aborted/failed operation, or {@code null} if none was set. */
  String getErrorMessage() {
    return errorMessage;
  }

  /**
   * Checks that the local file path may be used; sets status ABORTED when it may not.
   *
   * <p>Validation is skipped entirely in stream mode. REMOVE needs no local file, so for it only
   * the presence of an allow-list is required.
   */
  private void validateLocalFilePath() {
    if (isAllowedInputStreamForVolumeOperation) {
      return;
    }
    if (allowedVolumeIngestionPaths.isEmpty()) {
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Volume ingestion paths are not set";
      LOGGER.error(errorMessage);
      return;
    }
    // Constant-first comparison keeps a null operation type from throwing here.
    if (REMOVE_OPERATION.equalsIgnoreCase(operationType)) {
      return;
    }
    if (localFilePath == null
        || localFilePath.isEmpty()
        || localFilePath.contains(PARENT_DIRECTORY_REF)) {
      LOGGER.error("Local file path is invalid {%s}", localFilePath);
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Local file path is invalid";
      return;
    }
    // NOTE(review): this is a plain string-prefix test, so an allowed entry "/tmp/a" also admits
    // "/tmp/ab/x" - confirm whether component-wise path matching is intended.
    boolean pathAllowed = allowedVolumeIngestionPaths.stream().anyMatch(localFilePath::startsWith);
    if (!pathAllowed) {
      LOGGER.error("Local file path is not allowed {%s}", localFilePath);
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Local file path is not allowed";
    }
  }

  /**
   * Best-effort release of a response: drains the entity, then closes the response.
   *
   * <p>The close happens in {@code finally} so that a failure while draining cannot leave the
   * response open. Failures are only logged at debug level; this method never throws
   * {@link IOException}.
   */
  private void closeResponse(CloseableHttpResponse response) {
    if (response == null) {
      return;
    }
    try {
      if (response.getEntity() != null) {
        EntityUtils.consume(response.getEntity());
      }
    } catch (IOException e) {
      LOGGER.debug("Failed to consume response entity on close: " + e.getMessage());
    } finally {
      try {
        response.close();
      } catch (IOException e) {
        LOGGER.debug("Failed to close response: " + e.getMessage());
      }
    }
  }

  /** Runs the GET request, delivering the content as a stream or into the local file. */
  private void executeGetOperation() {
    HttpGet httpGet = new HttpGet(operationUrl);
    headers.forEach(httpGet::addHeader);
    if (isAllowedInputStreamForVolumeOperation) {
      // We return the input stream directly to clients, if they want to consume as input stream
      fetchAsStream(httpGet);
    } else {
      // Copy the data in local file as requested by user
      downloadToLocalFile(httpGet);
    }
  }

  /**
   * Executes the GET and hands the response entity to the stream receiver.
   *
   * <p>On success the response is intentionally left open here: the entity passed to the
   * receiver is still backed by it.
   */
  private void fetchAsStream(HttpGet httpGet) {
    if (getStreamReceiver == null) {
      // Reject before sending the request; otherwise the open response would leak on the NPE.
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Stream receiver not set for GET operation";
      LOGGER.error(errorMessage);
      return;
    }
    CloseableHttpResponse response = null;
    try {
      response = databricksHttpClient.execute(httpGet);
      if (!isSuccessfulHttpResponse(response)) {
        status = VolumeOperationStatus.FAILED;
        errorMessage =
            String.format(
                "Failed to fetch content from volume with error code {%s} for input stream and error {%s}",
                response.getStatusLine().getStatusCode(),
                response.getStatusLine().getReasonPhrase());
        LOGGER.error(errorMessage);
        closeResponse(response);
        return;
      }
      getStreamReceiver.accept(response.getEntity());
      status = VolumeOperationStatus.SUCCEEDED;
    } catch (DatabricksHttpException e) {
      closeResponse(response);
      status = VolumeOperationStatus.FAILED;
      errorMessage = "Failed to execute GET operation for input stream: " + e.getMessage();
      LOGGER.error(errorMessage);
    }
  }

  /**
   * Executes the GET and writes the response body to {@code localFilePath}.
   *
   * <p>Aborts if the target file already exists. A response without an entity is reported as
   * FAILED, and a partially written file is removed when the copy fails so that a later retry is
   * not rejected with "Local file already exists".
   */
  private void downloadToLocalFile(HttpGet httpGet) {
    File localFile = new File(localFilePath);
    if (localFile.exists()) {
      LOGGER.error("Local file already exists for GET operation {%s}", localFilePath);
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Local file already exists";
      return;
    }
    try (CloseableHttpResponse response = databricksHttpClient.execute(httpGet)) {
      if (!isSuccessfulHttpResponse(response)) {
        LOGGER.error(
            "Failed to fetch content from volume with error {%s} for local file {%s}",
            response.getStatusLine().getStatusCode(), localFilePath);
        status = VolumeOperationStatus.FAILED;
        errorMessage = "Failed to download file";
        return;
      }
      HttpEntity entity = response.getEntity();
      if (entity == null) {
        // Without this branch the status would stay RUNNING after process() returns.
        status = VolumeOperationStatus.FAILED;
        errorMessage = "Failed to download file: response has no content";
        LOGGER.error(errorMessage);
        return;
      }
      InputStream content = entity.getContent();
      try (FileOutputStream outputStream = new FileOutputStream(localFile)) {
        content.transferTo(outputStream);
        status = VolumeOperationStatus.SUCCEEDED;
      } catch (FileNotFoundException e) {
        LOGGER.error("Local file path is invalid or a directory {%s}", localFilePath);
        status = VolumeOperationStatus.FAILED;
        errorMessage = "Local file path is invalid or a directory";
      } catch (IOException e) {
        // TODO: Add retries
        LOGGER.error(
            e,
            "Failed to write to local file {%s} with error {%s}",
            localFilePath,
            e.getMessage());
        status = VolumeOperationStatus.FAILED;
        errorMessage = "Failed to write to local file: " + e.getMessage();
        // The output stream is already closed at this point, so the file can be removed.
        deletePartialFile(localFile);
      } finally {
        // It's important to consume the entity content fully and ensure the stream is closed
        EntityUtils.consume(entity);
      }
    } catch (IOException | DatabricksHttpException e) {
      status = VolumeOperationStatus.FAILED;
      errorMessage = "Failed to download file: " + e.getMessage();
      LOGGER.error(errorMessage);
    }
  }

  /** Best-effort removal of an incompletely written download target. */
  private void deletePartialFile(File localFile) {
    if (localFile.exists() && !localFile.delete()) {
      LOGGER.debug("Failed to delete partially written local file: " + localFile.getPath());
    }
  }

  /** Uploads the caller supplied stream (stream mode) or the local file via HTTP PUT. */
  private void executePutOperation() {
    HttpPut httpPut = new HttpPut(operationUrl);
    headers.forEach(httpPut::addHeader);
    if (isAllowedInputStreamForVolumeOperation) {
      if (inputStream == null) {
        status = VolumeOperationStatus.ABORTED;
        errorMessage = "InputStream not set for PUT operation";
        LOGGER.error(errorMessage);
        return;
      }
      httpPut.setEntity(inputStream);
    } else {
      // Set the FileEntity as the request body
      File file = new File(localFilePath);
      if (localFileHasErrorForPutOperation(file)) {
        return;
      }
      httpPut.setEntity(new FileEntity(file, ContentType.DEFAULT_BINARY));
    }
    // Execute the request
    try (CloseableHttpResponse response = databricksHttpClient.execute(httpPut)) {
      // Process the response
      if (isSuccessfulHttpResponse(response)) {
        status = VolumeOperationStatus.SUCCEEDED;
      } else {
        LOGGER.error(
            "Failed to upload file {%s} with error code: {%s}",
            localFilePath, response.getStatusLine().getStatusCode());
        // TODO: Add retries
        status = VolumeOperationStatus.FAILED;
        errorMessage =
            "Failed to upload file with error code: " + response.getStatusLine().getStatusCode();
      }
    } catch (IOException | DatabricksHttpException e) {
      LOGGER.error("Failed to upload file {%s} with error {%s}", localFilePath, e.getMessage());
      status = VolumeOperationStatus.FAILED;
      errorMessage = "Failed to upload file: " + e.getMessage();
    }
  }

  /**
   * Checks that the local file is usable as a PUT body.
   *
   * @param file the local file to upload
   * @return {@code true} if the file is missing, a directory, empty or larger than the PUT size
   *     limit; in that case status and error message have been set to ABORTED
   */
  private boolean localFileHasErrorForPutOperation(File file) {
    if (!file.exists() || file.isDirectory()) {
      LOGGER.error("Local file does not exist or is a directory {%s}", localFilePath);
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Local file does not exist or is a directory";
      return true;
    }
    long fileSize = file.length();
    if (fileSize == 0) {
      LOGGER.error("Local file is empty {%s}", localFilePath);
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Local file is empty";
      return true;
    }
    if (fileSize > PUT_SIZE_LIMITS) {
      LOGGER.error("Local file too large {%s}", localFilePath);
      status = VolumeOperationStatus.ABORTED;
      errorMessage = "Local file too large";
      return true;
    }
    return false;
  }

  /** Sends the HTTP DELETE for a REMOVE operation. */
  private void executeDeleteOperation() {
    // TODO: Implement AWS-specific logic if required
    HttpDelete httpDelete = new HttpDelete(operationUrl);
    headers.forEach(httpDelete::addHeader);
    try (CloseableHttpResponse response = databricksHttpClient.execute(httpDelete)) {
      if (isSuccessfulHttpResponse(response)) {
        status = VolumeOperationStatus.SUCCEEDED;
      } else {
        LOGGER.error(
            "Failed to delete volume with error code: {%s}",
            response.getStatusLine().getStatusCode());
        status = VolumeOperationStatus.FAILED;
        errorMessage = "Failed to delete volume";
      }
    } catch (DatabricksHttpException | IOException e) {
      LOGGER.error(e, "Failed to delete volume with error {%s}", e.getMessage());
      status = VolumeOperationStatus.FAILED;
      errorMessage = "Failed to delete volume: " + e.getMessage();
    }
  }

  /** Returns {@code true} when the response status code is in the 2xx range. */
  private boolean isSuccessfulHttpResponse(CloseableHttpResponse response) {
    int statusCode = response.getStatusLine().getStatusCode();
    return statusCode >= 200 && statusCode < 300;
  }

  /** Lifecycle states of a volume operation. */
  enum VolumeOperationStatus {
    PENDING,
    RUNNING,
    ABORTED,
    SUCCEEDED,
    FAILED
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy