/*******************************************************************************
* HPCC SYSTEMS software Copyright (C) 2023 HPCC Systems®.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package org.hpccsystems.dfs.client;
import java.util.List;
import java.util.Stack;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import java.util.ArrayList;
import java.util.Arrays;
import java.io.BufferedInputStream;
import java.io.Console;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.StatusCode;
import io.opentelemetry.api.OpenTelemetry;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import org.hpccsystems.commons.ecl.FieldDef;
import org.json.JSONArray;
import org.json.JSONObject;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.hpccsystems.ws.client.HPCCWsClient;
import org.hpccsystems.ws.client.platform.Platform;
import org.hpccsystems.ws.client.utils.Connection;
import org.hpccsystems.dfs.cluster.*;
import org.hpccsystems.commons.ecl.RecordDefinitionTranslator;
import org.hpccsystems.commons.errors.HpccFileException;
import org.hpccsystems.ws.client.HPCCWsDFUClient;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUCreateFileWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFilePartWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFileTypeWrapper;
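/**
 * Command line utility for reading, writing, and copying HPCC logical files.
 *
 * Supports four operations: -read, -read_test, -copy, and -write. An illustrative
 * invocation (classpath, URL, and file names below are placeholders):
 *
 *   java -cp dfsclient-jar-with-dependencies.jar org.hpccsystems.dfs.client.FileUtility \
 *        -url http://127.0.0.1:8010 -read example::dataset -out /tmp
 *
 * Each operation reports its results as a JSON array; see {@link #run(String[])}.
 */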
public class FileUtility
{
// This value represents the maximum number of splits that will be created during
// the reading process to allow for redistribution across clusters of varying sizes.
// E.g., a 4GB file part will be redistributable in approximately 32MB blocks.
private static final int DEFAULT_SPLIT_TABLE_SIZE = 128;
private static final int NUM_DEFAULT_THREADS = 4;
static private final int DEFAULT_ACCESS_EXPIRY_SECONDS = 120;
static private final int DEFAULT_READ_REQUEST_SIZE = 4096;
static private final int DEFAULT_READ_REQUEST_DELAY = 0;
private static boolean otelNeedsInit = true;
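// Tracks state for in-flight operations: operations nest on a stack, and each one
// accumulates error/warning messages, I/O counters, and an OpenTelemetry span.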
private static class TaskContext
{
private static class TaskOperation
{
public String currentOperationDesc = "";
public long operationStartNS = 0;
public List<String> errorMessages = new ArrayList<String>();
public List<String> warnMessages = new ArrayList<String>();
public AtomicLong recordsRead = new AtomicLong(0);
public AtomicLong recordsWritten = new AtomicLong(0);
public AtomicLong bytesRead = new AtomicLong(0);
public AtomicLong bytesWritten = new AtomicLong(0);
public Span operationSpan = null;
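// Ends the operation's span and summarizes its duration, counters, bandwidth,
// and any errors or warnings into a JSON result object.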
public JSONObject end(boolean success)
{
if (success)
{
operationSpan.setStatus(StatusCode.OK);
}
else
{
operationSpan.setStatus(StatusCode.ERROR);
}
operationSpan.end();
long totalOperationTime = System.nanoTime();
totalOperationTime -= operationStartNS;
double timeInSeconds = (double) totalOperationTime / 1_000_000_000.0;
JSONObject results = new JSONObject();
results.put("operation", currentOperationDesc);
results.put("successful", success);
JSONArray errors = new JSONArray();
for (String err : errorMessages)
{
errors.put(err);
}
results.put("errors", errors);
JSONArray warns = new JSONArray();
for (String warn : warnMessages)
{
warns.put(warn);
}
results.put("warns", warns);
results.put("bytesWritten", bytesWritten.get());
results.put("recordsWritten", recordsWritten.get());
results.put("bytesRead", bytesRead.get());
results.put("recordsRead", recordsRead.get());
results.put("time", String.format("%.2f s",timeInSeconds));
double readBandwidth = (double) bytesRead.get() / (1_000_000.0 * timeInSeconds);
results.put("Read Bandwidth", String.format("%.2f MB/s", readBandwidth));
double writeBandwidth = (double) bytesWritten.get() / (1_000_000.0 * timeInSeconds);
results.put("Write Bandwidth", String.format("%.2f MB/s", writeBandwidth));
return results;
}
}
private Stack<TaskOperation> operations = new Stack<TaskOperation>();
public List<JSONObject> operationResults = new ArrayList<JSONObject>();
public int readRetries = HpccRemoteFileReader.DEFAULT_READ_RETRIES;
public int socketOpTimeoutMS = RowServiceInputStream.DEFAULT_SOCKET_OP_TIMEOUT_MS;
public int initialReadSizeKB = RowServiceInputStream.DEFAULT_INITIAL_REQUEST_READ_SIZE_KB;
public void setCurrentOperationSpanAttributes(Attributes attributes)
{
if (!hasCurrentOperation())
{
return;
}
TaskOperation op = getCurrentOperation();
synchronized(op.operationSpan)
{
op.operationSpan.setAllAttributes(attributes);
}
}
public void addCurrentOperationSpanAttribute(AttributeKey<Object> key, Object value)
{
if (!hasCurrentOperation())
{
return;
}
TaskOperation op = getCurrentOperation();
synchronized(op.operationSpan)
{
op.operationSpan.setAttribute(key, value);
}
}
public void makeCurrentOperationSpanCurrent()
{
if (!hasCurrentOperation())
{
return;
}
TaskOperation op = getCurrentOperation();
synchronized(op.operationSpan)
{
op.operationSpan.makeCurrent();
}
}
public boolean hasError()
{
if (!hasCurrentOperation())
{
return false;
}
TaskOperation op = getCurrentOperation();
boolean err = false;
synchronized(op.errorMessages)
{
err = op.errorMessages.size() > 0;
}
return err;
}
public void addError(String error)
{
if (!hasCurrentOperation())
{
return;
}
TaskOperation op = getCurrentOperation();
synchronized(op.errorMessages)
{
op.errorMessages.add(error);
}
synchronized(op.operationSpan)
{
op.operationSpan.recordException(new Exception(error));
}
}
public void addWarn(String warn)
{
if (!hasCurrentOperation())
{
return;
}
TaskOperation op = getCurrentOperation();
synchronized(op.warnMessages)
{
op.warnMessages.add(warn);
}
synchronized(op.operationSpan)
{
op.operationSpan.addEvent(warn);
}
}
public boolean hasCurrentOperation()
{
if (operations.isEmpty())
{
return false;
}
return true;
}
public TaskOperation getCurrentOperation()
{
if (!hasCurrentOperation())
{
return null;
}
return operations.peek();
}
private void setCurrentOperation(TaskOperation op)
{
operations.push(op);
}
public void startOperation(String operationName)
{
TaskOperation op = new TaskOperation();
op.currentOperationDesc = operationName;
op.operationStartNS = System.nanoTime();
Span parentSpan = null;
TaskOperation prevOp = getCurrentOperation();
if (prevOp != null)
{
parentSpan = prevOp.operationSpan;
}
op.operationSpan = Utils.createChildSpan(parentSpan, operationName);
setCurrentOperation(op);
}
public void endOperation()
{
endOperation(true);
}
public void endOperation(boolean success)
{
if (!hasCurrentOperation())
{
return;
}
operationResults.add(getCurrentOperation().end(success));
operations.pop();
}
public JSONArray generateResultsMessage()
{
JSONArray results = new JSONArray();
for (JSONObject result : operationResults)
{
results.put(result);
}
return results;
}
};
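// Resolves credentials from the command line, prompting on the console for a
// missing username or password.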
private static String[] getCredentials(CommandLine cmd)
{
Console console = System.console();
String user = cmd.getOptionValue("user");
boolean userIsEmpty = user == null || user.isEmpty();
if (userIsEmpty)
{
user = new String(console.readLine("Enter username: "));
userIsEmpty = user == null || user.isEmpty();
}
String pass = cmd.getOptionValue("pass");
boolean passIsEmpty = pass == null || pass.isEmpty();
if (!userIsEmpty && passIsEmpty)
{
pass = new String(console.readPassword("Enter password for " + user + ": "));
}
return new String[] {user, pass};
}
private static void applyGlobalConfig(CommandLine cmd)
{
int concurrentStartups = -1;
String concurrentStartupsStr = cmd.getOptionValue("connection_startup_limit", "" + -1);
try
{
concurrentStartups = Integer.parseInt(concurrentStartupsStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for connection_startup_limit: "
+ concurrentStartupsStr + ", must be an integer.");
}
if (concurrentStartups > 0)
{
RowServiceInputStream.setMaxConcurrentConnectionStartups(concurrentStartups);
}
}
private static enum FileFormat
{
THOR,
PARQUET
};
private static class SplitEntry
{
public long recordCount = 0;
public long splitStart = 0;
public long splitEnd = 0;
public JSONObject toJson()
{
JSONObject res = new JSONObject();
res.put("recordCount", recordCount);
res.put("splitStart", splitStart);
res.put("splitEnd", splitEnd);
return res;
}
public static SplitEntry fromJson(JSONObject json) throws IOException
{
SplitEntry split = new SplitEntry();
split.recordCount = json.getLong("recordCount");
split.splitStart = json.getLong("splitStart");
split.splitEnd = json.getLong("splitEnd");
return split;
}
}
private static class SplitTable
{
public List<SplitEntry> splits = new ArrayList<SplitEntry>();
private long splitStride = 1;
private int maxSplitEntries = DEFAULT_SPLIT_TABLE_SIZE;
private SplitEntry currentSplit = new SplitEntry();
public SplitTable(int maxSplits)
{
maxSplitEntries = maxSplits;
if (maxSplitEntries % 2 == 1)
{
maxSplitEntries++;
}
}
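// Records the stream offset of a record boundary. A new split entry is emitted
// every splitStride records; once the table is full it is compacted.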
public void addRecordPosition(long fileOffset)
{
if (currentSplit.recordCount == splitStride)
{
currentSplit.splitEnd = fileOffset;
splits.add(currentSplit);
currentSplit = new SplitEntry();
currentSplit.splitStart = fileOffset;
}
if (splits.size() == maxSplitEntries)
{
compactSplitTable();
}
currentSplit.recordCount++;
}
public void finish(long fileSize)
{
currentSplit.splitEnd = fileSize;
splits.add(currentSplit);
}
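// Halves the table by merging adjacent split entries and doubling the stride,
// keeping the number of entries bounded by maxSplitEntries.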
private void compactSplitTable()
{
splitStride *= 2;
List<SplitEntry> newSplits = new ArrayList<SplitEntry>();
for (int i = 0; i < splits.size(); i+=2)
{
SplitEntry first = splits.get(i);
SplitEntry second = splits.get(i+1);
SplitEntry combined = new SplitEntry();
combined.splitStart = first.splitStart;
combined.splitEnd = second.splitEnd;
combined.recordCount = first.recordCount + second.recordCount;
newSplits.add(combined);
}
splits = newSplits;
}
public JSONObject toJson()
{
JSONObject res = new JSONObject();
res.put("splitStride", splitStride);
res.put("maxSplitEntries", maxSplitEntries);
JSONArray splitsJson = new JSONArray();
for (int i = 0; i < splits.size(); i++)
{
splitsJson.put(splits.get(i).toJson());
}
res.put("splits", splitsJson);
return res;
}
public static SplitTable fromJson(JSONObject json) throws IOException
{
int maxSplits = json.getInt("maxSplitEntries");
SplitTable table = new SplitTable(maxSplits);
table.splitStride = json.getLong("splitStride");
JSONArray splitsJson = json.getJSONArray("splits");
if (splitsJson != null)
{
for (int i = 0; i < splitsJson.length(); i++)
{
table.splits.add(SplitEntry.fromJson(splitsJson.getJSONObject(i)));
}
}
return table;
}
}
private static class SplitFile
{
private List<SplitTable> splitTables = new ArrayList<SplitTable>();
public SplitFile()
{
}
public SplitFile(SplitTable[] tables)
{
splitTables.addAll(Arrays.asList(tables));
}
public SplitTable[] getSplitTableArray()
{
return splitTables.toArray(new SplitTable[0]);
}
public void load(FileInputStream inStream) throws IOException
{
long fileSize = inStream.getChannel().size();
if (fileSize > Integer.MAX_VALUE)
{
throw new IOException("Error: Input file is too large to load.");
}
byte[] byteData = new byte[(int) fileSize];
int bytesConsumed = 0;
while (bytesConsumed < byteData.length)
{
int numRead = inStream.read(byteData, bytesConsumed, byteData.length - bytesConsumed);
if (numRead < 0)
{
throw new IOException("Error: Unexpected end of stream while loading split file.");
}
bytesConsumed += numRead;
}
String jsonStr = new String(byteData, StandardCharsets.UTF_8);
JSONObject data = new JSONObject(jsonStr);
int version = data.getInt("version");
if (version != 0)
{
throw new IOException("Error: Unsupported file format version: " + version + ", halting file load.");
}
JSONArray jsonSplitTables = data.getJSONArray("tables");
if (jsonSplitTables != null)
{
for (int i = 0; i < jsonSplitTables.length(); i++)
{
splitTables.add(SplitTable.fromJson(jsonSplitTables.getJSONObject(i)));
}
}
}
public void save(OutputStream outStream) throws IOException
{
JSONObject data = new JSONObject();
JSONArray splitTablesJson = new JSONArray();
for (int i = 0; i < splitTables.size(); i++)
{
splitTablesJson.put(splitTables.get(i).toJson());
}
data.put("version", 0);
data.put("tables", splitTablesJson);
byte[] byteData = data.toString().getBytes(StandardCharsets.UTF_8);
outStream.write(byteData);
}
}
private static int getReadRetries(CommandLine cmd)
{
int readRetries = HpccRemoteFileReader.DEFAULT_READ_RETRIES;
String retriesStr = cmd.getOptionValue("read_retries");
if (retriesStr != null)
{
try
{
readRetries = Integer.parseInt(retriesStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for read_retries: "
+ retriesStr + ", must be an integer. Defaulting to: " + HpccRemoteFileReader.DEFAULT_READ_RETRIES + " retries.");
}
}
return readRetries;
}
private static int getSocketOpTimeoutMS(CommandLine cmd)
{
int socketOpTimeoutS = RowServiceInputStream.DEFAULT_SOCKET_OP_TIMEOUT_MS / 1000;
String timeoutStr = cmd.getOptionValue("socket_timeout_seconds");
if (timeoutStr != null)
{
try
{
socketOpTimeoutS = Integer.parseInt(timeoutStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for socket_timeout: "
+ timeoutStr + ", must be an integer. Defaulting to: " + socketOpTimeoutS + " seconds.");
}
}
return socketOpTimeoutS * 1000;
}
private static int getInitialReadSizeKB(CommandLine cmd)
{
int initialReadSizeKB = RowServiceInputStream.DEFAULT_INITIAL_REQUEST_READ_SIZE_KB;
String initialReadSizeStr = cmd.getOptionValue("initial_read_size", "" + initialReadSizeKB);
try
{
initialReadSizeKB = Integer.parseInt(initialReadSizeStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for initial_read_size: "
+ initialReadSizeStr + ", must be an integer. Defaulting to: " + RowServiceInputStream.DEFAULT_INITIAL_REQUEST_READ_SIZE_KB + "KB.");
}
return initialReadSizeKB;
}
private static Options getReadOptions()
{
Options options = new Options();
options.addRequiredOption("url", "Source Cluster URL", true, "Specifies the URL of the ESP to connect to.");
options.addOption("user", true, "Specifies the username used to connect. Defaults to null.");
options.addOption("pass", true, "Specifies the password used to connect. Defaults to null.");
options.addOption("format", true, "Specifies the output format to be used when writing files to disk. Defaults to Thor files.");
options.addOption("num_threads", true, "Specifies the number of parallel to use to perform operations.");
options.addOption("out", true, "Specifies the directory that the files should be written to.");
options.addOption("filter", true, "Specifies a filter to apply to the files read from the cluster.");
options.addOption("ignore_tlk", false, "Ignore the TLK file when reading Index files.");
options.addOption("read_retries", true, "Sets the maximum number of retries to attempt when reading a file.");
options.addOption("socket_timeout_seconds", true, "Sets the socket operation timeout in seconds.");
options.addOption("connection_startup_limit", true, "Specifies the maximum number of connections to startup concurrently."
+ " useful in cases where starting up connections too quickly can overwhelm intermediate processes.");
options.addOption(Option.builder("read")
.argName("files")
.hasArgs()
.valueSeparator(',')
.desc("Reads the specified file(s) and writes a copy of the files to the local directory")
.required(true)
.build());
return options;
}
private static Options getReadTestOptions()
{
Options options = new Options();
options.addRequiredOption("read_test", "Read test", true, "Specifies the file that should be read.");
options.addRequiredOption("url", "Source Cluster URL", true, "Specifies the URL of the ESP to connect to.");
options.addOption("user", true, "Specifies the username used to connect. Defaults to null.");
options.addOption("pass", true, "Specifies the password used to connect. Defaults to null.");
options.addOption("num_threads", true, "Specifies the number of parallel to use to perform operations.");
options.addOption("access_expiry_seconds", true, "Access token expiration seconds.");
options.addOption("initial_read_size", true, "The size of the initial read request in KB sent to the rowservice,"
+ " useful in cases where starting up connections too quickly can overwhelm intermediate processes.");
options.addOption("read_request_size", true, "The size of the read requests in KB sent to the rowservice.");
options.addOption("read_request_delay", true, "The delay in MS between read requests sent to the rowservice.");
options.addOption("filter", true, "Specifies a filter to apply to the files read from the cluster.");
options.addOption("ignore_tlk", false, "Ignore the TLK file when reading Index files.");
options.addOption("read_retries", true, "Sets the maximum number of retries to attempt when reading a file.");
options.addOption("socket_timeout_seconds", true, "Sets the socket operation timeout in seconds.");
options.addOption("connection_startup_limit", true, "Specifies the maximum number of connections to startup concurrently."
+ " useful in cases where starting up connections too quickly can overwhelm intermediate processes.");
options.addOption(Option.builder("file_parts")
.argName("_file_parts")
.hasArgs()
.valueSeparator(',')
.desc("Specifies the file parts that should be read. Defaults to all file parts.")
.build());
return options;
}
private static Options getCopyOptions()
{
Options options = new Options();
options.addRequiredOption("url", "Source Cluster URL", true, "Specifies the URL of the ESP to read from / write to.");
options.addOption("user", true, "Specifies the username used to connect. Defaults to null.");
options.addOption("pass", true, "Specifies the password used to connect. Defaults to null.");
options.addRequiredOption("dest_cluster", "Destination Cluster Name", true, "Specifies the name of the cluster to write files back to.");
options.addOption("dest_url", "Destination Cluster URL", true, "Specifies the URL of the ESP to write to.");
options.addOption("num_threads", true, "Specifies the number of parallel to use to perform operations.");
options.addOption("filter", true, "Specifies a filter to apply to the files read from the cluster.");
options.addOption("ignore_tlk", false, "Ignore the TLK file when reading Index files.");
options.addOption("read_retries", true, "Sets the maximum number of retries to attempt when reading a file.");
options.addOption("socket_timeout_seconds", true, "Sets the socket operation timeout in seconds.");
options.addOption("connection_startup_limit", true, "Specifies the maximum number of connections to startup concurrently."
+ " useful in cases where starting up connections too quickly can overwhelm intermediate processes.");
options.addOption(Option.builder("copy")
.argName("files")
.hasArgs()
.valueSeparator(' ')
.desc("Copies the specified remote source file to the specified remote destination cluster / file.")
.required(true)
.build());
return options;
}
private static Options getWriteOptions()
{
Options options = new Options();
options.addRequiredOption("url", "Source Cluster URL", true, "Specifies the URL of the ESP to read from / write to.");
options.addOption("user", true, "Specifies the username used to connect. Defaults to null.");
options.addOption("pass", true, "Specifies the password used to connect. Defaults to null.");
options.addOption("dest_url", "Destination Cluster URL", true, "Specifies the URL of the ESP to write to.");
options.addRequiredOption("dest_cluster", "Destination Cluster Name", true, "Specifies the name of the cluster to write files back to.");
options.addOption("num_threads", true, "Specifies the number of parallel to use to perform operations.");
options.addOption("socket_timeout_seconds", true, "Sets the socket operation timeout in seconds.");
options.addOption("connection_startup_limit", true, "Specifies the maximum number of connections to startup concurrently."
+ " useful in cases where starting up connections too quickly can overwhelm intermediate processes.");
options.addOption(Option.builder("write")
.argName("files")
.hasArgs()
.valueSeparator(' ')
.desc("Write the specified local files to the specified remote destination cluster / file.")
.required(true)
.build());
return options;
}
private static Options getTopLevelOptions()
{
Options options = new Options();
options.addOption("read", "Reads the specified file(s) and writes a copy of the files to the local directory.");
options.addOption("read_test", "Reads the specified file and/or particular file parts without writing it locally.");
options.addOption("copy", "Copies the specified remote source file to the specified remote destination cluster / file.");
options.addOption("write", "Writes the specified local source file to the specified remote destination cluster / file.");
return options;
}
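/**
 * Expands a local file path into the list of matching files. A trailing '*' acts
 * as a prefix wildcard within the containing directory; otherwise the path must
 * refer to an existing file.
 * @param filePath the file path, optionally ending in '*'
 * @return the matching file paths
 * @throws Exception if the path or its parent directory is invalid
 */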
public static String[] findFilesMatching(String filePath) throws Exception
{
boolean isWildcard = filePath.endsWith("*");
if (!isWildcard)
{
File file = new File(filePath);
if (!file.exists())
{
throw new Exception("File path is invalid: " + filePath);
}
String[] res = {filePath};
return res;
}
int indexOfSep = filePath.lastIndexOf(File.separator)+1;
String dirStr = filePath.substring(0,indexOfSep);
String filePattern = filePath.substring(indexOfSep,filePath.length()-1);
File dir = new File(dirStr);
if (!dir.isDirectory() || !dir.exists())
{
throw new Exception("File path is invalid: " + filePath);
}
List<String> result = new ArrayList<String>();
for(File file : dir.listFiles())
{
String name = file.getName();
boolean startsWithPattern = name.startsWith(filePattern);
if (startsWithPattern)
{
result.add(file.getAbsolutePath());
}
}
return result.toArray(new String[0]);
}
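// Format detection is currently a stub; only the Thor format is supported.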
private static FileFormat getFormat(String[] srcFiles) throws Exception
{
return FileFormat.THOR;
}
private static String getFormatExtension(FileFormat format)
{
return "";
}
private static FieldDef getRecordDefinition(String[] srcFiles, FileFormat format) throws Exception
{
switch (format)
{
case THOR:
{
String metaFile = null;
for (int i = 0; i < srcFiles.length; i++)
{
String file = srcFiles[i].toLowerCase();
if (file.endsWith(".meta"))
{
metaFile = file;
}
}
if (metaFile == null)
{
throw new Exception("Unable to find Thor meta-data file.");
}
byte[] metaData = Files.readAllBytes(Paths.get(metaFile));
String metaStr = new String(metaData, Charset.defaultCharset());
JSONObject metaJson = new JSONObject(metaStr);
return RecordDefinitionTranslator.parseJsonRecordDefinition(metaJson);
}
case PARQUET:
default:
throw new Exception("File format: " + format + " is not currently supported");
}
}
private static SplitTable[] getSplitTables(String[] srcFiles, FileFormat format) throws Exception
{
if (format != FileFormat.THOR)
{
return null;
}
String splitFile = null;
for (int i = 0; i < srcFiles.length; i++)
{
String file = srcFiles[i].toLowerCase();
if (file.endsWith(".split"))
{
splitFile = file;
break;
}
}
if (splitFile == null)
{
return null;
}
FileInputStream inStream = new FileInputStream(splitFile);
SplitFile file = new SplitFile();
file.load(inStream);
inStream.close();
return file.getSplitTableArray();
}
private static String[] filterFilesByFormat(String[] srcFiles, FileFormat format) throws Exception
{
Pattern pattern = null;
switch (format)
{
case THOR:
{
pattern = Pattern.compile("^[^\\.]*\\._[0-9]+_of_[0-9]+");
break;
}
case PARQUET:
default:
throw new Exception("File format: " + format + " is not currently supported");
}
List<String> filteredFiles = new ArrayList<String>();
for (int i = 0; i < srcFiles.length; i++)
{
int indexOfSep = srcFiles[i].lastIndexOf(File.separator)+1;
String fileName = srcFiles[i].substring(indexOfSep);
if (pattern.matcher(fileName).matches())
{
filteredFiles.add(srcFiles[i]);
}
}
return filteredFiles.toArray(new String[0]);
}
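// Partitions the task array evenly across numThreads worker threads (any remainder
// goes to the lower-numbered threads) and blocks until every thread has finished.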
private static void executeTasks(Runnable[] tasks, int numThreads, TaskContext context) throws Exception
{
int numTasksPerThread = tasks.length / numThreads;
int numResidualTasks = tasks.length % numThreads;
int taskNum = 0;
Thread[] taskThreads = new Thread[numThreads];
for (int threadNum = 0; threadNum < numThreads; threadNum++)
{
int residualTasks = 0;
if (threadNum < numResidualTasks)
{
residualTasks = 1;
}
final int currentTaskStart = taskNum;
final int currentNumTasks = numTasksPerThread + residualTasks;
taskThreads[threadNum] = new Thread(new Runnable()
{
Runnable[] subTasks = tasks;
int startingSubTask = currentTaskStart;
int numSubTasks = currentNumTasks;
public void run()
{
// Make sure the span is current for the thread, otherwise spans created
// within this thread will not be children of the task span
context.makeCurrentOperationSpanCurrent();
for (int j = 0; j < numSubTasks; j++)
{
subTasks[startingSubTask + j].run();
}
}
});
taskNum += currentNumTasks;
taskThreads[threadNum].start();
}
for (int threadNum = 0; threadNum < numThreads; threadNum++)
{
taskThreads[threadNum].join();
}
}
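// Creates one task per file part that streams the part's records without writing
// them to disk, updating the record and byte counters on the current operation.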
private static Runnable[] createReadTestTasks(DataPartition[] fileParts, FieldDef recordDef, TaskContext context, int readRequestSize, int readRequestDelay) throws Exception
{
Runnable[] tasks = new Runnable[fileParts.length];
for (int i = 0; i < tasks.length; i++)
{
final int taskIndex = i;
final DataPartition filePart = fileParts[taskIndex];
tasks[taskIndex] = new Runnable()
{
public void run()
{
try
{
HpccRemoteFileReader.FileReadContext readContext = new HpccRemoteFileReader.FileReadContext();
readContext.parentSpan = context.getCurrentOperation().operationSpan;
readContext.originalRD = recordDef;
readContext.initialReadSizeKB = context.initialReadSizeKB;
readContext.readSizeKB = readRequestSize;
readContext.socketOpTimeoutMS = context.socketOpTimeoutMS;
HpccRemoteFileReader<HPCCRecord> fileReader = new HpccRemoteFileReader<HPCCRecord>(readContext, filePart, new HPCCRecordBuilder(recordDef));
fileReader.getInputStream().setReadRequestDelay(readRequestDelay);
fileReader.setMaxReadRetries(context.readRetries);
long recCount = 0;
while (fileReader.hasNext())
{
HPCCRecord record = fileReader.next();
recCount++;
}
context.getCurrentOperation().recordsRead.addAndGet(recCount);
fileReader.close();
context.getCurrentOperation().bytesRead.addAndGet(fileReader.getStreamPosition());
}
catch (Exception e)
{
context.addError("Error while reading file part index: '" + filePart.getThisPart() + " Error message: " + e.getMessage());
return;
}
}
};
}
return tasks;
}
private static Runnable[] createReadToThorTasks(DataPartition[] fileParts, SplitTable[] splitTables, String[] outFilePaths, FieldDef recordDef, TaskContext context) throws Exception
{
Runnable[] tasks = new Runnable[fileParts.length];
for (int i = 0; i < tasks.length; i++)
{
final int taskIndex = i;
HpccRemoteFileReader.FileReadContext readContext = new HpccRemoteFileReader.FileReadContext();
readContext.parentSpan = context.getCurrentOperation().operationSpan;
readContext.originalRD = recordDef;
readContext.socketOpTimeoutMS = context.socketOpTimeoutMS;
readContext.initialReadSizeKB = context.initialReadSizeKB;
final HpccRemoteFileReader<HPCCRecord> filePartReader = new HpccRemoteFileReader<HPCCRecord>(readContext, fileParts[taskIndex], new HPCCRecordBuilder(recordDef));
filePartReader.setMaxReadRetries(context.readRetries);
final String filePath = outFilePaths[taskIndex];
final FileOutputStream outStream = new FileOutputStream(filePath);
final BinaryRecordWriter filePartWriter = new BinaryRecordWriter(outStream);
filePartWriter.initialize(new HPCCRecordAccessor(recordDef));
tasks[taskIndex] = new Runnable()
{
HpccRemoteFileReader<HPCCRecord> fileReader = filePartReader;
BinaryRecordWriter fileWriter = filePartWriter;
FileOutputStream outputStream = outStream;
SplitTable splitTable = splitTables[taskIndex];
public void run()
{
try
{
long recCount = 0;
while (fileReader.hasNext())
{
splitTable.addRecordPosition(fileReader.getStreamPosition());
HPCCRecord record = fileReader.next();
fileWriter.writeRecord(record);
recCount++;
}
context.getCurrentOperation().recordsRead.addAndGet(recCount);
splitTable.finish(fileReader.getStreamPosition());
fileReader.close();
context.getCurrentOperation().bytesRead.addAndGet(fileReader.getStreamPosition());
fileWriter.finalize();
outputStream.close();
}
catch (Exception e)
{
context.addError("Error while reading file: '" + filePath + "'," + taskIndex + ": " + e.getMessage());
return;
}
}
};
}
return tasks;
}
private static Runnable[] createThorSplitTableTasks(String[] thorFiles, SplitTable[] splitTables, FieldDef recordDef, TaskContext context) throws Exception
{
Runnable[] tasks = new Runnable[thorFiles.length];
for (int i = 0; i < tasks.length; i++)
{
final int taskIndex = i;
final SplitTable splitTable = new SplitTable(DEFAULT_SPLIT_TABLE_SIZE);
splitTables[taskIndex] = splitTable;
BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(thorFiles[taskIndex]));
BinaryRecordReader filePartReader = new BinaryRecordReader(bufferedInputStream);
filePartReader.initialize(new HPCCRecordBuilder(recordDef));
tasks[taskIndex] = new Runnable()
{
InputStream inputStream = bufferedInputStream;
BinaryRecordReader fileReader = filePartReader;
public void run()
{
try
{
while (fileReader.hasNext())
{
splitTable.addRecordPosition(fileReader.getStreamPosAfterLastRecord());
HPCCRecord record = (HPCCRecord) fileReader.getNext();
}
splitTable.finish(fileReader.getStreamPosAfterLastRecord());
inputStream.close();
}
catch (Exception e)
{
context.addError("Error while writing file taskIndex: " + taskIndex + " - " + e.getMessage());
return;
}
}
};
}
return tasks;
}
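// Creates one task per output file part; each task streams one or more incoming
// file parts straight into its output part without redistributing records.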
private static Runnable[] createNonRedistributingCopyTasks(HPCCFile file, DFUCreateFileWrapper createResult, TaskContext context) throws Exception
{
FieldDef recordDef = null;
DataPartition[] inFileParts = null;
DataPartition[] outFileParts = null;
inFileParts = file.getFileParts();
recordDef = file.getRecordDefinition();
DFUFilePartWrapper[] dfuFileParts = createResult.getFileParts();
NullRemapper remapper = new NullRemapper(new RemapInfo(), createResult.getFileAccessInfo());
outFileParts = DataPartition.createPartitions(dfuFileParts, remapper, dfuFileParts.length, createResult.getFileAccessInfoBlob());
int incomingPerOutgoing = inFileParts.length / outFileParts.length;
int residualIncomingFileParts = inFileParts.length % outFileParts.length;
int incomingFilePartIndex = 0;
Runnable[] tasks = new Runnable[outFileParts.length];
for (int i = 0; i < tasks.length; i++)
{
final int taskIndex = i;
DataPartition outFilePart = outFileParts[taskIndex];
final int numIncomingParts = incomingPerOutgoing + ((taskIndex < residualIncomingFileParts) ? 1 : 0);
HpccRemoteFileReader<HPCCRecord>[] filePartReaders = new HpccRemoteFileReader[numIncomingParts];
for (int j = 0; j < numIncomingParts; j++)
{
DataPartition inFilePart = inFileParts[incomingFilePartIndex + j];
HpccRemoteFileReader.FileReadContext readContext = new HpccRemoteFileReader.FileReadContext();
readContext.parentSpan = context.getCurrentOperation().operationSpan;
readContext.originalRD = recordDef;
readContext.socketOpTimeoutMS = context.socketOpTimeoutMS;
readContext.initialReadSizeKB = context.initialReadSizeKB;
filePartReaders[j] = new HpccRemoteFileReader<HPCCRecord>(readContext, inFilePart, new HPCCRecordBuilder(file.getProjectedRecordDefinition()));
filePartReaders[j].setMaxReadRetries(context.readRetries);
}
incomingFilePartIndex += numIncomingParts;
HPCCRecordAccessor recordAccessor = new HPCCRecordAccessor(recordDef);
HPCCRemoteFileWriter.FileWriteContext writeContext = new HPCCRemoteFileWriter.FileWriteContext();
writeContext.parentSpan = context.getCurrentOperation().operationSpan;
writeContext.recordDef = recordDef;
writeContext.fileCompression = CompressionAlgorithm.NONE;
final HPCCRemoteFileWriter<HPCCRecord> partFileWriter = new HPCCRemoteFileWriter<HPCCRecord>(writeContext, outFilePart, recordAccessor);
tasks[taskIndex] = new Runnable()
{
HpccRemoteFileReader<HPCCRecord>[] fileReaders = filePartReaders;
HPCCRemoteFileWriter<HPCCRecord> fileWriter = partFileWriter;
public void run()
{
try
{
for (int k = 0; k < fileReaders.length; k++)
{
long recordsRead = 0;
long recordsWritten = 0;
HpccRemoteFileReader<HPCCRecord> fileReader = fileReaders[k];
while (fileReader.hasNext())
{
HPCCRecord record = fileReader.next();
fileWriter.writeRecord(record);
recordsRead++;
recordsWritten++;
}
context.getCurrentOperation().recordsWritten.addAndGet(recordsWritten);
context.getCurrentOperation().recordsRead.addAndGet(recordsRead);
fileReader.close();
context.getCurrentOperation().bytesRead.addAndGet(fileReader.getStreamPosition());
}
System.out.println("Closing file writer for task: " + taskIndex);
fileWriter.close();
context.getCurrentOperation().bytesWritten.addAndGet(fileWriter.getBytesWritten());
}
catch (Exception e)
{
context.addError("Error while copying file: '" + file.getFileName() + "'," + taskIndex + ": " + e.getMessage());
return;
}
}
};
}
return tasks;
}
/*
* Redistribution notes:
* Download file locally and build split table, or build split table if one does not exist.
* Create write with redistribution using the split table
*/
private static class SplitEntryMapping
{
int startingSrcFile = 0;
int splitEntryStart = 0;
int endingSrcFile = 0;
int splitEntryEnd = 0;
}
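// Creates one write task per output file part. When the source file count differs
// from the output part count, the split tables are used to apportion the source
// records evenly across the output parts.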
private static Runnable[] createWriteTasks(String[] srcFiles, SplitTable[] splitTables, FieldDef recordDef, FileFormat format, DFUCreateFileWrapper createResult, TaskContext context) throws Exception
{
DataPartition[] outFileParts = null;
DFUFilePartWrapper[] dfuFileParts = createResult.getFileParts();
NullRemapper remapper = new NullRemapper(new RemapInfo(), createResult.getFileAccessInfo());
outFileParts = DataPartition.createPartitions(dfuFileParts, remapper, dfuFileParts.length, createResult.getFileAccessInfoBlob());
// Determine mapping from split entries to output file parts
SplitEntryMapping[] srcFileToOutPartsMapping = new SplitEntryMapping[outFileParts.length];
if (srcFiles.length != outFileParts.length)
{
int totalSplitEntries = 0;
for (int i = 0; i < splitTables.length; i++)
{
totalSplitEntries += splitTables[i].splits.size();
}
int splitsPerOutFile = totalSplitEntries / outFileParts.length;
int residualSplits = totalSplitEntries % outFileParts.length;
int currentSrcFile = 0;
int currentSrcFileSplitStart = 0;
int currentSrcFileSplitEnd = splitTables[0].splits.size();
int splitStart = 0;
for (int i = 0; i < srcFileToOutPartsMapping.length; i++)
{
int numSplits = splitsPerOutFile + ((i < residualSplits ) ? 1 : 0);
SplitEntryMapping mapping = new SplitEntryMapping();
mapping.startingSrcFile = currentSrcFile;
mapping.splitEntryStart = splitStart - currentSrcFileSplitStart;
int splitEnd = splitStart + numSplits;
while (currentSrcFileSplitEnd < splitEnd)
{
currentSrcFile++;
currentSrcFileSplitStart = currentSrcFileSplitEnd;
currentSrcFileSplitEnd += splitTables[currentSrcFile].splits.size();
}
mapping.endingSrcFile = currentSrcFile;
mapping.splitEntryEnd = splitEnd - currentSrcFileSplitStart;
srcFileToOutPartsMapping[i] = mapping;
splitStart = splitEnd;
}
}
else
{
for (int i = 0; i < srcFileToOutPartsMapping.length; i++)
{
SplitEntryMapping mapping = new SplitEntryMapping();
mapping.startingSrcFile = i;
mapping.splitEntryStart = 0;
mapping.endingSrcFile = i;
mapping.splitEntryEnd = splitTables[i].splits.size();
srcFileToOutPartsMapping[i] = mapping;
}
}
Runnable[] tasks = new Runnable[outFileParts.length];
for (int i = 0; i < tasks.length; i++)
{
final int taskIndex = i;
DataPartition outFilePart = outFileParts[taskIndex];
HPCCRecordAccessor recordAccessor = new HPCCRecordAccessor(recordDef);
HPCCRemoteFileWriter.FileWriteContext writeContext = new HPCCRemoteFileWriter.FileWriteContext();
writeContext.parentSpan = context.getCurrentOperation().operationSpan;
writeContext.recordDef = recordDef;
writeContext.fileCompression = CompressionAlgorithm.NONE;
writeContext.socketOpTimeoutMs = context.socketOpTimeoutMS;
HPCCRemoteFileWriter<HPCCRecord> filePartWriter = new HPCCRemoteFileWriter<HPCCRecord>(writeContext, outFilePart, recordAccessor);
tasks[taskIndex] = new Runnable()
{
SplitEntryMapping mapping = srcFileToOutPartsMapping[taskIndex];
HPCCRemoteFileWriter<HPCCRecord> fileWriter = filePartWriter;
public void run()
{
try
{
int numIncomingParts = (mapping.endingSrcFile+1) - mapping.startingSrcFile;
BinaryRecordReader[] fileReaders = new BinaryRecordReader[numIncomingParts];
BufferedInputStream[] inputStreams = new BufferedInputStream[numIncomingParts];
for (int j = 0; j < numIncomingParts; j++)
{
String srcFile = srcFiles[mapping.startingSrcFile + j];
inputStreams[j] = new BufferedInputStream(new FileInputStream(srcFile));
if (j == 0)
{
SplitEntry startingSplit = splitTables[mapping.startingSrcFile].splits.get(mapping.splitEntryStart);
fileReaders[j] = new BinaryRecordReader(inputStreams[j], startingSplit.splitStart);
}
else
{
fileReaders[j] = new BinaryRecordReader(inputStreams[j]);
}
fileReaders[j].initialize(new HPCCRecordBuilder(recordDef));
}
for (int j = 0; j < fileReaders.length; j++)
{
BinaryRecordReader fileReader = fileReaders[j];
long splitEnd = Long.MAX_VALUE;
if (j == (fileReaders.length-1))
{
SplitEntry endingSplit = splitTables[mapping.endingSrcFile].splits.get(mapping.splitEntryEnd-1);
splitEnd = endingSplit.splitEnd;
}
long recordsRead = 0;
long recordsWritten = 0;
while (fileReader.hasNext() && fileReader.getStreamPosAfterLastRecord() < splitEnd)
{
HPCCRecord record = (HPCCRecord) fileReader.getNext();
fileWriter.writeRecord(record);
recordsRead++;
recordsWritten++;
}
context.getCurrentOperation().recordsWritten.addAndGet(recordsWritten);
context.getCurrentOperation().recordsRead.addAndGet(recordsRead);
context.getCurrentOperation().bytesRead.addAndGet(fileReader.getStreamPosAfterLastRecord());
inputStreams[j].close();
}
fileWriter.close();
context.getCurrentOperation().bytesWritten.addAndGet(fileWriter.getBytesWritten());
}
catch (Exception e)
{
context.addError("Error while writing file taskIndex: " + taskIndex + " - " + e.getMessage());
return;
}
}
};
}
return tasks;
}
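// Reads the requested logical file(s) from the cluster, writing each file part
// along with ".meta" and ".split" companion files to the local output directory.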
private static void performRead(String[] args, TaskContext context)
{
Options options = getReadOptions();
CommandLineParser parser = new DefaultParser();
CommandLine cmd = null;
try
{
cmd = parser.parse(options, args);
}
catch (ParseException e)
{
System.out.println("Error parsing commandline options:\n" + e.getMessage());
return;
}
String connString = cmd.getOptionValue("url");
String[] creds = getCredentials(cmd);
String user = creds[0];
String pass = creds[1];
applyGlobalConfig(cmd);
String outputPath = cmd.getOptionValue("out",".");
int numThreads = NUM_DEFAULT_THREADS;
String numThreadsStr = cmd.getOptionValue("num_threads", "" + numThreads);
try
{
numThreads = Integer.parseInt(numThreadsStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for num_threads: "
+ numThreadsStr + ", must be an integer. Defaulting to: " + NUM_DEFAULT_THREADS + " threads.");
}
String formatStr = cmd.getOptionValue("format");
if (formatStr == null)
{
formatStr = "THOR";
}
context.readRetries = getReadRetries(cmd);
context.socketOpTimeoutMS = getSocketOpTimeoutMS(cmd);
context.initialReadSizeKB = getInitialReadSizeKB(cmd);
FileFormat format = FileFormat.THOR;
switch (formatStr.toUpperCase())
{
case "THOR":
format = FileFormat.THOR;
break;
case "PARQUET":
format = FileFormat.PARQUET;
break;
default:
System.out.println("Error unsupported format specified: " + format);
return;
}
String filter = cmd.getOptionValue("filter");
boolean ignoreTLK = cmd.hasOption("ignore_tlk");
String[] datasets = cmd.getOptionValues("read");
for (int i = 0; i < datasets.length; i++)
{
String datasetName = datasets[i];
context.startOperation("FileUtility.Read_" + datasetName);
context.setCurrentOperationSpanAttributes(Attributes.of(AttributeKey.stringKey("server.url"), connString));
HPCCFile file = null;
try
{
file = new HPCCFile(datasetName, connString, user, pass);
}
catch (Exception e)
{
String error = "Error while attempting to open file: '" + datasetName + "': " + e.getMessage();
context.addError(error);
return;
}
file.setUseTLK(!ignoreTLK);
if (filter != null)
{
try
{
file.setFilter(filter);
}
catch (Exception e)
{
String error = "Error while attempting to set filter for: '" + datasetName + "': " + e.getMessage();
context.addError(error);
return;
}
}
DataPartition[] fileParts = null;
FieldDef recordDef = null;
try
{
fileParts = file.getFileParts();
recordDef = file.getRecordDefinition();
}
catch (Exception e)
{
String error = "Error while retrieving file parts for: '" + datasetName + "': " + e.getMessage();
context.addError(error);
return;
}
// Figure out the format string needed based on the number of file parts
String lenStr = "" + fileParts.length;
String fileNumFormat = "%0" + lenStr.length() + "d";
String fileExt = getFormatExtension(format);
File outDir = new File(outputPath);
if (!outDir.exists())
{
outDir.mkdirs();
}
SplitTable[] splitTables = new SplitTable[fileParts.length];
String[] outFilePaths = new String[fileParts.length];
for (int j = 0; j < fileParts.length; j++)
{
String fileName = file.getFileName().replace(":","_");
outFilePaths[j] = outputPath + File.separator + fileName + "._" + String.format(fileNumFormat, j+1) + "_of_" + fileParts.length + fileExt;
splitTables[j] = new SplitTable(DEFAULT_SPLIT_TABLE_SIZE);
}
Runnable[] tasks = null;
try
{
switch (format)
{
case THOR:
tasks = createReadToThorTasks(fileParts, splitTables, outFilePaths, recordDef, context);
break;
case PARQUET:
default:
throw new Exception("Error unsupported format specified: " + format);
};
}
catch (Exception e)
{
context.addError("Error while attempting to create read tasks for file: '" + datasetName + "': " + e.getMessage());
return;
}
try
{
executeTasks(tasks, numThreads, context);
}
catch (Exception e)
{
context.addError("Error while attempting to execute read tasks for file: '" + datasetName + "': " + e.getMessage());
return;
}
if (context.hasError())
{
return;
}
try
{
String fileName = file.getFileName().replace(":","_");
String filePath = outputPath + File.separator + fileName + ".meta";
FileOutputStream metaFile = new FileOutputStream(filePath);
String metaStr = RecordDefinitionTranslator.toJsonRecord(file.getRecordDefinition()).toString();
metaFile.write(metaStr.getBytes());
metaFile.close();
}
catch (Exception e)
{
context.addError("Error while attempting to write meta-data for file: '" + datasetName + "': " + e.getMessage());
return;
}
try
{
String fileName = file.getFileName().replace(":","_");
String filePath = outputPath + File.separator + fileName + ".split";
FileOutputStream splitFileOut = new FileOutputStream(filePath);
SplitFile splitFile = new SplitFile(splitTables);
splitFile.save(splitFileOut);
splitFileOut.close();
}
catch (Exception e)
{
context.addError("Error while attempting to write split table file for dataset: '" + datasetName + "': " + e.getMessage());
return;
}
context.endOperation();
}
}
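// Streams the requested file (or a subset of its file parts) without writing
// record data locally; only a ".meta" file is produced.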
private static void performReadTest(String[] args, TaskContext context)
{
Options options = getReadTestOptions();
CommandLineParser parser = new DefaultParser();
CommandLine cmd = null;
try
{
cmd = parser.parse(options, args);
}
catch (ParseException e)
{
System.out.println("Error parsing commandline options:\n" + e.getMessage());
return;
}
String connString = cmd.getOptionValue("url");
String[] creds = getCredentials(cmd);
String user = creds[0];
String pass = creds[1];
applyGlobalConfig(cmd);
String outputPath = cmd.getOptionValue("out",".");
int numThreads = NUM_DEFAULT_THREADS;
String numThreadsStr = cmd.getOptionValue("num_threads", "" + numThreads);
try
{
numThreads = Integer.parseInt(numThreadsStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for num_threads: "
+ numThreadsStr + ", must be an integer. Defaulting to: " + NUM_DEFAULT_THREADS + " threads.");
}
int expirySeconds = DEFAULT_ACCESS_EXPIRY_SECONDS;
String expirySecondsStr = cmd.getOptionValue("access_expiry_seconds", "" + expirySeconds);
try
{
expirySeconds = Integer.parseInt(expirySecondsStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for access_expiry_seconds: "
+ numThreadsStr + ", must be an integer. Defaulting to: " + DEFAULT_ACCESS_EXPIRY_SECONDS + "s.");
}
int readRequestSize = DEFAULT_READ_REQUEST_SIZE;
String readRequestSizeStr = cmd.getOptionValue("read_request_size", "" + readRequestSize);
try
{
readRequestSize = Integer.parseInt(readRequestSizeStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for read_request_size: "
+ readRequestSizeStr + ", must be an integer. Defaulting to: " + DEFAULT_READ_REQUEST_SIZE + "KB.");
}
int readRequestDelay = DEFAULT_READ_REQUEST_DELAY;
String readRequestDelayStr = cmd.getOptionValue("read_request_delay", "" + readRequestDelay);
try
{
readRequestDelay = Integer.parseInt(readRequestDelayStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for read_request_delay: "
+ readRequestDelayStr + ", must be an integer. Defaulting to: " + DEFAULT_READ_REQUEST_DELAY + "ms.");
}
context.readRetries = getReadRetries(cmd);
context.socketOpTimeoutMS = getSocketOpTimeoutMS(cmd);
context.initialReadSizeKB = getInitialReadSizeKB(cmd);
String formatStr = cmd.getOptionValue("format");
if (formatStr == null)
{
formatStr = "THOR";
}
FileFormat format = FileFormat.THOR;
switch (formatStr.toUpperCase())
{
case "THOR":
format = FileFormat.THOR;
break;
case "PARQUET":
format = FileFormat.PARQUET;
break;
default:
System.out.println("Error unsupported format specified: " + format);
return;
}
String filter = cmd.getOptionValue("filter");
boolean ignoreTLK = cmd.hasOption("ignore_tlk");
String datasetName = cmd.getOptionValue("read_test");
context.startOperation("FileUtility.ReadTest_" + datasetName);
context.setCurrentOperationSpanAttributes(Attributes.of(AttributeKey.stringKey("server.url"), connString));
HPCCFile file = null;
try
{
file = new HPCCFile(datasetName, connString, user, pass);
file.setFileAccessExpirySecs(expirySeconds);
}
catch (Exception e)
{
context.addError("Error while attempting to open file: '" + datasetName + "': " + e.getMessage());
return;
}
file.setUseTLK(!ignoreTLK);
if (filter != null)
{
try
{
file.setFilter(filter);
}
catch (Exception e)
{
String error = "Error while attempting to set filter for: '" + datasetName + "': " + e.getMessage();
context.addError(error);
return;
}
}
DataPartition[] fileParts = null;
FieldDef recordDef = null;
try
{
fileParts = file.getFileParts();
recordDef = file.getRecordDefinition();
}
catch (Exception e)
{
context.addError("Error while retrieving file parts for: '" + datasetName + "': " + e.getMessage());
return;
}
String[] filePartsStrs = cmd.getOptionValues("file_parts");
if (filePartsStrs != null && filePartsStrs.length > 0)
{
ArrayList<DataPartition> filePartList = new ArrayList<DataPartition>();
for (int i = 0; i < filePartsStrs.length; i++)
{
try
{
int filePartIndex = Integer.parseInt(filePartsStrs[i]) - 1;
if (filePartIndex < 0 || filePartIndex >= fileParts.length)
{
context.addWarn("InvalidParams: Skipping invalid file part index: " + filePartsStrs[i]
+ " outside of range: [0," + fileParts.length + "]");
}
filePartList.add(fileParts[filePartIndex]);
}
catch (NumberFormatException e)
{
context.addWarn("InvalidParams: Skipping invalid file part index: " + filePartsStrs[i]);
}
}
fileParts = filePartList.toArray(new DataPartition[0]);
}
Runnable[] tasks = null;
try
{
switch (format)
{
case THOR:
tasks = createReadTestTasks(fileParts, recordDef, context, readRequestSize, readRequestDelay);
break;
case PARQUET:
default:
throw new Exception("Error unsupported format specified: " + format);
};
}
catch (Exception e)
{
context.addError("Error while attempting to create read tasks for file: '" + datasetName + "': " + e.getMessage());
return;
}
try
{
executeTasks(tasks, numThreads, context);
}
catch (Exception e)
{
context.addError("Error while attempting to execute read tasks for file: '" + datasetName + "': " + e.getMessage());
return;
}
if (context.hasError())
{
return;
}
try
{
String fileName = file.getFileName().replace(":","_");
String filePath = outputPath + File.separator + fileName + ".meta";
FileOutputStream metaFile = new FileOutputStream(filePath);
String metaStr = RecordDefinitionTranslator.toJsonRecord(file.getRecordDefinition()).toString();
metaFile.write(metaStr.getBytes());
metaFile.close();
}
catch (Exception e)
{
context.addError("Error while attempting to write meta-data for file: '" + datasetName + "': " + e.getMessage());
return;
}
context.endOperation();
}
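// Copies a remote file to another cluster / file name. Note that shouldRedistribute
// is currently hard-coded to true, so the copy is performed by reading the file into
// a local "tmp-read" directory and then writing it back out.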
private static void performCopy(String[] args, TaskContext context)
{
Options options = getCopyOptions();
CommandLineParser parser = new DefaultParser();
CommandLine cmd = null;
try
{
cmd = parser.parse(options, args);
}
catch (ParseException e)
{
System.out.println("Error parsing commandline options:\n" + e.getMessage());
return;
}
int numThreads = NUM_DEFAULT_THREADS;
String numThreadsStr = cmd.getOptionValue("num_threads", "" + numThreads);
try
{
numThreads = Integer.parseInt(numThreadsStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for num_threads: "
+ numThreadsStr + ", must be an integer. Defaulting to: " + NUM_DEFAULT_THREADS + " threads.");
}
String[] creds = getCredentials(cmd);
String user = creds[0];
String pass = creds[1];
applyGlobalConfig(cmd);
String destClusterName = cmd.getOptionValue("dest_cluster");
String srcURL = cmd.getOptionValue("url");
String destURL = cmd.getOptionValue("dest_url");
if (destURL == null)
{
destURL = srcURL;
}
Connection destConn = null;
try
{
destConn = new Connection(destURL);
}
catch (Exception e)
{
System.out.println("Error while attempting to construct connection: " + e.getMessage());
return;
}
destConn.setCredentials(user, pass);
Platform platform = Platform.get(destConn);
HPCCWsClient wsclient = null;
try
{
wsclient = platform.checkOutHPCCWsClient();
}
catch (Exception e)
{
System.out.println("Error while attempting to connect to platform: " + e.getMessage());
return;
}
HPCCWsDFUClient dfuClient = wsclient.getWsDFUClient();
String[] copyPairs = cmd.getOptionValues("copy");
if ((copyPairs.length % 2) != 0)
{
System.out.println("Error copy operation must specify both a source and destination file pairs separated by a space.");
return;
}
String filter = cmd.getOptionValue("filter");
boolean ignoreTLK = cmd.hasOption("ignore_tlk");
context.readRetries = getReadRetries(cmd);
context.socketOpTimeoutMS = getSocketOpTimeoutMS(cmd);
context.initialReadSizeKB = getInitialReadSizeKB(cmd);
for (int i = 0; i < copyPairs.length; i+=2)
{
String srcFile = copyPairs[i];
String destFile = copyPairs[i+1];
context.startOperation("FileUtility.Copy_ " + srcFile + " -> " + destFile);
context.setCurrentOperationSpanAttributes(Attributes.of(AttributeKey.stringKey("server.src.url"), srcURL,
AttributeKey.stringKey("server.dest.url"), destURL));
HPCCFile file = null;
try
{
file = new HPCCFile(srcFile, srcURL, user, pass);
}
catch (Exception e)
{
context.addError("Error while attempting to open file: '" + srcFile + "': " + e.getMessage());
return;
}
file.setUseTLK(!ignoreTLK);
if (filter != null)
{
try
{
file.setFilter(filter);
}
catch (Exception e)
{
String error = "Error while attempting to set filter for: '" + srcFile + "': " + e.getMessage();
context.addError(error);
return;
}
}
DataPartition[] srcFileParts = null;
try
{
srcFileParts = file.getFileParts();
}
catch (HpccFileException e)
{
context.addError("Error while retrieving file parts for: '" + srcFile + "': " + e.getMessage());
return;
}
boolean shouldRedistribute = true;
if (!shouldRedistribute)
{
int expirySecs = 300;
DFUCreateFileWrapper createResult = null;
String eclRecordDefn = null;
try
{
eclRecordDefn = RecordDefinitionTranslator.toECLRecord(file.getRecordDefinition());
createResult = dfuClient.createFile(destFile, destClusterName, eclRecordDefn,
expirySecs, false, DFUFileTypeWrapper.Flat, "");
}
catch (Exception e)
{
context.addError("Error while attempting to start file creation for: '" + destFile + "': " + e.getMessage());
return;
}
Runnable[] tasks = null;
try
{
tasks = createNonRedistributingCopyTasks(file, createResult, context);
}
catch (Exception e)
{
context.addError("Error while attempting to create copy tasks for file: '" + srcFile + "': " + e.getMessage());
return;
}
try
{
executeTasks(tasks, numThreads, context);
}
catch (Exception e)
{
context.addError("Error while attempting to execute copy tasks for file: '" + srcFile + "': " + e.getMessage());
return;
}
if (context.hasError())
{
return;
}
try
{
long bytesWritten = context.getCurrentOperation().bytesWritten.get();
long recordsWritten = context.getCurrentOperation().recordsWritten.get();
dfuClient.publishFile(createResult.getFileID(), eclRecordDefn, recordsWritten, bytesWritten, true);
}
catch (Exception e)
{
context.addError("Error while attempting to publish file: '" + destFile + "': " + e.getMessage());
return;
}
}
else
{
String readArgs[] = {"-read", srcFile, "-url", srcURL,
"-format", "thor", "-user", user, "-pass", pass,
"-out", "tmp-read"};
performRead(readArgs, context);
String writeArgs[] = {"-write", "tmp-read" + File.separator + srcFile.replace(':', '_') + "*" + " " + destFile,
"-url", srcURL, "-dest_url", destURL,
"-dest_cluster", destClusterName,
"-user", user, "-pass", pass };
performWrite(writeArgs, context);
}
context.endOperation();
}
}
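// Writes local Thor file parts to a remote cluster: builds split tables if needed,
// creates the destination file via WsDFU, writes the parts, then publishes the file.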
private static void performWrite(String[] args, TaskContext context)
{
Options options = getWriteOptions();
CommandLineParser parser = new DefaultParser();
CommandLine cmd = null;
try
{
cmd = parser.parse(options, args);
}
catch (ParseException e)
{
System.out.println("Error parsing commandline options:\n" + e.getMessage());
return;
}
int numThreads = NUM_DEFAULT_THREADS;
String numThreadsStr = cmd.getOptionValue("num_threads", "" + numThreads);
try
{
numThreads = Integer.parseInt(numThreadsStr);
}
catch(Exception e)
{
System.out.println("Invalid option value for num_threads: "
+ numThreadsStr + ", must be an integer. Defaulting to: " + NUM_DEFAULT_THREADS + " threads.");
}
String[] creds = getCredentials(cmd);
String user = creds[0];
String pass = creds[1];
applyGlobalConfig(cmd);
String destClusterName = cmd.getOptionValue("dest_cluster");
String srcURL = cmd.getOptionValue("url");
String destURL = cmd.getOptionValue("dest_url");
if (destURL == null)
{
destURL = srcURL;
}
Connection destConn = null;
try
{
destConn = new Connection(destURL);
}
catch (Exception e)
{
System.out.println("Error while attempting to construct connection: " + e.getMessage());
return;
}
destConn.setCredentials(user, pass);
Platform platform = Platform.get(destConn);
HPCCWsClient wsclient = null;
try
{
wsclient = platform.checkOutHPCCWsClient();
}
catch (Exception e)
{
System.out.println("Error while attempting to connect to platform: " + e.getMessage());
return;
}
HPCCWsDFUClient dfuClient = wsclient.getWsDFUClient();
String[] writePairs = cmd.getOptionValues("write");
if ((writePairs.length % 2) != 0)
{
System.out.println("Error write operation must specify both a source and destination file pairs separated by a space.");
return;
}
for (int pairIdx = 0; pairIdx < writePairs.length; pairIdx += 2)
{
String srcFile = writePairs[pairIdx];
String destFile = writePairs[pairIdx+1];
context.startOperation( "FileUtility.Write_" + srcFile + "_to_" + destFile);
Attributes attributes = Attributes.of(AttributeKey.stringKey("server.url"), destURL);
context.setCurrentOperationSpanAttributes(attributes);
SplitTable[] splitTables = null;
String[] srcFiles = null;
FileFormat format = FileFormat.THOR;
FieldDef recordDefinition = null;
try
{
srcFiles = findFilesMatching(srcFile);
format = getFormat(srcFiles);
recordDefinition = getRecordDefinition(srcFiles, format);
splitTables = getSplitTables(srcFiles, format);
srcFiles = filterFilesByFormat(srcFiles, format);
}
catch (Exception e)
{
context.addError("Error while constructing source file list: " + e.getMessage());
return;
}
Arrays.sort(srcFiles);
if (srcFiles.length == 0)
{
context.addError("Error no files matching: " + srcFile);
return;
}
boolean needToCreateSplitTable = (splitTables == null || splitTables.length == 0) && format == FileFormat.THOR;
if (needToCreateSplitTable)
{
Runnable[] tasks = null;
try
{
splitTables = new SplitTable[srcFiles.length];
tasks = createThorSplitTableTasks(srcFiles, splitTables, recordDefinition, context);
}
catch (Exception e)
{
context.addError("Error while attempting to create split table creation tasks for file: '" + srcFile + "': " + e.getMessage());
return;
}
try
{
executeTasks(tasks, numThreads, context);
}
catch (Exception e)
{
context.addError("Error while attempting to execute create split table creation tasks for file: '" + srcFile + "': " + e.getMessage());
return;
}
}
if (needToCreateSplitTable)
{
try
{
String fileName = srcFiles[0].substring(0,srcFiles[0].lastIndexOf('.'));
String filePath = fileName + ".split";
FileOutputStream splitFileOut = new FileOutputStream(filePath);
SplitFile splitFile = new SplitFile(splitTables);
splitFile.save(splitFileOut);
splitFileOut.close();
}
catch (Exception e)
{
context.addError("Error while attempting to write split table file for dataset: '" + srcFile + "': " + e.getMessage());
return;
}
}
int expirySecs = 300;
DFUCreateFileWrapper createResult = null;
String eclRecordDefn = null;
try
{
eclRecordDefn = RecordDefinitionTranslator.toECLRecord(recordDefinition);
createResult = dfuClient.createFile(destFile, destClusterName, eclRecordDefn,
expirySecs, false, DFUFileTypeWrapper.Flat, "");
}
catch (Exception e)
{
context.addError("Error while attempting to start file creation for: '" + destFile + "': " + e.getMessage());
return;
}
Runnable[] tasks = null;
try
{
tasks = createWriteTasks(srcFiles, splitTables, recordDefinition, format, createResult, context);
}
catch (Exception e)
{
context.addError("Error while attempting to create write tasks for file: '" + srcFile + "': " + e.getMessage());
return;
}
try
{
executeTasks(tasks, numThreads, context);
}
catch (Exception e)
{
context.addError("Error while attempting to execute write tasks for file: '" + srcFile + "': " + e.getMessage());
return;
}
if (context.hasError())
{
return;
}
try
{
long bytesWritten = context.getCurrentOperation().bytesWritten.get();
long recordsWritten = context.getCurrentOperation().recordsWritten.get();
dfuClient.publishFile(createResult.getFileID(), eclRecordDefn, recordsWritten, bytesWritten, true);
}
catch (Exception e)
{
context.addError("Error while attempting to publish file: '" + destFile + "': " + e.getMessage());
return;
}
context.endOperation();
}
}
/**
* Executes an operation based on the provided args.
* @param args Operation args
* @return a JSONArray containing the results of each executed operation
*/
public static JSONArray run(String[] args)
{
if (otelNeedsInit)
{
if (Boolean.getBoolean("otel.java.global-autoconfigure.enabled"))
{
System.out.println("OpenTelemetry autoconfiguration enabled with following values.");
System.out.println("If any of these options are not provided, they will defalt to values which could require additional CLASSPATH dependancies.");
System.out.println("If missing dependancies arise, utility will halt!");
System.out.println(" otel.traces.exporter sys property: " + System.getProperty("otel.traces.exporter"));
System.out.println(" OTEL_TRACES_EXPORTER Env var: " + System.getenv("OTEL_TRACES_EXPORTER"));
System.out.println(" OTEL_TRACES_SAMPLER Env var: " + System.getenv("OTEL_TRACES_SAMPLER"));
System.out.println(" otel.traces.sampler sys property: " + System.getProperty("otel.traces.sampler"));
System.out.println(" otel.logs.exporter: "+ System.getProperty("otel.logs.exporter"));
System.out.println(" OTEL_LOGS_EXPORTER Env var: " + System.getenv("OTEL_LOGS_EXPORTER"));
System.out.println(" otel.metrics.exporter: "+ System.getProperty("otel.metrics.exporter"));
System.out.println(" OTEL_METRICS_EXPORTER Env var: " + System.getenv("OTEL_METRICS_EXPORTER"));
if (!org.hpccsystems.ws.client.utils.Utils.isOtelJavaagentUsed())
{
AutoConfiguredOpenTelemetrySdk.initialize().getOpenTelemetrySdk();
}
}
otelNeedsInit = false;
}
Options options = getTopLevelOptions();
CommandLineParser parser = new DefaultParser();
CommandLine cmd = null;
try
{
boolean stopAtNonOption = false;
// Stop-at-non-option doesn't seem to work in commons-cli 1.5, so we only take the first arg to prevent an unknown option exception
String[] truncatedArgs = new String[1];
truncatedArgs[0] = args[0];
cmd = parser.parse(options, truncatedArgs, stopAtNonOption);
}
catch (ParseException e)
{
System.out.println("Error parsing commandline options:\n" + e.getMessage());
return new JSONArray();
}
TaskContext context = new TaskContext();
if (cmd.hasOption("read"))
{
performRead(args, context);
}
else if (cmd.hasOption("read_test"))
{
performReadTest(args, context);
}
else if (cmd.hasOption("copy"))
{
performCopy(args, context);
}
else if (cmd.hasOption("write"))
{
performWrite(args, context);
}
// If we are still in the middle of an operation, there was a failure
if (context.hasCurrentOperation())
{
boolean succeeded = false;
context.endOperation(succeeded);
}
return context.generateResultsMessage();
}
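/**
 * Command line entry point; runs the requested operation and prints the JSON results.
 * @param args operation arguments, see {@link #run(String[])}
 */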
public static void main(String[] args)
{
JSONArray results = run(args);
System.out.println("Results:\n--------------------------------------------------\n");
System.out.println(results.toString(2));
return;
}
}