// org.hpccsystems.dfs.client.HPCCFile Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of dfsclient Show documentation
// Show all versions of dfsclient Show documentation
// Client interface into HPCC Systems' Distributed File System.
// The newest version!
/*
* ##############################################################################
*
* HPCC SYSTEMS software Copyright (C) 2018 HPCC Systems®.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
* ##############################################################################
*/
package org.hpccsystems.dfs.client;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.UUID;
import java.util.Arrays;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.hpccsystems.commons.ecl.FieldDef;
import org.hpccsystems.commons.ecl.FileFilter;
import org.hpccsystems.commons.ecl.HpccSrcType;
import org.hpccsystems.commons.ecl.RecordDefinitionTranslator;
import org.hpccsystems.commons.errors.HpccFileException;
import org.hpccsystems.dfs.cluster.ClusterRemapper;
import org.hpccsystems.dfs.cluster.RemapInfo;
import org.hpccsystems.ws.client.HPCCWsDFUClient;
import org.hpccsystems.ws.client.utils.Connection;
import org.hpccsystems.ws.client.wrappers.ArrayOfEspExceptionWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFileAccessInfoWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFileDetailWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFileTypeWrapper;
import org.json.JSONObject;
/**
* Access to file content on a collection of one or more HPCC Systems clusters.
*
*/
public class HPCCFile implements Serializable
{
// Serialization version for this Serializable class.
static private final long serialVersionUID = 1L;
private static final Logger log = LogManager.getLogger(HPCCFile.class);
// Data partitions of the file; null until createDataParts() runs, and reset to null by
// several setters to force re-creation on the next access.
private DataPartition[] dataParts;
// For index files: the last partition when it reports isTLK(); it is removed from dataParts.
private DataPartition tlkPartition = null;
// When true, createDataParts() hands the TLK partition to the PartitionProcessor.
private boolean useTLK = true;
// When true, blob fields in the projected record definition are projected to non-blobs.
private boolean readBlobs = true;
// Built by createDataParts(); null before the first successful call.
private PartitionProcessor partitionProcessor = null;
// Timestamp (System.currentTimeMillis()) used to age the data parts against the access expiry.
private long dataPartsCreationTimeMS = -1;
// Record definition parsed from the JSON returned with the file access info.
private FieldDef recordDefinition;
// recordDefinition after pruning by columnPruner (see updateProjectedRecordDef()).
private FieldDef projectedRecordDefinition;
// Set from the file type reported with the file access info.
private boolean isIndex = false;
static private final int DEFAULT_ACCESS_EXPIRY_SECONDS = 120;
// Expiry, in seconds, requested when acquiring file access.
private int fileAccessExpirySecs = DEFAULT_ACCESS_EXPIRY_SECONDS;
// Declared transient, so it is not written by default Java serialization.
private transient Connection espConnInfo;
private String fileName;
private String targetfilecluster = "";
private RemapInfo clusterRemapInfo = new RemapInfo();
private FileFilter filter;
private ColumnPruner columnPruner;
// Lazily fetched and cached by getOriginalFileMetadata().
private DFUFileDetailWrapper originalFileMetadata = null;
/**
 * Creates an HPCCFile for the named logical file using default options: an empty column list, an empty
 * filter, a freshly constructed {@link RemapInfo}, no partition maximum and an empty target cluster name.
 *
 * @param fileName
 *            The HPCC file name
 * @param espconninfo
 *            The ESP connection info (protocol,address,port,user,pass)
 * @throws HpccFileException
 *             propagated from the delegated constructor
 */
public HPCCFile(String fileName, Connection espconninfo) throws HpccFileException
{
    this(
        fileName,
        espconninfo,
        "",              // targetColumnList
        "",              // filter
        new RemapInfo(), // remap_info
        0,               // maxParts
        "");             // targetfilecluster
}
/**
 * Creates an HPCCFile from an ECL Watch connection string plus credentials. A {@link Connection} is built
 * from the connection string, the two-argument constructor is invoked, and the user name and password are
 * then applied to the stored connection.
 *
 * @param fileName
 *            The HPCC file name
 * @param connectionString
 *            to eclwatch. Format: {http|https}://{HOST}:{PORT}.
 * @param user
 *            the user name set on the connection
 * @param pass
 *            the password set on the connection
 * @throws MalformedURLException
 *             the malformed URL exception
 * @throws HpccFileException
 *             the hpcc file exception
 */
public HPCCFile(String fileName, String connectionString, String user, String pass) throws MalformedURLException, HpccFileException
{
    this(fileName, new Connection(connectionString));

    // The delegated constructor stored the connection; attach the credentials to that same instance.
    this.espConnInfo.setUserName(user);
    this.espConnInfo.setPassword(pass);
}
/**
 * Creates an HPCCFile with column projection, filtering and cluster address re-mapping options. Delegates
 * to the eight-argument constructor with {@code useTLK} set to {@code true}.
 *
 * @param fileName
 *            The HPCC file name
 * @param espconninfo
 *            the ESP connection info
 * @param targetColumnList
 *            a comma separated list of column names in dotted notation for columns within compound columns.
 * @param filter
 *            a file filter to select records of interest (SQL where syntax)
 * @param remap_info
 *            address and port re-mapping info for THOR cluster
 * @param maxParts
 *            optional the maximum number of partitions or zero for no max
 * @param targetfilecluster
 *            optional - the hpcc cluster the target file resides in
 * @throws HpccFileException
 *             the hpcc file exception
 */
public HPCCFile(String fileName, Connection espconninfo, String targetColumnList, String filter, RemapInfo remap_info, int maxParts,
        String targetfilecluster) throws HpccFileException
{
    this(
        fileName,
        espconninfo,
        targetColumnList,
        filter,
        remap_info,
        maxParts,
        targetfilecluster,
        true); // useTLK
}
/**
* Constructor for the HpccFile. Captures HPCC logical file information from the DALI Server for the clusters behind
* the ESP named by the IP address and re-maps the address information for the THOR nodes to visible addresses when
* the THOR clusters are virtual.
*
* @param fileName
* The HPCC file name
* @param espconninfo
* the espconninfo
* @param targetColumnList
* a comma separated list of column names in dotted notation for columns within compound columns.
* @param filter
* a file filter to select records of interest (SQL where syntax)
* @param remap_info
* address and port re-mapping info for THOR cluster
* @param maxParts
* optional the maximum number of partitions or zero for no max
* @param targetfilecluster
* optional - the hpcc cluster the target file resides in
* @param useTLK
* optional - whether or not the top level key should be used to help filter index files
* @throws HpccFileException
* the hpcc file exception
*/
public HPCCFile(String fileName, Connection espconninfo, String targetColumnList, String filter, RemapInfo remap_info, int maxParts,
String targetfilecluster, boolean useTLK) throws HpccFileException
{
this.fileName = fileName;
this.recordDefinition = null;
this.projectedRecordDefinition = null;
this.columnPruner = new ColumnPruner(targetColumnList);
this.espConnInfo = espconninfo;
this.useTLK = useTLK;
try
{
if (filter != null && !filter.isEmpty())
this.filter = new FileFilter(filter);
}
catch (Exception e)
{
throw new HpccFileException("Could not create HPCCFile due to invalid FileFilter", e);
}
clusterRemapInfo = remap_info;
this.targetfilecluster = targetfilecluster;
}
/**
 * Extracts the file part from a fileposition value.
 *
 * @param fpos file position
 * @return the value held in bits 48-62 of {@code fpos}; the topmost bit is masked off
 */
public static int getFilePartFromFPos(long fpos)
{
    // The upper 16 bits carry the file part plus one flag bit (per the original note, whether this is
    // a local fpos); only the lower 15 of those bits are kept.
    final long upperBits = fpos >>> 48;
    final long partBits = upperBits & 0x7fffL;
    return (int) partBits;
}
/**
 * Extracts the offset in the file part from a fileposition value.
 *
 * @param fpos file position
 * @return the low 48 bits of {@code fpos}
 */
public static long getOffsetFromFPos(long fpos)
{
    // The offset occupies the low 48 bits; everything above is cleared.
    final long offsetMask = (1L << 48) - 1L;
    return fpos & offsetMask;
}
/**
 * Gets the project list.
 *
 * @return the field list string reported by the current column pruner
 */
public String getProjectList()
{
    final String fieldList = this.columnPruner.getFieldListString();
    return fieldList;
}
/**
 * Sets the project list by replacing the column pruner. If a record definition is already available the
 * projected record definition is recomputed immediately.
 *
 * @param projectList
 *            the project list
 * @return the HPCC file
 * @throws Exception
 *             the exception
 */
public HPCCFile setProjectList(String projectList) throws Exception
{
    final ColumnPruner pruner = new ColumnPruner(projectList);
    this.columnPruner = pruner;

    // Nothing to re-project until a record definition has been loaded.
    if (this.recordDefinition == null)
    {
        return this;
    }

    updateProjectedRecordDef();
    return this;
}
/**
 * Recomputes the projected record definition by pruning the full record definition with the current
 * column pruner, then adjusts the top-level fields of the result: non-standard integers get a
 * little-endian source type, and, when blob reading is enabled, blob fields are marked as non-blobs.
 *
 * @throws Exception
 *             the exception
 */
private void updateProjectedRecordDef() throws Exception
{
    final FieldDef pruned = this.columnPruner.pruneRecordDefinition(this.recordDefinition);
    this.projectedRecordDefinition = pruned;

    for (int idx = 0; idx < pruned.getNumDefs(); idx++)
    {
        final FieldDef current = pruned.getDef(idx);

        // By default project all sub-integer types to standard integers
        if (current.isNonStandardInt())
        {
            current.setSourceType(HpccSrcType.LITTLE_ENDIAN);
        }

        // Project blobs to non-blobs, otherwise we will only get back the file position of the blob
        final boolean projectBlob = this.readBlobs && current.isBlob();
        if (projectBlob)
        {
            current.setIsBlob(false);
        }
    }
}
/**
 * Gets the file access expiry secs.
 *
 * @return the file access expiry currently configured, in seconds
 */
public int getFileAccessExpirySecs()
{
    return this.fileAccessExpirySecs;
}
/**
 * Sets the file access expiry secs and discards any existing data parts.
 *
 * @param fileAccessExpirySecs
 *            initial access to a file is granted for a period of time. This param can change the duration of that
 *            file access.
 * @return this HPCCFile
 */
public HPCCFile setFileAccessExpirySecs(int fileAccessExpirySecs)
{
    // Drop the current data parts so they are recreated with the new expiry.
    this.dataParts = null;
    this.fileAccessExpirySecs = fileAccessExpirySecs;
    return this;
}
/**
 * Gets the targetfilecluster.
 *
 * @return the name of the target file cluster currently configured
 */
public String getTargetfilecluster()
{
    return this.targetfilecluster;
}
/**
 * Sets the targetfilecluster and discards any existing data parts.
 *
 * @param targetfilecluster
 *            sets the target file cluster
 * @return this HPCCFile
 */
public HPCCFile setTargetfilecluster(String targetfilecluster)
{
    // Drop the current data parts so they are recreated against the new cluster.
    this.dataParts = null;
    this.targetfilecluster = targetfilecluster;
    return this;
}
/**
 * Gets the cluster remap info.
 *
 * @return the cluster remap info currently configured
 */
public RemapInfo getClusterRemapInfo()
{
    return this.clusterRemapInfo;
}
/**
 * Sets the cluster remap info and discards any existing data parts.
 *
 * @param remapinfo
 *            the remapinfo
 * @return this HPCCFile
 */
public HPCCFile setClusterRemapInfo(RemapInfo remapinfo)
{
    // Drop the current data parts so they are recreated with the new remapping.
    this.dataParts = null;
    this.clusterRemapInfo = remapinfo;
    return this;
}
/**
 * Get the value of useTLK option
 *
 * @return true when the TLK is to be used to filter index file reads
 */
public boolean getUseTLK()
{
    final boolean tlkEnabled = this.useTLK;
    return tlkEnabled;
}
/**
 * Sets the useTLK option and discards any existing data parts.
 * Note: the value must be set before querying any data from the file, including record definition information.
 *
 * @param useTLK should the TLK be used to filter index file reads
 *
 * @return this HPCCFile
 */
public HPCCFile setUseTLK(boolean useTLK)
{
    // Drop the current data parts so they are re-created with the new option.
    this.dataParts = null;
    this.useTLK = useTLK;
    return this;
}
/**
 * Sets the read blobs option and discards any existing data parts.
 * Note: Blobs are read by default. On older HPCC systems reading blobs can cause issues, so blob reading
 * should be disabled for those systems.
 *
 * @param readBlobs should blobs be read
 *
 * @return this file
 */
public HPCCFile setReadBlobs(boolean readBlobs)
{
    // Drop the current data parts so they are re-created with the new option.
    this.dataParts = null;
    this.readBlobs = readBlobs;
    return this;
}
/**
 * Gets the filter.
 *
 * @return the file filter currently set; may be null when no filter has been set
 */
public FileFilter getFilter()
{
    return this.filter;
}
/**
 * Sets the filter from a filter expression. The expression is turned into a {@link FileFilter} and passed
 * to {@link #setFilter(FileFilter)}.
 *
 * @param filterexpression
 *            - uses SQL 'where' syntax
 * @return this HPCCFile
 * @throws Exception
 *             the exception
 */
public HPCCFile setFilter(String filterexpression) throws Exception
{
    final FileFilter parsedFilter = new FileFilter(filterexpression);
    setFilter(parsedFilter);
    return this;
}
/**
 * Sets the filter. When data parts already exist the filter is also applied to each of them.
 *
 * @param filefilter the filter
 * @return this HPCCFile
 * @throws Exception
 *             the exception
 */
public HPCCFile setFilter(FileFilter filefilter) throws Exception
{
    this.filter = filefilter;

    // No partitions have been created yet, so there is nothing further to update.
    if (this.dataParts == null)
    {
        return this;
    }

    for (DataPartition partition : this.dataParts)
    {
        partition.setFilter(this.filter);
    }
    return this;
}
/**
 * Gets the file name.
 *
 * @return the HPCC file name this object was constructed with
 */
public String getFileName()
{
    return this.fileName;
}
/**
 * Creates the data parts, or refreshes them once they are older than the configured file access expiry.
 * <p>
 * Returns immediately when data parts already exist and have not aged past {@code fileAccessExpirySecs}.
 * Otherwise file access information is requested through the WsDFU client and used to populate the index
 * flag, the data partitions (with a trailing TLK partition split off for index files), the record
 * definition, the partition processor and the projected record definition.
 * <p>
 * The creation timestamp is only advanced after all of that succeeds, so a failed refresh is retried on
 * the next call instead of leaving stale or partially built state marked as fresh.
 *
 * @throws HpccFileException
 *             if the WsDFU client reports an init error, the file or record information cannot be
 *             retrieved, the file type cannot be mapped, or the partitions / record definition cannot be
 *             built
 */
private void createDataParts() throws HpccFileException
{
    // Multiply as long: an int product overflows once the expiry exceeds Integer.MAX_VALUE / 1000 seconds.
    final long fileAccessExpiryMS = fileAccessExpirySecs * 1000L;
    final long refreshStartMS = System.currentTimeMillis();
    final long dataPartsAgeMS = refreshStartMS - dataPartsCreationTimeMS;
    final boolean accessTokenExpired = dataPartsAgeMS >= fileAccessExpiryMS;

    if (dataParts != null)
    {
        if (!accessTokenExpired)
        {
            return;
        }
        log.info("Refreshing data parts due to access token expiration.");
    }

    HPCCWsDFUClient dfuClient = HPCCWsDFUClient.get(espConnInfo);
    if (dfuClient.hasInitError())
    {
        String errmesg = "Could not fetch '" + fileName + "' info from WsDFU ESP due to wsdfuclient init error: " + dfuClient.getInitError();
        log.error(errmesg);
        throw new HpccFileException(errmesg);
    }

    String originalRecDefInJSON = "";
    DFUFileAccessInfoWrapper fileinfoforread = null;
    try
    {
        fileinfoforread = fetchReadFileInfo(fileName, dfuClient, fileAccessExpirySecs, targetfilecluster);
        this.isIndex = fileinfoforread.getFileType().isIndex();
        originalRecDefInJSON = fileinfoforread.getRecordTypeInfoJson();
        if (originalRecDefInJSON == null)
        {
            throw new Exception("File record definiton returned from ESP was null");
        }
    }
    catch (Exception e)
    {
        log.error("Unable to retrieve file or record information: " + e.getMessage(), e);
        throw new HpccFileException("Unable to retrieve file or record information: " + e.getMessage(), e);
    }

    DataPartition.FileType fileType = DataPartition.FileType.FLAT;
    try
    {
        fileType = DataPartition.FileType.fromWrappedFileType(fileinfoforread.getFileType());
    }
    catch (Exception e)
    {
        throw new HpccFileException(e);
    }

    try
    {
        if (fileinfoforread.getNumParts() <= 0)
        {
            // Thrown inside the try on purpose: it is wrapped by the catch below, as every other failure here is.
            throw new HpccFileException("Could not fetch metadata for file: '" + fileName + "'");
        }

        ClusterRemapper clusterremapper = ClusterRemapper.makeMapper(clusterRemapInfo, fileinfoforread);
        this.dataParts = DataPartition.createPartitions(fileinfoforread.getFileParts(), clusterremapper,
                /* maxParts currently ignored anyway */0, filter, fileinfoforread.getFileAccessInfoBlob(), fileType, this.getFileName());

        // Check to see if this file has a TLK. The TLK will always be the last partition.
        // If we do have a TLK remove it from the standard list of data partitions.
        if (this.isIndex())
        {
            DataPartition lastPart = this.dataParts[this.dataParts.length - 1];
            if (lastPart.isTLK())
            {
                this.tlkPartition = lastPart;
                this.dataParts = Arrays.copyOfRange(this.dataParts, 0, this.dataParts.length - 1);
            }
        }

        this.recordDefinition = RecordDefinitionTranslator.parseJsonRecordDefinition(new JSONObject(originalRecDefInJSON));

        if (this.useTLK)
        {
            try
            {
                this.partitionProcessor = new PartitionProcessor(this.recordDefinition, this.dataParts, this.tlkPartition);
            }
            catch (Exception e)
            {
                log.error("Error while constructing partition processor, reading will continue without partition filtering: " + e.getMessage());
                this.partitionProcessor = new PartitionProcessor(this.recordDefinition, this.dataParts, null);
            }
        }
        else
        {
            this.partitionProcessor = new PartitionProcessor(this.recordDefinition, this.dataParts, null);
        }

        updateProjectedRecordDef();

        // Everything was rebuilt; only now mark the parts as fresh. The timestamp taken before the
        // request is used so the locally tracked age never understates the real age of the access.
        this.dataPartsCreationTimeMS = refreshStartMS;
    }
    catch (Exception e)
    {
        StringBuilder sb = new StringBuilder();
        sb.append("Failed to acquire file access or retrieve meta info for: '").append(fileName).append("'");
        sb.append(" with error: " + e.getMessage());
        throw new HpccFileException(sb.toString(), e);
    }
}
/**
 * The partitions for the file residing on an HPCC cluster. If a filter has been set on an index file,
 * only the partitions matching the filter will be returned.
 *
 * @return the file parts
 * @throws HpccFileException
 *             the hpcc file exception
 */
public DataPartition[] getFileParts() throws HpccFileException
{
    // Bind the result to a typed list: toArray(T[]) on a raw List is erased to Object[], which cannot be
    // returned as DataPartition[].
    final List<DataPartition> matchedPartitions = findMatchingPartitions(this.filter);
    return matchedPartitions.toArray(new DataPartition[0]);
}
/**
 * Return the list of partitions with records matching the provided filter. The data parts are created
 * (or refreshed) first, then the partition processor is asked for the matching partitions.
 *
 * @param filter the filter
 * @return the file parts matching the filter
 * @throws HpccFileException the exception
 */
public List<DataPartition> findMatchingPartitions(FileFilter filter) throws HpccFileException
{
    createDataParts();
    // Typed element parameter (instead of a raw List) so callers get DataPartition elements without casts.
    final List<DataPartition> matchedPartitions = this.partitionProcessor.findMatchingPartitions(filter);
    return matchedPartitions;
}
/**
 * Gets the partition processor. This accessor does not trigger creation of the data parts, so the
 * result is null until they have been created through another call.
 *
 * @return the current partition processor, possibly null
 */
public PartitionProcessor getPartitionProcessor()
{
    final PartitionProcessor processor = this.partitionProcessor;
    return processor;
}
/**
 * The record definition for a file on an HPCC cluster. Creates or refreshes the data parts first.
 *
 * @return the record definition
 * @throws HpccFileException
 *             the hpcc file exception
 */
public final FieldDef getRecordDefinition() throws HpccFileException
{
    createDataParts();
    final FieldDef fullDefinition = this.recordDefinition;
    return fullDefinition;
}
/**
 * Whether the file is an index with a tlk partition. Creates or refreshes the data parts first.
 *
 * @return true if a tlk partition was found for this file, false otherwise
 * @throws HpccFileException
 *             the hpcc file exception
 */
public final boolean isTlkIndex() throws HpccFileException
{
    createDataParts();
    final boolean hasTlkPartition = (this.tlkPartition != null);
    return hasTlkPartition;
}
/**
 * The projected record definition for a file on an HPCC cluster. Creates or refreshes the data parts
 * first.
 *
 * @return the projected record definition
 * @throws HpccFileException
 *             the hpcc file exception
 */
public final FieldDef getProjectedRecordDefinition() throws HpccFileException
{
    createDataParts();
    final FieldDef projected = this.projectedRecordDefinition;
    return projected;
}
/**
 * Is this an index? This accessor does not trigger creation of the data parts; the flag keeps its
 * initial value of false until the file information has been fetched.
 *
 * @return true if yes
 */
public boolean isIndex()
{
    final boolean indexFlag = this.isIndex;
    return indexFlag;
}
/**
 * Fetch read file info. Requests file access from the given client, tagging the request with a newly
 * generated identifier.
 *
 * @param fileName
 *            the file name
 * @param hpccClient
 *            the hpcc client
 * @param expirySeconds
 *            the expiry seconds
 * @param clusterName
 *            the cluster name
 * @return the DFU file access info wrapper
 * @throws Exception
 *             the exception
 * @throws ArrayOfEspExceptionWrapper
 *             the array of esp exception wrapper
 */
private static DFUFileAccessInfoWrapper fetchReadFileInfo(String fileName, HPCCWsDFUClient hpccClient, int expirySeconds, String clusterName)
        throws Exception, ArrayOfEspExceptionWrapper
{
    final String requestId = "HPCC-FILE: " + UUID.randomUUID();
    final DFUFileAccessInfoWrapper accessInfo = hpccClient.getFileAccess(fileName, clusterName, expirySeconds, requestId);
    return accessInfo;
}
/**
 * Acquire read file access. Forwards all arguments unchanged to
 * {@code acquireFileAccess(String, HPCCWsDFUClient, int, String)}.
 *
 * @param fileName
 *            the file name
 * @param hpccClient
 *            the hpcc client
 * @param expirySeconds
 *            the expiry seconds
 * @param clusterName
 *            the cluster name
 * @return the string returned by the delegated call
 * @throws Exception
 *             the exception
 * @throws ArrayOfEspExceptionWrapper
 *             the array of esp exception wrapper
 */
private static String acquireReadFileAccess(String fileName, HPCCWsDFUClient hpccClient, int expirySeconds, String clusterName)
        throws Exception, ArrayOfEspExceptionWrapper
{
    final String accessBlob = acquireFileAccess(fileName, hpccClient, expirySeconds, clusterName);
    return accessBlob;
}
/**
 * Acquire write file access. Forwards all arguments unchanged to
 * {@code acquireFileAccess(String, HPCCWsDFUClient, int, String)}.
 *
 * @param fileName
 *            the file name
 * @param hpccClient
 *            the hpcc client
 * @param expirySeconds
 *            the expiry seconds
 * @param clusterName
 *            the cluster name
 * @return the string returned by the delegated call
 * @throws Exception
 *             the exception
 * @throws ArrayOfEspExceptionWrapper
 *             the array of esp exception wrapper
 */
private static String acquireWriteFileAccess(String fileName, HPCCWsDFUClient hpccClient, int expirySeconds, String clusterName)
        throws Exception, ArrayOfEspExceptionWrapper
{
    final String accessBlob = acquireFileAccess(fileName, hpccClient, expirySeconds, clusterName);
    return accessBlob;
}
/**
 * Acquire file access. Requests a file access blob from the given client, tagging the request with a
 * newly generated identifier.
 *
 * @param fileName
 *            the file name
 * @param hpcc
 *            the hpcc
 * @param expirySeconds
 *            the expiry seconds
 * @param clusterName
 *            the cluster name
 * @return the file access blob returned by the client
 * @throws Exception
 *             the exception
 * @throws ArrayOfEspExceptionWrapper
 *             the array of esp exception wrapper
 */
private static String acquireFileAccess(String fileName, HPCCWsDFUClient hpcc, int expirySeconds, String clusterName)
        throws Exception, ArrayOfEspExceptionWrapper
{
    final String requestId = "HPCC-FILE: " + UUID.randomUUID();
    final String accessBlob = hpcc.getFileAccessBlob(fileName, clusterName, expirySeconds, requestId);
    return accessBlob;
}
/**
 * Gets the file metadata for this HPCCFile, fetching it through the WsDFU client on first use and caching
 * the result. Failures are logged rather than thrown.
 *
 * @return the file metadata information for this HPCCFile (if it exists); null when the WsDFU client
 *         reports an init error or the details could not be retrieved
 */
public DFUFileDetailWrapper getOriginalFileMetadata()
{
    if (originalFileMetadata != null)
    {
        return originalFileMetadata;
    }

    HPCCWsDFUClient dfuClient = HPCCWsDFUClient.get(espConnInfo);
    if (dfuClient.hasInitError())
    {
        String errmesg = "Could not fetch '" + fileName + "' info from WsDFU ESP due to wsdfuclient init error: " + dfuClient.getInitError();
        log.error(errmesg);
        // Do not issue the request after an init error; createDataParts() also aborts on this condition.
        return null;
    }

    try
    {
        originalFileMetadata = dfuClient.getFileDetails(fileName, targetfilecluster);
    }
    catch (Exception e)
    {
        log.error("Unable to retrieve file or record information: " + e.getMessage(), e);
    }
    return originalFileMetadata;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy