All downloads are free. Search and download functionality uses the official Maven repository.

org.hpccsystems.dfs.client.HPCCFile Maven / Gradle / Ivy

The newest version!
/*
 * ##############################################################################
 *
 * HPCC SYSTEMS software Copyright (C) 2018 HPCC Systems®.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 * ##############################################################################
 */

package org.hpccsystems.dfs.client;

import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.UUID;
import java.util.Arrays;
import java.util.List;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.hpccsystems.commons.ecl.FieldDef;
import org.hpccsystems.commons.ecl.FileFilter;
import org.hpccsystems.commons.ecl.HpccSrcType;
import org.hpccsystems.commons.ecl.RecordDefinitionTranslator;
import org.hpccsystems.commons.errors.HpccFileException;
import org.hpccsystems.dfs.cluster.ClusterRemapper;
import org.hpccsystems.dfs.cluster.RemapInfo;
import org.hpccsystems.ws.client.HPCCWsDFUClient;
import org.hpccsystems.ws.client.utils.Connection;
import org.hpccsystems.ws.client.wrappers.ArrayOfEspExceptionWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFileAccessInfoWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFileDetailWrapper;
import org.hpccsystems.ws.client.wrappers.wsdfu.DFUFileTypeWrapper;
import org.json.JSONObject;

/**
 * Access to file content on a collection of one or more HPCC Systems clusters.
 *
 */
public class HPCCFile implements Serializable
{
    // Serialization version identifier for this Serializable class.
    static private final long    serialVersionUID              = 1L;

    // Class-wide log4j logger.
    private static final Logger  log                           = LogManager.getLogger(HPCCFile.class);

    // Data partitions of the file; null until created, and reset to null by several setters to force re-creation.
    private DataPartition[]      dataParts;
    // Top level key partition of an index file, kept separately from dataParts; null when none was found.
    private DataPartition        tlkPartition                  = null;
    // Whether the TLK partition is handed to the partition processor.
    private boolean              useTLK                        = true;
    // When true, blob fields in the projected record definition are switched to non-blob fields.
    private boolean              readBlobs                     = true;
    // Used to find the partitions matching a filter; null until the data parts have been created.
    private PartitionProcessor   partitionProcessor            = null;
    // Wall-clock time (ms) associated with the current data parts; compared against the access expiry. -1 until set.
    private long                 dataPartsCreationTimeMS       = -1;

    // Record definition parsed from the JSON type information returned with the file access information.
    private FieldDef             recordDefinition;
    // recordDefinition after pruning by columnPruner and the default integer / blob projections.
    private FieldDef             projectedRecordDefinition;
    // Whether the file type reported with the file access information is an index.
    private boolean              isIndex                       = false;
    // Default file access duration, in seconds.
    static private final int     DEFAULT_ACCESS_EXPIRY_SECONDS = 120;
    // File access duration (seconds) requested when file access is acquired.
    private int                  fileAccessExpirySecs          = DEFAULT_ACCESS_EXPIRY_SECONDS;

    // ESP connection information; declared transient, so it is not part of the default serialized form.
    private transient Connection espConnInfo;
    // Name of the HPCC file, as supplied to the constructor.
    private String               fileName;
    // Name of the cluster the target file resides on (optional).
    private String               targetfilecluster             = "";
    // Address / port re-mapping information used when building the cluster remapper.
    private RemapInfo            clusterRemapInfo              = new RemapInfo();
    // Optional record filter; null when no filter has been set.
    private FileFilter           filter;
    // Prunes the record definition down to the requested project list.
    private ColumnPruner         columnPruner;
    // Lazily fetched file details; null until a successful fetch.
    private DFUFileDetailWrapper originalFileMetadata          = null;
    /**
     * Creates an HPCCFile for the named file using only a file name and an ESP connection.
     *
     * Delegates to the full constructor with an empty column list, an empty filter, a fresh RemapInfo, no
     * partition maximum (zero) and an empty target file cluster name.
     *
     * @param fileName
     *            The HPCC file name
     * @param espconninfo
     *            The ESP connection info (protocol,address,port,user,pass)
     * @throws HpccFileException
     *             if the file object could not be created
     */
    public HPCCFile(String fileName, Connection espconninfo) throws HpccFileException
    {
        this(fileName, espconninfo, "", "", new RemapInfo(), 0, "");
    }

    /**
     * Creates an HPCCFile from a connection string and credentials.
     *
     * A Connection is built from the connection string and passed to the (fileName, Connection) constructor; the
     * user name and password are then set on that connection.
     *
     * @param fileName
     *            The HPCC file name
     * @param connectionString
     *            to eclwatch. Format: {http|https}://{HOST}:{PORT}.
     * @param user
     *            the user name to set on the connection
     * @param pass
     *            the password to set on the connection
     * @throws MalformedURLException
     *             declared for the creation of the Connection from the connection string
     * @throws HpccFileException
     *             if the file object could not be created
     */
    public HPCCFile(String fileName, String connectionString, String user, String pass) throws MalformedURLException, HpccFileException
    {
        this(fileName, new Connection(connectionString));

        // Credentials are applied to the Connection created for the delegated constructor
        this.espConnInfo.setUserName(user);
        this.espConnInfo.setPassword(pass);
    }

    /**
     * Creates an HPCCFile with column projection, record filter, address re-mapping and target cluster options.
     *
     * Delegates to the full constructor with use of the top level key enabled.
     *
     * @param fileName
     *            The HPCC file name
     * @param espconninfo
     *            the ESP connection info
     * @param targetColumnList
     *            a comma separated list of column names in dotted notation for columns within compound columns.
     * @param filter
     *            a file filter to select records of interest (SQL where syntax)
     * @param remap_info
     *            address and port re-mapping info for THOR cluster
     * @param maxParts
     *            optional the maximum number of partitions or zero for no max
     * @param targetfilecluster
     *            optional - the hpcc cluster the target file resides in
     * @throws HpccFileException
     *             if the file object could not be created
     */
    public HPCCFile(String fileName, Connection espconninfo, String targetColumnList, String filter, RemapInfo remap_info, int maxParts,
            String targetfilecluster) throws HpccFileException
    {
        this(fileName, espconninfo, targetColumnList, filter, remap_info, maxParts, targetfilecluster, true);
    }

    /**
     * Constructor for the HpccFile. Captures HPCC logical file information from the DALI Server for the clusters behind
     * the ESP named by the IP address and re-maps the address information for the THOR nodes to visible addresses when
     * the THOR clusters are virtual.
     *
     * @param fileName
     *            The HPCC file name
     * @param espconninfo
     *            the espconninfo
     * @param targetColumnList
     *            a comma separated list of column names in dotted notation for columns within compound columns.
     * @param filter
     *            a file filter to select records of interest (SQL where syntax)
     * @param remap_info
     *            address and port re-mapping info for THOR cluster
     * @param maxParts
     *            optional the maximum number of partitions or zero for no max
     * @param targetfilecluster
     *            optional - the hpcc cluster the target file resides in
     * @param useTLK
     *            optional - whether or not the top level key should be used to help filter index files
     * @throws HpccFileException
     *             the hpcc file exception
     */
    public HPCCFile(String fileName, Connection espconninfo, String targetColumnList, String filter, RemapInfo remap_info, int maxParts,
            String targetfilecluster, boolean useTLK) throws HpccFileException
    {
        this.fileName = fileName;
        this.recordDefinition = null;
        this.projectedRecordDefinition = null;
        this.columnPruner = new ColumnPruner(targetColumnList);
        this.espConnInfo = espconninfo;
        this.useTLK = useTLK;

        try
        {
            if (filter != null && !filter.isEmpty())
                this.filter = new FileFilter(filter);
        }
        catch (Exception e)
        {
            throw new HpccFileException("Could not create HPCCFile due to invalid FileFilter", e);
        }

        clusterRemapInfo = remap_info;
        this.targetfilecluster = targetfilecluster;
    }

    /**
     * Extracts the file part number from a file position value.
     *
     * @param fpos file position
     * @return the value held in bits 48 to 62 of the file position
     */
    public static int getFilePartFromFPos(long fpos)
    {
        // The upper 16 bits carry the file part; the topmost bit is masked off
        // (per the original comment these bits also indicate whether this is a local fpos)
        final long upperBits = fpos >>> 48L;
        final long filePart = upperBits & 0x7fffL;
        return (int) filePart;
    }

    /**
     * Extracts the offset within the file part from a file position value.
     *
     * @param fpos file position
     * @return the low 48 bits of the file position
     */
    public static long getOffsetFromFPos(long fpos)
    {
        // The low 48 bits store the offset
        final long offsetMask = (1L << 48) - 1L;
        return fpos & offsetMask;
    }

    /**
     * Gets the project list as reported by the current column pruner.
     *
     * @return the field list string of the column pruner
     */
    public String getProjectList()
    {
        final String fieldList = this.columnPruner.getFieldListString();
        return fieldList;
    }

    /**
     * Sets the project list by replacing the column pruner. If the record definition is already known, the
     * projected record definition is recomputed immediately.
     *
     * @param projectList
     *            the project list
     * @return this HPCCFile
     * @throws Exception
     *             if the projected record definition could not be updated
     */
    public HPCCFile setProjectList(String projectList) throws Exception
    {
        this.columnPruner = new ColumnPruner(projectList);

        // Nothing to re-project until a record definition is available
        boolean recordDefinitionKnown = this.recordDefinition != null;
        if (recordDefinitionKnown)
        {
            updateProjectedRecordDef();
        }

        return this;
    }

    /**
     * Recomputes the projected record definition by pruning the record definition with the current column
     * pruner and then applying the default projections to its fields.
     *
     * @throws Exception
     *             if pruning the record definition fails
     */
    private void updateProjectedRecordDef() throws Exception
    {
        FieldDef projected = this.columnPruner.pruneRecordDefinition(this.recordDefinition);
        this.projectedRecordDefinition = projected;

        for (int fieldIdx = 0; fieldIdx < projected.getNumDefs(); fieldIdx++)
        {
            FieldDef childDef = projected.getDef(fieldIdx);

            // By default sub-integer types are projected to standard integers
            if (childDef.isNonStandardInt())
            {
                childDef.setSourceType(HpccSrcType.LITTLE_ENDIAN);
            }

            // Blobs are projected to non-blobs, otherwise only the file position of the blob would come back
            if (readBlobs)
            {
                if (childDef.isBlob())
                {
                    childDef.setIsBlob(false);
                }
            }
        }
    }

    /**
     * Gets the duration, in seconds, for which file access is requested.
     *
     * @return initial file access expiry in seconds
     */
    public int getFileAccessExpirySecs()
    {
        return this.fileAccessExpirySecs;
    }

    /**
     * Sets the duration, in seconds, for which file access is requested, and discards the existing data parts.
     *
     * @param fileAccessExpirySecs
     *            initial access to a file is granted for a period of time. This param can change the duration of that
     *            file access.
     * @return this HPCCFile
     */
    public HPCCFile setFileAccessExpirySecs(int fileAccessExpirySecs)
    {
        // Dropping the data parts forces them to be recreated
        this.dataParts = null;
        this.fileAccessExpirySecs = fileAccessExpirySecs;
        return this;
    }

    /**
     * Gets the name of the cluster the target file resides on.
     *
     * @return the target file cluster name
     */
    public String getTargetfilecluster()
    {
        return this.targetfilecluster;
    }

    /**
     * Sets the name of the cluster the target file resides on, and discards the existing data parts.
     *
     * @param targetfilecluster
     *            the target file cluster name
     * @return this HPCCFile
     */
    public HPCCFile setTargetfilecluster(String targetfilecluster)
    {
        // Dropping the data parts forces them to be recreated
        this.dataParts = null;
        this.targetfilecluster = targetfilecluster;
        return this;
    }

    /**
     * Gets the address and port re-mapping information currently in use.
     *
     * @return the cluster remap info
     */
    public RemapInfo getClusterRemapInfo()
    {
        return this.clusterRemapInfo;
    }

    /**
     * Sets the address and port re-mapping information, and discards the existing data parts.
     *
     * @param remapinfo
     *            the new re-mapping information
     * @return this HPCCFile
     */
    public HPCCFile setClusterRemapInfo(RemapInfo remapinfo)
    {
        // Dropping the data parts forces them to be recreated
        this.dataParts = null;
        this.clusterRemapInfo = remapinfo;
        return this;
    }

    /**
     * Gets the value of the useTLK option.
     *
     * @return a boolean value indicating use of the TLK to filter index file reads
     */
    public boolean getUseTLK()
    {
        return useTLK;
    }

    /**
     * Sets the useTLK option and discards the existing data parts.
     * Note: the value must be set before querying any data from the file, including record definition information.
     *
     * @param useTLK should the TLK be used to filter index file reads
     *
     * @return this HPCCFile
     */
    public HPCCFile setUseTLK(boolean useTLK)
    {
        // Dropping the data parts forces them to be re-created
        this.dataParts = null;
        this.useTLK = useTLK;
        return this;
    }

    /**
     * Sets the read blobs option and discards the existing data parts.
     * Note: Blobs are read by default. On older HPCC systems reading blobs can cause issues; reading blobs should
     * be disabled for these systems.
     *
     * @param readBlobs should blobs be read
     *
     * @return this file
     */
    public HPCCFile setReadBlobs(boolean readBlobs)
    {
        // Dropping the data parts forces them to be re-created
        this.dataParts = null;
        this.readBlobs = readBlobs;
        return this;
    }

    /**
     * Gets the record filter currently set on this file.
     *
     * @return the filter, or null when none has been set
     */
    public FileFilter getFilter()
    {
        return this.filter;
    }

    /**
     * Sets the filter from a filter expression.
     *
     * @param filterexpression
     *            - uses SQL 'where' syntax
     * @return this HPCCFile
     * @throws Exception
     *             if the filter could not be created or applied
     */
    public HPCCFile setFilter(String filterexpression) throws Exception
    {
        FileFilter parsedFilter = new FileFilter(filterexpression);
        setFilter(parsedFilter);
        return this;
    }

    /**
     * Sets the filter and, when data parts already exist, applies it to each of them.
     *
     * @param filefilter the filter
     * @return this HPCCFile
     * @throws Exception
     *             if the filter could not be applied
     */
    public HPCCFile setFilter(FileFilter filefilter) throws Exception
    {
        this.filter = filefilter;

        // No data parts yet; there is nothing to propagate the filter to
        if (this.dataParts == null)
        {
            return this;
        }

        for (DataPartition part : this.dataParts)
        {
            part.setFilter(this.filter);
        }

        return this;
    }

    /**
     * Gets the name of the HPCC file this object refers to.
     *
     * @return the file name
     */
    public String getFileName()
    {
        return this.fileName;
    }

    /**
     * Creates the data parts, or refreshes them once the file access period has elapsed.
     *
     * Requests read access and metadata for the file from the WsDFU client, builds the data partitions (keeping
     * the top level key partition of an index file separately when the last partition is one), parses the record
     * definition, constructs the partition processor and recomputes the projected record definition. Returns
     * immediately when data parts exist and are younger than the file access expiry.
     *
     * The new partitions, TLK partition, record definition and partition processor are stored together, and the
     * creation time is recorded only after everything has succeeded, so a failed attempt is retried by the next
     * call.
     *
     * @throws HpccFileException
     *             if the WsDFU client failed to initialize, or file access or metadata could not be retrieved
     */
    private void createDataParts() throws HpccFileException
    {
        // 1000L forces long arithmetic; as an int product this overflows for large expiry values
        long fileAccessExpiryMS = fileAccessExpirySecs * 1000L;
        long dataPartsAgeMS = System.currentTimeMillis() - dataPartsCreationTimeMS;
        boolean accessTokenExpired = dataPartsAgeMS >= fileAccessExpiryMS;
        if (dataParts != null)
        {
            if (accessTokenExpired)
                log.info("Refreshing data parts due to access token expiration.");
            else
                return;
        }

        // Sampled before the request is made, but only recorded after a successful refresh. Recording it up front
        // would make a failed refresh look fresh, and later calls would keep using the stale data parts.
        long refreshStartMS = System.currentTimeMillis();

        HPCCWsDFUClient dfuClient = HPCCWsDFUClient.get(espConnInfo);
        if (dfuClient.hasInitError())
        {
            String errmesg = "Could not fetch '" + fileName + "' info from WsDFU ESP due to wsdfuclient init error: " + dfuClient.getInitError();
            log.error(errmesg);
            throw new HpccFileException(errmesg);
        }

        String originalRecDefInJSON = "";
        DFUFileAccessInfoWrapper fileinfoforread = null;
        try
        {
            fileinfoforread = fetchReadFileInfo(fileName, dfuClient, fileAccessExpirySecs, targetfilecluster);
            this.isIndex = fileinfoforread.getFileType().isIndex();

            originalRecDefInJSON = fileinfoforread.getRecordTypeInfoJson();
            if (originalRecDefInJSON == null)
            {
                throw new Exception("File record definiton returned from ESP was null");
            }
        }
        catch (Exception e)
        {
            log.error("Unable to retrieve file or record information: " + e.getMessage());
            throw new HpccFileException("Unable to retrieve file or record information: " + e.getMessage(), e);
        }

        DataPartition.FileType fileType = DataPartition.FileType.FLAT;
        try
        {
            fileType = DataPartition.FileType.fromWrappedFileType(fileinfoforread.getFileType());
        }
        catch (Exception e)
        {
            throw new HpccFileException(e);
        }

        try
        {
            if (fileinfoforread.getNumParts() <= 0)
            {
                throw new HpccFileException("Could not fetch metadata for file: '" + fileName + "'");
            }

            ClusterRemapper clusterremapper = ClusterRemapper.makeMapper(clusterRemapInfo, fileinfoforread);
            DataPartition[] newDataParts = DataPartition.createPartitions(fileinfoforread.getFileParts(), clusterremapper,
                    /* maxParts currently ignored anyway */0, filter, fileinfoforread.getFileAccessInfoBlob(), fileType, this.getFileName());

            // Check to see if this file has a TLK. The TLK will always be the last partition.
            // If we do have a TLK remove it from the standard list of data partitions.
            // Starts as null so a refresh never carries over a TLK partition from a previous fetch.
            DataPartition newTlkPartition = null;
            if (this.isIndex())
            {
                DataPartition lastPart = newDataParts[newDataParts.length - 1];
                if (lastPart.isTLK())
                {
                    newTlkPartition = lastPart;
                    newDataParts = Arrays.copyOfRange(newDataParts, 0, newDataParts.length - 1);
                }
            }

            FieldDef newRecordDefinition = RecordDefinitionTranslator.parseJsonRecordDefinition(new JSONObject(originalRecDefInJSON));

            PartitionProcessor newPartitionProcessor;
            if (this.useTLK)
            {
                try
                {
                    newPartitionProcessor = new PartitionProcessor(newRecordDefinition, newDataParts, newTlkPartition);
                }
                catch (Exception e)
                {
                    log.error("Error while constructing partition processor, reading will continue without partition filtering: " + e.getMessage());
                    newPartitionProcessor = new PartitionProcessor(newRecordDefinition, newDataParts, null);
                }
            }
            else
            {
                newPartitionProcessor = new PartitionProcessor(newRecordDefinition, newDataParts, null);
            }

            // Everything was built successfully: store the new state together
            this.dataParts = newDataParts;
            this.tlkPartition = newTlkPartition;
            this.recordDefinition = newRecordDefinition;
            this.partitionProcessor = newPartitionProcessor;

            updateProjectedRecordDef();

            this.dataPartsCreationTimeMS = refreshStartMS;
        }
        catch (Exception e)
        {
            StringBuilder sb = new StringBuilder();
            sb.append("Failed to acquire file access or retrieve meta info for: '").append(fileName).append("'");
            sb.append(" with error: " + e.getMessage());
            throw new HpccFileException(sb.toString(), e);
        }
    }

    /**
     * The partitions for the file residing on an HPCC cluster, as selected by passing the currently set filter to
     * {@link #findMatchingPartitions(FileFilter)}.
     *
     * @return the file parts
     * @throws HpccFileException
     *             if the data parts could not be created
     */
    public DataPartition[] getFileParts() throws HpccFileException
    {
        // A typed local keeps toArray(T[]) generic; invoked on a raw List it returns Object[], which cannot be
        // returned as DataPartition[]
        List<DataPartition> matchedPartitions = findMatchingPartitions(this.filter);
        return matchedPartitions.toArray(new DataPartition[0]);
    }

    /**
     * Return the list of partitions with records matching the provided filter.
     *
     * Creates (or refreshes) the data parts first, then delegates to the partition processor.
     *
     * @param filter the filter
     * @return the matching file parts
     * @throws HpccFileException
     *             if the data parts could not be created
     */
    public List<DataPartition> findMatchingPartitions(FileFilter filter) throws HpccFileException
    {
        createDataParts();
        return this.partitionProcessor.findMatchingPartitions(filter);
    }

    /**
     * Gets the partition processor currently held by this file. This accessor does not create the data parts
     * itself, so the result is null until they have been created.
     *
     * @return the partition processor, possibly null
     */
    public PartitionProcessor getPartitionProcessor()
    {
        return partitionProcessor;
    }

    /**
     * The record definition for a file on an HPCC cluster. The data parts are created (or refreshed) first.
     *
     * @return the record definition
     * @throws HpccFileException
     *             if the data parts could not be created
     */
    public final FieldDef getRecordDefinition() throws HpccFileException
    {
        createDataParts();
        return this.recordDefinition;
    }

    /**
     * Whether the file is an index with a tlk partition. The data parts are created (or refreshed) first.
     *
     * @return true if a tlk partition was found for this file, false otherwise
     * @throws HpccFileException
     *             if the data parts could not be created
     */
    public final boolean isTlkIndex() throws HpccFileException
    {
        createDataParts();
        boolean hasTlkPartition = this.tlkPartition != null;
        return hasTlkPartition;
    }
    /**
     * The projected record definition for a file on an HPCC cluster, i.e. the record definition after applying the
     * project list. The data parts are created (or refreshed) first.
     *
     * @return the projected record definition
     * @throws HpccFileException
     *             if the data parts could not be created
     */
    public final FieldDef getProjectedRecordDefinition() throws HpccFileException
    {
        createDataParts();
        return this.projectedRecordDefinition;
    }

    /**
     * Is this an index? Returns the stored flag without contacting the cluster; the flag is false until file
     * information has been retrieved.
     *
     * @return true if yes
     */
    public boolean isIndex()
    {
        return isIndex;
    }

    /**
     * Requests file access information for a file, identifying the request with a freshly generated unique id.
     *
     * @param fileName
     *            the file name
     * @param hpccClient
     *            the WsDFU client used to make the request
     * @param expirySeconds
     *            the requested access duration in seconds
     * @param clusterName
     *            the cluster name
     * @return the DFU file access info wrapper
     * @throws Exception
     *             if the request fails
     * @throws ArrayOfEspExceptionWrapper
     *             the array of esp exception wrapper
     */
    private static DFUFileAccessInfoWrapper fetchReadFileInfo(String fileName, HPCCWsDFUClient hpccClient, int expirySeconds, String clusterName)
            throws Exception, ArrayOfEspExceptionWrapper
    {
        final String randomPart = UUID.randomUUID().toString();
        final String requestId = "HPCC-FILE: " + randomPart;
        return hpccClient.getFileAccess(fileName, clusterName, expirySeconds, requestId);
    }

    /**
     * Acquire read file access. Forwards all arguments unchanged to acquireFileAccess.
     *
     * @param fileName
     *            the file name
     * @param hpccClient
     *            the WsDFU client used to make the request
     * @param expirySeconds
     *            the requested access duration in seconds
     * @param clusterName
     *            the cluster name
     * @return the file access blob
     * @throws Exception
     *             if the request fails
     * @throws ArrayOfEspExceptionWrapper
     *             the array of esp exception wrapper
     */
    private static String acquireReadFileAccess(String fileName, HPCCWsDFUClient hpccClient, int expirySeconds, String clusterName)
            throws Exception, ArrayOfEspExceptionWrapper
    {
        final String accessBlob = acquireFileAccess(fileName, hpccClient, expirySeconds, clusterName);
        return accessBlob;
    }

    /**
     * Acquire write file access. Forwards all arguments unchanged to acquireFileAccess.
     *
     * @param fileName
     *            the file name
     * @param hpccClient
     *            the WsDFU client used to make the request
     * @param expirySeconds
     *            the requested access duration in seconds
     * @param clusterName
     *            the cluster name
     * @return the file access blob
     * @throws Exception
     *             if the request fails
     * @throws ArrayOfEspExceptionWrapper
     *             the array of esp exception wrapper
     */
    private static String acquireWriteFileAccess(String fileName, HPCCWsDFUClient hpccClient, int expirySeconds, String clusterName)
            throws Exception, ArrayOfEspExceptionWrapper
    {
        final String accessBlob = acquireFileAccess(fileName, hpccClient, expirySeconds, clusterName);
        return accessBlob;
    }

    /**
     * Requests a file access blob for a file, identifying the request with a freshly generated unique id.
     *
     * @param fileName
     *            the file name
     * @param hpcc
     *            the WsDFU client used to make the request
     * @param expirySeconds
     *            the requested access duration in seconds
     * @param clusterName
     *            the cluster name
     * @return the file access blob
     * @throws Exception
     *             if the request fails
     * @throws ArrayOfEspExceptionWrapper
     *             the array of esp exception wrapper
     */
    private static String acquireFileAccess(String fileName, HPCCWsDFUClient hpcc, int expirySeconds, String clusterName)
            throws Exception, ArrayOfEspExceptionWrapper
    {
        final String randomPart = UUID.randomUUID().toString();
        final String requestId = "HPCC-FILE: " + randomPart;
        return hpcc.getFileAccessBlob(fileName, clusterName, expirySeconds, requestId);
    }

    /**
     * Gets the file details for this HPCCFile, fetching them from the WsDFU client on first use and caching the
     * result. Failures are logged rather than thrown; a failed fetch leaves the cache empty so a later call tries
     * again.
     *
     * @return the file metadata information for this HPCCFile (if it exists), or null when it could not be
     *         retrieved
     */
    public DFUFileDetailWrapper getOriginalFileMetadata()
    {
        if (originalFileMetadata != null)
        {
            return originalFileMetadata;
        }

        HPCCWsDFUClient dfuClient = HPCCWsDFUClient.get(espConnInfo);
        if (dfuClient.hasInitError())
        {
            String errmesg = "Could not fetch '" + fileName + "' info from WsDFU ESP due to wsdfuclient init error: " + dfuClient.getInitError();
            log.error(errmesg);

            // Do not issue the request on a client that failed to initialize
            return null;
        }

        try
        {
            originalFileMetadata = dfuClient.getFileDetails(fileName, targetfilecluster);
        }
        catch (Exception e)
        {
            log.error("Unable to retrieve file or record information: " + e.getMessage(), e);
        }

        return originalFileMetadata;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy