All downloads are FREE. The search and download functionality uses the official Maven repository.

org.hpccsystems.dfs.client.PartitionProcessor Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
 * HPCC SYSTEMS software Copyright (C) 2020 HPCC Systems®.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *******************************************************************************/
package org.hpccsystems.dfs.client;

import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;

import java.math.BigDecimal;
import java.math.BigInteger;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

import org.hpccsystems.dfs.client.HPCCRecord;
import org.hpccsystems.dfs.client.HPCCRecordBuilder;
import org.hpccsystems.dfs.client.HpccRemoteFileReader;
import org.hpccsystems.dfs.client.DataPartition;
import org.hpccsystems.dfs.client.CompiledFieldFilter;

import org.hpccsystems.commons.ecl.FieldDef;
import org.hpccsystems.commons.ecl.FieldType;
import org.hpccsystems.commons.ecl.FieldFilter;
import org.hpccsystems.commons.ecl.FileFilter;
import org.hpccsystems.commons.ecl.FieldFilterRange;

/**
 * Uses the Top Level Key (TLK) partition, if present, to determine which file parts in an index match a given
 * FileFilter.
 *
 * <p>At construction time the TLK file part is read (one record per data partition). Record {@code i} is used as the
 * beginning of the key range of data partition {@code i} and record {@code i + 1} as its end; a synthetic
 * "max value" record is appended so that the last partition also has an end record. When no TLK partition is
 * supplied no ranges are built and every query returns all partitions.
 */
public class PartitionProcessor
{
    // A set of field filters that are all OR'd together
    private static final class CompiledFieldFilterSet
    {
        final List<CompiledFieldFilter> filters = new ArrayList<CompiledFieldFilter>();

        @Override
        public String toString()
        {
            StringBuilder ret = new StringBuilder("{");
            for (CompiledFieldFilter filter : filters)
            {
                ret.append(filter.toString()).append(" ");
            }
            ret.append("}");
            return ret.toString();
        }
    }

    // A set of field filter sets that are AND'd together
    private static final class CompiledFileFilter
    {
        final List<CompiledFieldFilterSet> filterSets = new ArrayList<CompiledFieldFilterSet>();

        /**
         * Compiles the provided filter, and each of its AND'd file filters, into one filter set apiece.
         *
         * @param recordDef record definition used to resolve field names to field indexes and types
         * @param filter the file filter to compile
         * @throws Exception if a filter references a field that does not exist in the record definition
         */
        CompiledFileFilter(FieldDef recordDef, FileFilter filter) throws Exception
        {
            addIfNotEmpty(compileFilterSet(recordDef, filter));

            List<FileFilter> andFileFilters = filter.getAndFileFilters();
            if (andFileFilters != null)
            {
                for (FileFilter andFilter : andFileFilters)
                {
                    addIfNotEmpty(compileFilterSet(recordDef, andFilter));
                }
            }
        }

        // A set without any filters places no constraint on a partition. Registering it would make
        // partitionMatchesFilter() fail when it looks up the first filter of the set, so it is dropped here.
        private void addIfNotEmpty(CompiledFieldFilterSet filterSet)
        {
            if (!filterSet.filters.isEmpty())
            {
                filterSets.add(filterSet);
            }
        }

        // Compiles every range of every field filter directly attached to the provided file filter into one OR'd set.
        private static CompiledFieldFilterSet compileFilterSet(FieldDef recordDef, FileFilter filter) throws Exception
        {
            CompiledFieldFilterSet filterSet = new CompiledFieldFilterSet();
            for (int i = 0; i < filter.getFieldFiltersCount(); i++)
            {
                FieldFilter fieldFilter = filter.getFieldFilter(i);

                int fieldIndex = recordDef.getDefIndexWithFieldName(fieldFilter.getFieldName());
                if (fieldIndex < 0)
                {
                    throw new Exception("Unable to find field definition for field: " + fieldFilter.getFieldName());
                }

                FieldType type = recordDef.getDef(fieldIndex).getFieldType();

                for (FieldFilterRange filterRange : fieldFilter.getRanges())
                {
                    filterSet.filters.add(new CompiledFieldFilter(fieldIndex, type, filterRange));
                }
            }
            return filterSet;
        }

        /**
         * Checks a partition's key range against every filter set.
         *
         * @param partRange the begin / end records describing the partition's key range
         * @return true if every filter set has at least one filter matching the range (also true when there are no
         *         filter sets at all), false otherwise
         */
        private boolean partitionMatchesFilter(DataPartitionRecordRange partRange)
        {
            // Filter sets are AND'd together, so the first unmatched set decides the outcome
            for (CompiledFieldFilterSet filterSet : filterSets)
            {
                // All filters in the set are expected to filter on the same field, so the field range is
                // looked up once using the first filter. NOTE(review): confirm FileFilter guarantees this.
                CompiledFieldFilter firstFilter = filterSet.filters.get(0);
                Object fieldRangeBegin = partRange.begin.getField(firstFilter.getFieldIndex());
                Object fieldRangeEnd = partRange.end.getField(firstFilter.getFieldIndex());

                // The filters within a set are OR'd together, so the first matching filter is enough
                boolean fieldWasMatched = false;
                for (CompiledFieldFilter filter : filterSet.filters)
                {
                    if (filter.matchesRange(fieldRangeBegin, fieldRangeEnd))
                    {
                        fieldWasMatched = true;
                        break;
                    }
                }

                if (!fieldWasMatched)
                {
                    return false;
                }
            }

            return true;
        }

        @Override
        public String toString()
        {
            StringBuilder ret = new StringBuilder("[\n");
            for (CompiledFieldFilterSet filterSet : filterSets)
            {
                ret.append("\t").append(filterSet.toString()).append("\n");
            }

            ret.append("]\n");
            return ret.toString();
        }
    }

    // The key range covered by a single data partition: [begin, end]
    private static final class DataPartitionRecordRange
    {
        final HPCCRecord begin;
        final HPCCRecord end;
        final DataPartition dataPartition;

        DataPartitionRecordRange(HPCCRecord begin, HPCCRecord end, DataPartition dataPartition)
        {
            this.begin = begin;
            this.end = end;
            this.dataPartition = dataPartition;
        }
    }

    private static final Logger log = LogManager.getLogger(PartitionProcessor.class);

    // Number of times reading the TLK file part is attempted before giving up
    private static final int MAX_TLK_READ_ATTEMPTS = 3;

    private final DataPartition[] dataPartitions;
    private final List<DataPartitionRecordRange> dataPartitionRanges = new ArrayList<DataPartitionRecordRange>();
    private final FieldDef recordDef;

    /**
     * Builds the per-partition key ranges from the TLK file part.
     *
     * @param recDef record definition of the index
     * @param partitions the data partitions of the index; null is treated as no partitions
     * @param tlkPartition the TLK file part; when null no ranges are built and all partitions are returned for all
     *        filters
     * @throws Exception if the TLK cannot be read after {@value #MAX_TLK_READ_ATTEMPTS} attempts, or if the number
     *         of TLK records read does not equal the number of data partitions
     */
    PartitionProcessor(FieldDef recDef, DataPartition[] partitions, DataPartition tlkPartition) throws Exception
    {
        this.dataPartitions = (partitions != null) ? partitions : new DataPartition[0];
        this.recordDef = recDef;

        if (tlkPartition == null)
        {
            log.warn("No TLK partition provided to partition processor; all partitions will be returned for all filters.");
            return;
        }

        //------------------------------------------------------------------------------
        // Read the TLK file part
        //------------------------------------------------------------------------------

        List<HPCCRecord> tlkRecords = readTlkRecords(tlkPartition);

        //------------------------------------------------------------------------------
        // Construct data partition ranges
        //------------------------------------------------------------------------------

        if (tlkRecords.size() != this.dataPartitions.length)
        {
            // This would represent some form of corruption or an issue during the read
            throw new Exception("Mismatch between # of data partitions: " + dataPartitions.length
                              + " and # of records in TLK: " + tlkRecords.size());
        }

        // Append a max value cap record so the last partition has an end record
        if (tlkRecords.size() > 0)
        {
            tlkRecords.add(createCapRecord(tlkRecords.get(tlkRecords.size() - 1)));
        }

        // Construct ranges: record i begins partition i, record i + 1 ends it
        for (int i = 0; i < tlkRecords.size() - 1; i++)
        {
            dataPartitionRanges.add(
                    new DataPartitionRecordRange(tlkRecords.get(i), tlkRecords.get(i + 1), this.dataPartitions[i]));
        }
    }

    // Reads at most one TLK record per data partition, re-reading from scratch on failure. The reader is always
    // closed, including when an attempt fails part way through.
    // The reader is used as a raw type with an explicit cast because its generic signature is not visible here.
    @SuppressWarnings("rawtypes")
    private List<HPCCRecord> readTlkRecords(DataPartition tlkPartition) throws Exception
    {
        HPCCRecordBuilder recordBuilder = new HPCCRecordBuilder(this.recordDef);
        List<HPCCRecord> tlkRecords = new ArrayList<HPCCRecord>(this.dataPartitions.length);
        Exception readFailureException = null;

        for (int attempt = 1; attempt <= MAX_TLK_READ_ATTEMPTS; attempt++)
        {
            // Discard any records left over from a failed attempt
            tlkRecords.clear();

            HpccRemoteFileReader fileReader = null;
            try
            {
                fileReader = new HpccRemoteFileReader(tlkPartition, this.recordDef, recordBuilder);
                while (fileReader.hasNext() && tlkRecords.size() < this.dataPartitions.length)
                {
                    HPCCRecord record = (HPCCRecord) fileReader.next();
                    normalizeKeyFields(record);
                    tlkRecords.add(record);
                }

                return tlkRecords;
            }
            catch (Exception e)
            {
                log.error("Failed to read TLK file part retry: " + attempt + " error: " + e.getMessage(), e);
                readFailureException = e;
            }
            finally
            {
                if (fileReader != null)
                {
                    try
                    {
                        fileReader.close();
                    }
                    catch (Exception closeError)
                    {
                        // The records (if any) were already read; a failed close must not discard them
                        log.warn("Failed to close TLK file reader: " + closeError.getMessage());
                    }
                }
            }
        }

        throw new Exception("PartitionProcessor: Error while attempting to read TLK: "
                          + readFailureException.getMessage(), readFailureException);
    }

    // Corrects key field values of a TLK record in place:
    //  - unsigned integer fields that read back negative are replaced with Long.MAX_VALUE
    //  - string fields have trailing whitespace removed
    private void normalizeKeyFields(HPCCRecord record) throws Exception
    {
        for (int i = 0; i < record.getNumFields(); i++)
        {
            Object field = record.getField(i);
            FieldDef fd = recordDef.getDef(i);
            if (fd.getFieldType() == FieldType.INTEGER)
            {
                Long longVal = (Long) field;

                // Max unsigned value is negative
                if (fd.isUnsigned() && longVal != null && longVal < 0)
                {
                    longVal = Long.MAX_VALUE;
                }

                record.setField(i, longVal);
            }
            else if (fd.getFieldType() == FieldType.STRING)
            {
                String strVal = (String) field;

                // rtrim
                if (strVal != null)
                {
                    strVal = strVal.replaceAll("\\s+$", "");
                }
                record.setField(i, strVal);
            }
        }
    }

    // Creates the record that ends the last partition's range, using the last TLK record as a template.
    private static HPCCRecord createCapRecord(HPCCRecord endRecord) throws Exception
    {
        Object[] capFields = new Object[endRecord.getNumFields()];
        for (int i = 0; i < capFields.length; i++)
        {
            FieldDef fd = endRecord.getFieldDefintion(i);
            FieldType type = fd.getFieldType();

            // If the type is scalar it might be part of the index so create a max value
            // Otherwise it is part of the payload and its value doesn't matter so reuse the end record value
            if (type.isScalar())
            {
                capFields[i] = CompiledFieldFilter.getMaxValueForType(type, (int) fd.getDataLen());
            }
            else
            {
                capFields[i] = endRecord.getField(i);
            }
        }

        return new HPCCRecord(capFields, endRecord.getRecordDefinition());
    }

    // Returns a new list holding every data partition; a copy, so callers cannot modify the internal array.
    private List<DataPartition> allPartitions()
    {
        return new ArrayList<DataPartition>(Arrays.asList(dataPartitions));
    }

    /**
     * Finds the data partitions whose key range may contain records matching the provided filter.
     *
     * @param filter the filter to apply; may be null
     * @return a new list of the matching partitions. All partitions are returned when there are no partition ranges
     *         (no TLK was provided), when the filter is null, or when the filter cannot be compiled against the
     *         record definition.
     */
    public List<DataPartition> findMatchingPartitions(FileFilter filter)
    {
        // If we don't have a set of partition ranges. There wasn't a TLK
        // associated with the file, so return all partitions.
        if (dataPartitionRanges.isEmpty() || filter == null)
        {
            return allPartitions();
        }

        CompiledFileFilter compiledFileFilter;
        try
        {
            compiledFileFilter = new CompiledFileFilter(this.recordDef, filter);
        }
        catch (Exception e)
        {
            log.error("Invalid filter, returning all partitions: " + e.getMessage());
            return allPartitions();
        }

        List<DataPartition> matchingPartitions = new ArrayList<DataPartition>();
        for (DataPartitionRecordRange partitionRange : dataPartitionRanges)
        {
            if (compiledFileFilter.partitionMatchesFilter(partitionRange))
            {
                matchingPartitions.add(partitionRange.dataPartition);
            }
        }

        return matchingPartitions;
    }

    /**
     * @return the number of partition ranges constructed from the TLK; 0 when no TLK was provided
     */
    public int getNumPartitionRanges()
    {
        return dataPartitionRanges.size();
    }

    /**
     * Formats a single partition range as {@code [begin,end]} followed by a newline.
     *
     * @param index index of the partition range, in {@code [0, getNumPartitionRanges())}
     * @return the string form of the requested range
     * @throws IndexOutOfBoundsException if index is out of range
     */
    public String getPartitionRangeAsString(int index)
    {
        return formatRange(dataPartitionRanges.get(index));
    }

    // Formats a range as "[begin,end]\n"
    private static String formatRange(DataPartitionRecordRange range)
    {
        return "[" + range.begin.toString() + "," + range.end.toString() + "]\n";
    }

    @Override
    public String toString()
    {
        StringBuilder out = new StringBuilder("[\n");
        for (int i = 0; i < dataPartitionRanges.size(); i++)
        {
            out.append(i).append("\t").append(formatRange(dataPartitionRanges.get(i)));
        }
        out.append("]\n");
        return out.toString();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy