/*******************************************************************************
* HPCC SYSTEMS software Copyright (C) 2020 HPCC Systems®.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*******************************************************************************/
package org.hpccsystems.dfs.client;

import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

import org.hpccsystems.commons.ecl.FieldDef;
import org.hpccsystems.commons.ecl.FieldType;
import org.hpccsystems.commons.ecl.FieldFilter;
import org.hpccsystems.commons.ecl.FileFilter;
import org.hpccsystems.commons.ecl.FieldFilterRange;
/**
* Uses the Top Level Key partition, if present, to determine which file parts in an index match a given FileFilter.
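*
* Typical usage, shown as a sketch (how the record definition, partitions, TLK partition,
* and filter are obtained is assumed, e.g. from the file's metadata):
* <pre>{@code
* PartitionProcessor processor = new PartitionProcessor(recordDef, partitions, tlkPartition);
* List<DataPartition> matches = processor.findMatchingPartitions(fileFilter);
* }</pre>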
*/
public class PartitionProcessor
{
// A set of field filters that are all OR'd together
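// e.g. an illustrative filter (age < 18) OR (age > 65) compiles to one set containing two CompiledFieldFilters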
private class CompiledFieldFilterSet
{
public ArrayList<CompiledFieldFilter> filters = new ArrayList<CompiledFieldFilter>();
public String toString()
{
String ret = "{";
for (CompiledFieldFilter filter : filters)
{
ret += filter.toString() + " ";
}
ret += "}";
return ret;
}
}
// A set of field filter sets that are AND'd together
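// e.g. an illustrative filter ((age < 18) OR (age > 65)) AND (state = 'NY') compiles to two CompiledFieldFilterSets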
private class CompiledFileFilter
{
ArrayList<CompiledFieldFilterSet> filterSets = new ArrayList<CompiledFieldFilterSet>();
CompiledFileFilter(FieldDef recordDef, FileFilter filter) throws Exception
{
CompiledFieldFilterSet filterSet = new CompiledFieldFilterSet();
for (int i = 0; i < filter.getFieldFiltersCount(); i++)
{
FieldFilter fieldFilter = filter.getFieldFilter(i);
int fieldIndex = recordDef.getDefIndexWithFieldName(fieldFilter.getFieldName());
if (fieldIndex < 0)
{
throw new Exception("Unable to find field definition for field: " + fieldFilter.getFieldName());
}
FieldType type = recordDef.getDef(fieldIndex).getFieldType();
for (FieldFilterRange filterRange : fieldFilter.getRanges())
{
filterSet.filters.add(new CompiledFieldFilter(fieldIndex, type, filterRange));
}
}
filterSets.add(filterSet);
List<FileFilter> andFileFilters = filter.getAndFileFilters();
for (int i = 0; i < andFileFilters.size(); i++)
{
filterSet = new CompiledFieldFilterSet();
FileFilter andFilter = andFileFilters.get(i);
for (int j = 0; j < andFilter.getFieldFiltersCount(); j++)
{
FieldFilter fieldFilter = andFilter.getFieldFilter(j);
int fieldIndex = recordDef.getDefIndexWithFieldName(fieldFilter.getFieldName());
if (fieldIndex < 0)
{
throw new Exception("Unable to find field definition for field: " + fieldFilter.getFieldName());
}
FieldType type = recordDef.getDef(fieldIndex).getFieldType();
List<FieldFilterRange> ranges = fieldFilter.getRanges();
for (FieldFilterRange range : ranges)
{
filterSet.filters.add(new CompiledFieldFilter(fieldIndex, type, range));
}
}
filterSets.add(filterSet);
}
}
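/**
* Returns true if this compiled filter could match at least one record in the given
* partition's [begin, end] key range.
*/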
private boolean partitionMatchesFilter(DataPartitionRecordRange partRange)
{
// Filter sets are AND'd together, so if any field set fails to match, break
boolean hasMatch = true;
for (CompiledFieldFilterSet filterSet : filterSets)
{
// All filters in the set filter on the same field
CompiledFieldFilter firstFilter = filterSet.filters.get(0);
Object fieldRangeBegin = partRange.begin.getField(firstFilter.getFieldIndex());
Object fieldRangeEnd = partRange.end.getField(firstFilter.getFieldIndex());
// Filters within the set are OR'd together, so if any filter matches, break
boolean fieldWasMatched = false;
for (CompiledFieldFilter filter : filterSet.filters)
{
fieldWasMatched = filter.matchesRange(fieldRangeBegin, fieldRangeEnd);
if (fieldWasMatched)
{
break;
}
}
hasMatch = hasMatch && fieldWasMatched;
if (!hasMatch)
{
break;
}
}
return hasMatch;
}
@Override
public String toString()
{
String ret = "[\n";
for (CompiledFieldFilterSet filterSet : filterSets)
{
ret += "\t" + filterSet.toString() + "\n";
}
ret += "]\n";
return ret;
}
}
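// Holds the begin and end key records bounding a single data partition's range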
private class DataPartitionRecordRange
{
public HPCCRecord begin = null;
public HPCCRecord end = null;
public DataPartition dataPartition = null;
}
private static final Logger log = LogManager.getLogger(PartitionProcessor.class);
private DataPartition[] dataPartitions = null;
private ArrayList<DataPartitionRecordRange> dataPartitionRanges = new ArrayList<DataPartitionRecordRange>();
private FieldDef recordDef = null;
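/**
* Constructs a PartitionProcessor, reading the TLK partition (when provided) to build a
* key record range for each data partition.
*
* @param recDef the record definition for the index
* @param partitions the data partitions of the index
* @param tlkPartition the Top Level Key partition, or null if none is available
* @throws Exception if the TLK cannot be read, or its record count does not match the number of partitions
*/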
PartitionProcessor(FieldDef recDef, DataPartition[] partitions, DataPartition tlkPartition) throws Exception
{
this.dataPartitions = partitions;
this.recordDef = recDef;
if (tlkPartition == null)
{
log.warn("No TLK partition provided to partition processor; all partitions will be returned for all filters.");
return;
}
//------------------------------------------------------------------------------
// Read the TLK file part
//------------------------------------------------------------------------------
boolean success = false;
int numRetries = 0;
Exception readFailureException = null;
HPCCRecordBuilder recordBuilder = new HPCCRecordBuilder(recDef);
ArrayList<HPCCRecord> tlkRecords = new ArrayList<HPCCRecord>(partitions.length);
while (numRetries < 3 && !success)
{
tlkRecords.clear();
try
{
HpccRemoteFileReader<HPCCRecord> fileReader = new HpccRemoteFileReader<HPCCRecord>(tlkPartition, recDef, recordBuilder);
while (fileReader.hasNext() && tlkRecords.size() < this.dataPartitions.length)
{
HPCCRecord record = fileReader.next();
// Correct integer key fields:
// signed values come back as expectedValue - (2^31),
// and the maximum unsigned value comes back negative
for (int i = 0; i < record.getNumFields(); i++)
{
Object field = record.getField(i);
FieldDef fd = recordDef.getDef(i);
if (fd.getFieldType() == FieldType.INTEGER)
{
Long longVal = (Long) field;
long intBitLength = recordDef.getDef(i).getDataLen() * 8;
if (fd.isUnsigned())
{
if (longVal < 0)
{
longVal = Long.MAX_VALUE;
}
}
record.setField(i,longVal);
}
else if (fd.getFieldType() == FieldType.STRING)
{
String strVal = (String) field;
// rtrim
strVal = strVal.replaceAll("\\s+$", "");
record.setField(i,strVal);
}
}
tlkRecords.add(record);
}
success = true;
fileReader.close();
}
catch (Exception e)
{
numRetries++;
log.error("Failed to read TLK file part retry: " + numRetries + " error: " + e.getMessage());
readFailureException = e;
}
}
if (!success)
{
throw new Exception("PartitionProcessor: Error while attempting to read TLK: " + readFailureException.getMessage());
}
//------------------------------------------------------------------------------
// Construct data partition ranges
//------------------------------------------------------------------------------
if (tlkRecords.size() != this.dataPartitions.length)
{
// This would represent some form of corruption or an issue during the read
throw new Exception("Mismatch between # of data partitions: " + dataPartitions.length
+ " and # of records in TLK: " + tlkRecords.size());
}
// Create a max value cap record
if (tlkRecords.size() > 0)
{
HPCCRecord endRecord = tlkRecords.get(tlkRecords.size()-1);
Object[] capFields = new Object[endRecord.getNumFields()];
for (int i = 0; i < endRecord.getNumFields(); i++)
{
FieldDef fd = endRecord.getFieldDefintion(i);
FieldType type = fd.getFieldType();
// If the type is scalar it might be part of the index so create a max value
// Otherwise it is part of the payload and its value doesn't matter so reuse the end record value
if (type.isScalar())
{
capFields[i] = CompiledFieldFilter.getMaxValueForType(type,(int)fd.getDataLen());
}
else
{
capFields[i] = endRecord.getField(i);
}
}
tlkRecords.add(new HPCCRecord(capFields, endRecord.getRecordDefinition()));
}
// Construct ranges
for (int i = 0; i < tlkRecords.size()-1; i++)
{
DataPartitionRecordRange range = new DataPartitionRecordRange();
range.begin = tlkRecords.get(i);
range.end = tlkRecords.get(i+1);
range.dataPartition = this.dataPartitions[i];
dataPartitionRanges.add(range);
}
}
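/**
* Returns the data partitions whose key ranges could match the given filter. All partitions
* are returned when no TLK was available, when the filter is null, or when the filter cannot
* be compiled against the record definition.
*
* @param filter the file filter to match, may be null
* @return the list of potentially matching partitions
*/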
public List<DataPartition> findMatchingPartitions(FileFilter filter)
{
// If we don't have a set of partition ranges, there wasn't a TLK
// associated with the file, so return all partitions. Do the same
// when no filter was provided.
if (dataPartitionRanges.size() == 0 || filter == null)
{
return Arrays.asList(dataPartitions);
}
CompiledFileFilter compiledFileFilter = null;
try
{
compiledFileFilter = new CompiledFileFilter(this.recordDef,filter);
}
catch (Exception e)
{
log.error("Invalid filter, returning all partitions: " + e.getMessage());
return Arrays.asList(dataPartitions);
}
ArrayList<DataPartition> matchingPartitions = new ArrayList<DataPartition>();
for (int i = 0; i < dataPartitionRanges.size(); i++)
{
DataPartitionRecordRange partitionRange = dataPartitionRanges.get(i);
if (compiledFileFilter.partitionMatchesFilter(partitionRange))
{
matchingPartitions.add(partitionRange.dataPartition);
}
}
return matchingPartitions;
}
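/**
* Returns the number of partition key ranges constructed from the TLK; zero when no TLK was provided.
*
* @return the number of partition ranges
*/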
public int getNumPartitionRanges()
{
return dataPartitionRanges.size();
}
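/**
* Returns the [begin, end] key range of the partition at the given index as a string.
*
* @param index the index of the partition range
* @return the string representation of the range
*/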
public String getPartitionRangeAsString(int index)
{
DataPartitionRecordRange range = dataPartitionRanges.get(index);
String rangeStr = "[" + range.begin.toString()
+ "," + range.end.toString() + "]\n";
return rangeStr;
}
@Override
public String toString()
{
String out = "[\n";
for (int i = 0; i < dataPartitionRanges.size(); i++)
{
DataPartitionRecordRange range = dataPartitionRanges.get(i);
out += i + "\t[" + range.begin.toString()
+ "," + range.end.toString() + "]\n";
}
out += "]\n";
return out;
}
}