org.hpccsystems.dfs.client.PartitionProcessor (dfsclient)
Client interface into HPCC Systems' Distributed File System.
/*******************************************************************************
* HPCC SYSTEMS software Copyright (C) 2020 HPCC Systems®.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*******************************************************************************/
package org.hpccsystems.dfs.client;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.hpccsystems.dfs.client.HPCCRecord;
import org.hpccsystems.dfs.client.HPCCRecordBuilder;
import org.hpccsystems.dfs.client.HpccRemoteFileReader;
import org.hpccsystems.dfs.client.DataPartition;
import org.hpccsystems.dfs.client.CompiledFieldFilter;
import org.hpccsystems.commons.ecl.FieldDef;
import org.hpccsystems.commons.ecl.FieldType;
import org.hpccsystems.commons.ecl.FieldFilter;
import org.hpccsystems.commons.ecl.FileFilter;
import org.hpccsystems.commons.ecl.FieldFilterRange;
/**
* Uses the Top Level Key partition, if present, to determine which file parts in an index match a given FileFilter.
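*
* <p>A minimal usage sketch (illustrative; recordDef, parts, tlkPart and filter are
* assumed to have been obtained elsewhere, e.g. from an HPCCFile and its metadata).
* The constructor is package-private, so this applies to callers within this package:</p>
* <pre>{@code
* PartitionProcessor processor = new PartitionProcessor(recordDef, parts, tlkPart);
* List<DataPartition> matching = processor.findMatchingPartitions(filter);
* }</pre>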
*/
public class PartitionProcessor
{
// A set of field filters that are all OR'd together
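// e.g. a filter such as "id = 5 OR id = 7" would compile to one set holding
// two CompiledFieldFilters (illustrative example)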
private class CompiledFieldFilterSet
{
public ArrayList<CompiledFieldFilter> filters = new ArrayList<CompiledFieldFilter>();
@Override
public String toString()
{
String ret = "{";
for (CompiledFieldFilter filter : filters)
{
ret += filter.toString() + " ";
}
ret += "}";
return ret;
}
}
// A set of field filters AND'd together
private class CompiledFileFilter
{
ArrayList<CompiledFieldFilterSet> filterSets = new ArrayList<CompiledFieldFilterSet>();
CompiledFileFilter(FieldDef recordDef, FileFilter filter) throws Exception
{
CompiledFieldFilterSet filterSet = new CompiledFieldFilterSet();
for (int i = 0; i < filter.getFieldFiltersCount(); i++)
{
FieldFilter fieldFilter = filter.getFieldFilter(i);
int fieldIndex = recordDef.getDefIndexWithFieldName(fieldFilter.getFieldName());
if (fieldIndex < 0)
{
throw new Exception("Unable to find field definition for field: " + fieldFilter.getFieldName());
}
FieldType type = recordDef.getDef(fieldIndex).getFieldType();
for (FieldFilterRange filterRange : fieldFilter.getRanges())
{
filterSet.filters.add(new CompiledFieldFilter(fieldIndex, type, filterRange));
}
}
filterSets.add(filterSet);
List<FileFilter> andFileFilters = filter.getAndFileFilters();
for (int i = 0; i < andFileFilters.size(); i++)
{
filterSet = new CompiledFieldFilterSet();
FileFilter andFilter = andFileFilters.get(i);
for (int j = 0; j < andFilter.getFieldFiltersCount(); j++)
{
FieldFilter fieldFilter = andFilter.getFieldFilter(j);
int fieldIndex = recordDef.getDefIndexWithFieldName(fieldFilter.getFieldName());
if (fieldIndex < 0)
{
throw new Exception("Unable to find field definition for field: " + fieldFilter.getFieldName());
}
FieldType type = recordDef.getDef(fieldIndex).getFieldType();
List<FieldFilterRange> ranges = fieldFilter.getRanges();
for (FieldFilterRange range : ranges)
{
filterSet.filters.add(new CompiledFieldFilter(fieldIndex, type, range));
}
}
filterSets.add(filterSet);
}
}
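// Illustrative example: with the sets {id = 5 OR id = 7} AND {type = 'A'},
// a partition matches only if its TLK range for "id" covers 5 or 7 AND its
// range for "type" covers 'A'; failing either set rules the partition out.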
private boolean partitionMatchesFilter(DataPartitionRecordRange partRange)
{
// Filter sets are AND'd together, so if any field goes unmatched we can stop early
boolean hasMatch = true;
for (CompiledFieldFilterSet filterSet : filterSets)
{
// All filters in the set filter on the same field
CompiledFieldFilter firstFilter = filterSet.filters.get(0);
Object fieldRangeBegin = partRange.begin.getField(firstFilter.getFieldIndex());
Object fieldRangeEnd = partRange.end.getField(firstFilter.getFieldIndex());
// Filters within a set are OR'd together, so stop at the first match
boolean fieldWasMatched = false;
for (CompiledFieldFilter filter : filterSet.filters)
{
fieldWasMatched = filter.matchesRange(fieldRangeBegin, fieldRangeEnd);
if (fieldWasMatched)
{
break;
}
}
hasMatch = hasMatch && fieldWasMatched;
if (!hasMatch)
{
break;
}
}
return hasMatch;
}
@Override
public String toString()
{
String ret = "[\n";
for (CompiledFieldFilterSet filterSet : filterSets)
{
ret += "\t" + filterSet.toString() + "\n";
}
ret += "]\n";
return ret;
}
}
private class DataPartitionRecordRange
{
public HPCCRecord begin = null;
public HPCCRecord end = null;
public DataPartition dataPartition = null;
}
private static final Logger log = LogManager.getLogger(PartitionProcessor.class);
private DataPartition[] dataPartitions = null;
private ArrayList<DataPartitionRecordRange> dataPartitionRanges = new ArrayList<DataPartitionRecordRange>();
private FieldDef recordDef = null;
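/**
* Constructs a PartitionProcessor by reading the Top Level Key (TLK) partition,
* if provided, and building a key range for each data partition.
*
* @param recDef the record definition for the index
* @param partitions the data partitions of the index file
* @param tlkPartition the TLK partition, or null if none is available
* @throws Exception if the TLK cannot be read, or its record count does not match the partition count
*/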
PartitionProcessor(FieldDef recDef, DataPartition[] partitions, DataPartition tlkPartition) throws Exception
{
this.dataPartitions = partitions;
this.recordDef = recDef;
if (tlkPartition == null)
{
log.warn("No TLK partition provided to partition processor; all partitions will be returned for all filters.");
return;
}
//------------------------------------------------------------------------------
// Read the TLK file part
//------------------------------------------------------------------------------
boolean success = false;
int numRetries = 0;
Exception readFailureException = null;
HPCCRecordBuilder recordBuilder = new HPCCRecordBuilder(recDef);
ArrayList<HPCCRecord> tlkRecords = new ArrayList<HPCCRecord>(partitions.length);
while (numRetries < 3 && !success)
{
tlkRecords.clear();
try
{
HpccRemoteFileReader<HPCCRecord> fileReader = new HpccRemoteFileReader<HPCCRecord>(tlkPartition, recDef, recordBuilder);
while (fileReader.hasNext() && tlkRecords.size() < this.dataPartitions.length)
{
HPCCRecord record = fileReader.next();
// Correct integer key fields:
// signed values come back as expectedValue - (2^31),
// and the max unsigned value comes back negative
for (int i = 0; i < record.getNumFields(); i++)
{
Object field = record.getField(i);
FieldDef fd = recordDef.getDef(i);
if (fd.getFieldType() == FieldType.INTEGER)
{
Long longVal = (Long) field;
if (fd.isUnsigned())
{
if (longVal < 0)
{
longVal = Long.MAX_VALUE;
}
}
record.setField(i, longVal);
}
else if (fd.getFieldType() == FieldType.STRING)
{
String strVal = (String) field;
// Trim trailing whitespace (rtrim)
strVal = strVal.replaceAll("\\s+$", "");
record.setField(i, strVal);
}
}
tlkRecords.add(record);
}
success = true;
fileReader.close();
}
catch (Exception e)
{
numRetries++;
log.error("Failed to read TLK file part retry: " + numRetries + " error: " + e.getMessage());
readFailureException = e;
}
}
if (!success)
{
throw new Exception("PartitionProcessor: Error while attempting to read TLK: " + readFailureException.getMessage());
}
//------------------------------------------------------------------------------
// Construct data partition ranges
//------------------------------------------------------------------------------
if (tlkRecords.size() != this.dataPartitions.length)
{
// This would represent some form of corruption or an issue during the read
throw new Exception("Mismatch between # of data partitions: " + dataPartitions.length
+ " and # of records in TLK: " + tlkRecords.size());
}
// Create a max value cap record
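// The cap serves as the upper bound of the last partition's range constructed
// below, so the final range covers all remaining key values.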
if (tlkRecords.size() > 0)
{
HPCCRecord endRecord = tlkRecords.get(tlkRecords.size()-1);
Object[] capFields = new Object[endRecord.getNumFields()];
for (int i = 0; i < endRecord.getNumFields(); i++)
{
FieldDef fd = endRecord.getFieldDefintion(i);
FieldType type = fd.getFieldType();
// If the type is scalar, it might be part of the index, so create a max value;
// otherwise it is part of the payload and its value doesn't matter, so reuse the end record's value
if (type.isScalar())
{
capFields[i] = CompiledFieldFilter.getMaxValueForType(type, (int) fd.getDataLen());
}
else
{
capFields[i] = endRecord.getField(i);
}
}
tlkRecords.add(new HPCCRecord(capFields, endRecord.getRecordDefinition()));
}
// Construct ranges
for (int i = 0; i < tlkRecords.size()-1; i++)
{
DataPartitionRecordRange range = new DataPartitionRecordRange();
range.begin = tlkRecords.get(i);
range.end = tlkRecords.get(i+1);
range.dataPartition = this.dataPartitions[i];
dataPartitionRanges.add(range);
}
}
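/**
* Returns the data partitions whose TLK key ranges could match the given filter.
* Falls back to returning all partitions when no TLK ranges are available, the
* filter is null, or the filter cannot be compiled.
*
* @param filter the file filter to match against, may be null
* @return the list of potentially matching partitions
*/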
public List<DataPartition> findMatchingPartitions(FileFilter filter)
{
// If we don't have any partition ranges, there wasn't a TLK
// associated with the file, so return all partitions.
if (dataPartitionRanges.size() == 0 || filter == null)
{
return Arrays.asList(dataPartitions);
}
CompiledFileFilter compiledFileFilter = null;
try
{
compiledFileFilter = new CompiledFileFilter(this.recordDef, filter);
}
catch (Exception e)
{
log.error("Invalid filter, returning all partitions: " + e.getMessage());
return Arrays.asList(dataPartitions);
}
ArrayList<DataPartition> matchingPartitions = new ArrayList<DataPartition>();
for (int i = 0; i < dataPartitionRanges.size(); i++)
{
DataPartitionRecordRange partitionRange = dataPartitionRanges.get(i);
if (compiledFileFilter.partitionMatchesFilter(partitionRange))
{
matchingPartitions.add(partitionRange.dataPartition);
}
}
return matchingPartitions;
}
public int getNumPartitionRanges()
{
return dataPartitionRanges.size();
}
public String getPartitionRangeAsString(int index)
{
DataPartitionRecordRange range = dataPartitionRanges.get(index);
String rangeStr = "[" + range.begin.toString()
+ "," + range.end.toString() + "]\n";
return rangeStr;
}
@Override
public String toString()
{
String out = "[\n";
for (int i = 0; i < dataPartitionRanges.size(); i++)
{
DataPartitionRecordRange range = dataPartitionRanges.get(i);
out += i + "\t[" + range.begin.toString()
+ "," + range.end.toString() + "]\n";
}
out += "]\n";
return out;
}
}