org.hpccsystems.dfs.client.ColumnPruner Maven / Gradle / Ivy
/*******************************************************************************
* HPCC SYSTEMS software Copyright (C) 2018 HPCC Systems®.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package org.hpccsystems.dfs.client;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import org.hpccsystems.commons.ecl.FieldDef;
import org.hpccsystems.commons.ecl.FieldType;
/**
 * Prunes a record definition down to the columns named in a selection list.
 * <p>
 * The selection list is a comma separated string of field names. Nested
 * fields are selected with the usual dot notation (e.g., foo.bar for the bar
 * field in a foo field). Names are trimmed and matched case-insensitively.
 * <ul>
 * <li>Naming a field without naming any of its children (e.g., foo where foo
 * is a record type) selects the field together with all of its children.</li>
 * <li>If a field is named both on its own and through a child path
 * (e.g., foo,foo.bar), only the named children are kept.</li>
 * <li>Selection names that match no field are ignored.</li>
 * <li>An empty (or null) selection list selects every column.</li>
 * <li>A non-empty selection list that matches no top-level field is an
 * error; see {@link #pruneRecordDefinition(FieldDef)}.</li>
 * </ul>
 */
public class ColumnPruner implements Serializable
{
    private final static long serialVersionUID = 1L;

    /**
     * Selection state for a single (lower-cased, dot separated) field path.
     * <p>
     * This is a static nested, serializable class so that a ColumnPruner
     * holding selections can itself be serialized, and so that entries do
     * not carry a hidden reference to the enclosing pruner.
     */
    private static class SelectedFieldInfo implements Serializable
    {
        private final static long serialVersionUID = 1L;

        // Normalized (trimmed, lower-cased) name of the last path component.
        String  name               = null;
        // True when at least one selection names a child of this field, in
        // which case only the named children are retained.
        boolean shouldCullChildren = false;
    }

    private String                             fieldListString;
    private HashMap<String, SelectedFieldInfo> selectedFieldMap = null;

    /**
     * Gets the field list string.
     *
     * @return Project list in string format, exactly as it was passed to the
     *         constructor (may be null)
     */
    public String getFieldListString()
    {
        return fieldListString;
    }

    /**
     * Construct a pruner to remove fields from the output definition of a remote
     * read definition string.
     *
     * @param commaSepFieldNamelist
     *            a comma separated list of field names. Nested
     *            fields are expressed using the normal compound name style with a dot (".")
     *            separator. For example, item1,foo.bar1,foo.bar2,item3, selects the item1,
     *            item3 fields (which may be child datasets) and the bar1 and bar2 fields from
     *            the foo child dataset field. A null or empty string selects all fields.
     */
    public ColumnPruner(String commaSepFieldNamelist)
    {
        this.fieldListString = commaSepFieldNamelist;

        // Create a map of selected fields by their path
        selectedFieldMap = new HashMap<String, SelectedFieldInfo>();
        if (commaSepFieldNamelist == null)
        {
            // No selection at all: leave the map empty so nothing is pruned.
            return;
        }

        for (String selectedField : commaSepFieldNamelist.split(","))
        {
            String fieldPath = selectedField.trim();
            if (fieldPath.isEmpty())
            {
                continue;
            }

            String[] pathComponents = fieldPath.split("\\.");
            StringBuilder curPath = new StringBuilder();
            for (int j = 0; j < pathComponents.length; j++)
            {
                String component = normalizeName(pathComponents[j]);
                if (j != 0)
                {
                    curPath.append('.');
                }
                curPath.append(component);

                // Register every prefix of the path so that parents of a
                // selected nested field are themselves retained.
                String key = curPath.toString();
                SelectedFieldInfo fieldInfo = selectedFieldMap.get(key);
                if (fieldInfo == null)
                {
                    fieldInfo = new SelectedFieldInfo();
                    fieldInfo.name = component;
                    selectedFieldMap.put(key, fieldInfo);
                }

                // Any component that is not the last one has an explicitly
                // named child, so its other children must be culled. The flag
                // is sticky: once set by one selection it is never cleared.
                if (j < pathComponents.length - 1)
                {
                    fieldInfo.shouldCullChildren = true;
                }
            }
        }
    }

    /**
     * Prune the record definition to match the field list, if one was given.
     *
     * @param originalRD
     *            the original RD
     * @return the revised record definition; a copy of the original when the
     *         selection list is empty
     * @exception Exception
     *                is thrown when none of the
     *                fields in the selection list are defined.
     */
    public FieldDef pruneRecordDefinition(FieldDef originalRD) throws Exception
    {
        if (selectedFieldMap.isEmpty())
        {
            return new FieldDef(originalRD);
        }

        FieldDef ret = new FieldDef(originalRD);
        ret.setDefs(pruneChildren(originalRD, ""));
        if (ret.getNumDefs() == 0)
        {
            throw new Exception("Error pruning record defintion. No fields were selected for field list: " + this.fieldListString);
        }
        return ret;
    }

    /**
     * Prune a single field definition.
     *
     * @param originalRecordDef
     *            the field definition to prune
     * @param path
     *            the normalized, dot separated path of the field
     * @return the original definition when the field is selected as a whole,
     *         a pruned copy when only some children are selected, or null
     *         when the field is not selected
     */
    private FieldDef pruneFieldDefinition(FieldDef originalRecordDef, String path)
    {
        SelectedFieldInfo fieldInfo = selectedFieldMap.get(path);
        if (fieldInfo == null)
        {
            return null;
        }

        if (!fieldInfo.shouldCullChildren)
        {
            return originalRecordDef;
        }

        // Datasets are a special case. They will not have a component
        // in the field path to represent the dataset FieldDef. So we skip to its record
        if (originalRecordDef.getFieldType() == FieldType.DATASET)
        {
            FieldDef[] datasetRD = new FieldDef[1];
            datasetRD[0] = pruneFieldDefinition(originalRecordDef.getDef(0), path);

            FieldDef ret = new FieldDef(originalRecordDef);
            ret.setDefs(datasetRD);
            return ret;
        }

        FieldDef ret = new FieldDef(originalRecordDef);
        ret.setDefs(pruneChildren(originalRecordDef, path + "."));
        return ret;
    }

    /**
     * Prunes every child of a definition and collects the survivors.
     *
     * @param parentDef
     *            the definition whose children are examined
     * @param pathPrefix
     *            prefix prepended to each child's normalized name to form its
     *            path: empty for top-level fields, otherwise the parent path
     *            followed by a dot
     * @return the selected (possibly pruned) children, in their original order
     */
    private FieldDef[] pruneChildren(FieldDef parentDef, String pathPrefix)
    {
        ArrayList<FieldDef> selectedFields = new ArrayList<FieldDef>();
        for (int i = 0; i < parentDef.getNumDefs(); i++)
        {
            FieldDef childDef = parentDef.getDef(i);
            String fieldPath = pathPrefix + normalizeName(childDef.getFieldName());

            FieldDef prunedFieldDef = pruneFieldDefinition(childDef, fieldPath);
            if (prunedFieldDef != null)
            {
                selectedFields.add(prunedFieldDef);
            }
        }
        return selectedFields.toArray(new FieldDef[0]);
    }

    /**
     * Normalizes a field name for lookup: surrounding whitespace is removed
     * and the name is lower-cased with a fixed locale so that matching does
     * not depend on the JVM's default locale.
     *
     * @param name
     *            the raw field name
     * @return the normalized name
     */
    private static String normalizeName(String name)
    {
        return name.trim().toLowerCase(Locale.ROOT);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy