org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.newplan.logical.rules;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
import org.apache.pig.newplan.optimizer.Transformer;
/**
* This Rule prunes columns and map keys and set to loader. This rule depends
* on MapKeysPruneHelper to calculate what keys are required for a loader,
* and ColumnPruneHelper to calculate the required columns for a loader. Then
* it combines the map keys and columns info to set into the loader.
*/
public class ColumnMapKeyPrune extends WholePlanRule {
private boolean hasRun;
public ColumnMapKeyPrune(String n) {
super(n, false);
hasRun = false;
}
@Override
public Transformer getNewTransformer() {
return new ColumnMapKeyPruneTransformer();
}
public class ColumnMapKeyPruneTransformer extends Transformer {
private MapKeysPruneHelper mapKeyHelper;
private ColumnPruneHelper columnHelper;
private boolean columnPrune;
private boolean mapKeyPrune;
/*
* This is a map of of required columns and map keys for each LOLoad
* RequiredMapKeys --> Map >
* RequiredColumns --> Set
*
* The integer are column indexes.
*/
private Map>,Set>> requiredItems =
new HashMap>,Set>>();
@Override
public boolean check(OperatorPlan matched) throws FrontendException {
// only run this rule once
if (hasRun) {
return false;
}
hasRun = true;
mapKeyHelper = new MapKeysPruneHelper(matched);
columnHelper = new ColumnPruneHelper(matched);
// check if map keys can be pruned
mapKeyPrune = mapKeyHelper.check();
// check if columns can be pruned
columnPrune = columnHelper.check();
return mapKeyPrune || columnPrune;
}
@Override
public OperatorPlan reportChanges() {
return currentPlan;
}
@SuppressWarnings("unchecked")
private void merge() throws FrontendException {
// combine annotations
for( Operator source : currentPlan.getSources() ) {
Map> mapKeys =
(Map>) source.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
Set requiredColumns = null;
if (source.getAnnotation(ColumnPruneHelper.REQUIREDCOLS) != null) {
requiredColumns = new HashSet((Set) source.getAnnotation(ColumnPruneHelper.REQUIREDCOLS));
}
// We dont have any information so skip
if( requiredColumns == null && mapKeys == null ) {
continue;
}
if( requiredColumns != null && mapKeys != null ) {
Set duplicatedCols = new HashSet();
// Remove the columns already marked by MapKeys
for( Integer col : requiredColumns ) {
if( mapKeys.containsKey(col) ) {
duplicatedCols.add(col);
}
}
requiredColumns.removeAll(duplicatedCols);
} else if ( mapKeys != null && requiredColumns == null ) {
// This is the case where only mapKeys can be pruned. And none
// of the columns can be pruned. So we add all columns to the
// requiredcolumns part
requiredColumns = new HashSet();
for(int i = 0; i < ((LogicalRelationalOperator)source).getSchema().size(); i++ ) {
if( !mapKeys.containsKey(i) ) {
requiredColumns.add(i);
}
}
}
requiredItems.put((LOLoad) source, new Pair