All downloads are free. The search and download functionality uses the official Maven repository.

uk.gov.gchq.gaffer.sparkaccumulo.operation.handler.AbstractGetRDDHandler Maven / Gradle / Ivy

There is a newer version: 2.3.1
Show newest version
/*
 * Copyright 2016-2020 Crown Copyright
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package uk.gov.gchq.gaffer.sparkaccumulo.operation.handler;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
import org.apache.accumulo.core.data.Range;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import scala.Tuple2;
import scala.runtime.AbstractFunction1;

import uk.gov.gchq.gaffer.accumulostore.AccumuloStore;
import uk.gov.gchq.gaffer.accumulostore.key.exception.IteratorSettingException;
import uk.gov.gchq.gaffer.accumulostore.key.exception.RangeFactoryException;
import uk.gov.gchq.gaffer.commonutil.pair.Pair;
import uk.gov.gchq.gaffer.data.element.Element;
import uk.gov.gchq.gaffer.data.element.id.ElementId;
import uk.gov.gchq.gaffer.operation.Operation;
import uk.gov.gchq.gaffer.operation.OperationException;
import uk.gov.gchq.gaffer.operation.graph.GraphFilters;
import uk.gov.gchq.gaffer.operation.impl.get.GetAllElements;
import uk.gov.gchq.gaffer.operation.io.Input;
import uk.gov.gchq.gaffer.operation.io.Output;
import uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements;
import uk.gov.gchq.gaffer.store.StoreException;
import uk.gov.gchq.gaffer.store.operation.handler.OutputOperationHandler;
import uk.gov.gchq.gaffer.user.User;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

public abstract class AbstractGetRDDHandler & GraphFilters, O>
        implements OutputOperationHandler {

    public static final String HADOOP_CONFIGURATION_KEY = "Hadoop_Configuration_Key";
    public static final String USE_RFILE_READER_RDD = "gaffer.accumulo.spark.directrdd.use_rfile_reader";
    public static final String VIEW = "gaffer.accumulo.spark.directrdd.view";
    public static final String USE_BATCH_SCANNER_RDD = "gaffer.accumulo.spark.rdd.use_batch_scanner";

    public void addIterators(final AccumuloStore accumuloStore,
                             final Configuration conf,
                             final User user,
                             final OP operation) throws OperationException {
        try {
            final GraphFilters derivedOperation;
            if (operation instanceof GetRDDOfAllElements) {
                // Create dummy GetAllElements operation as some of the methods in
                // AccumuloStore test if the operation is a GetAllElements operation
                // and if so set some options. We need those options if operation
                // is returning all the elements.
                derivedOperation = getGetAllElements(operation);
            } else {
                derivedOperation = operation;
            }
            // Update configuration with instance name, table name, zookeepers, and with view
            accumuloStore.updateConfiguration(conf, derivedOperation, user);
            // Add iterators based on operation-specific (i.e. not view related) options
            final IteratorSetting queryTimeAggregator = accumuloStore.getKeyPackage()
                    .getIteratorFactory()
                    .getQueryTimeAggregatorIteratorSetting(operation.getView(), accumuloStore);
            if (null != queryTimeAggregator) {
                InputConfigurator.addIterator(AccumuloInputFormat.class, conf, queryTimeAggregator);
            }
            final IteratorSetting propertyFilter = accumuloStore.getKeyPackage()
                    .getIteratorFactory()
                    .getElementPropertyRangeQueryFilter(derivedOperation);
            if (null != propertyFilter) {
                InputConfigurator.addIterator(AccumuloInputFormat.class, conf, propertyFilter);
            }
        } catch (final StoreException | IteratorSettingException e) {
            throw new OperationException("Failed to update configuration", e);
        }
    }

    private GetAllElements getGetAllElements(final OP getRDDOfAllElements) {
        return new GetAllElements.Builder()
                .view(getRDDOfAllElements.getView())
                .directedType(getRDDOfAllElements.getDirectedType())
                .options(getRDDOfAllElements.getOptions())
                .build();
    }

    public >>
    void addRanges(final AccumuloStore accumuloStore,
                   final Configuration conf,
                   final INPUT_OP operation)
            throws OperationException {
        final List ranges = new ArrayList<>();
        for (final ElementId entityId : operation.getInput()) {
            try {
                ranges.addAll(accumuloStore.getKeyPackage()
                        .getRangeFactory()
                        .getRange(entityId, operation));
            } catch (final RangeFactoryException e) {
                throw new OperationException("Failed to add ranges to configuration", e);
            }
        }
        InputConfigurator.setRanges(AccumuloInputFormat.class, conf, ranges);
    }

    public >>>
    void addRangesFromPairs(final AccumuloStore accumuloStore,
                            final Configuration conf,
                            final INPUT_OP operation)
            throws OperationException {
        final List ranges = new ArrayList<>();
        for (final Pair pair : operation.getInput()) {
            try {
                ranges.add(accumuloStore.getKeyPackage()
                        .getRangeFactory()
                        .getRangeFromPair(pair, operation));
            } catch (final RangeFactoryException e) {
                throw new OperationException("Failed to add ranges to configuration", e);
            }
        }
        InputConfigurator.setRanges(AccumuloInputFormat.class, conf, ranges);
    }

    protected Configuration getConfiguration(final OP operation) throws OperationException {
        final String serialisedConf = operation.getOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY);
        if (null == serialisedConf) {
            return new Configuration();
        }
        try {
            return AbstractGetRDDHandler.convertStringToConfiguration(serialisedConf);
        } catch (final IOException e) {
            throw new OperationException("Exception decoding Configuration from options", e);
        }
    }

    public static String convertConfigurationToString(final Configuration conf) throws IOException {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        conf.write(new DataOutputStream(baos));
        return Base64.encodeBase64String(baos.toByteArray());
    }

    public static Configuration convertStringToConfiguration(final String encodedConf) throws IOException {
        final byte[] serialisedConf = Base64.decodeBase64(encodedConf);
        final ByteArrayInputStream baos = new ByteArrayInputStream(serialisedConf);
        final DataInputStream dis = new DataInputStream(baos);
        final Configuration conf = new Configuration();
        conf.readFields(dis);
        return conf;
    }

    public static class FirstElement extends AbstractFunction1, Element> implements Serializable {
        private static final long serialVersionUID = -5693778654823431294L;

        @Override
        public Element apply(final Tuple2 tuple) {
            return tuple._1();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy