All downloads are free. The search and download functionality uses the official Maven repository.

oracle.kv.hadoop.hive.table.TableHiveInputSplit Maven / Gradle / Ivy

/*-
 * Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle NoSQL
 * Database made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle NoSQL Database for a copy of the license and
 * additional information.
 */

package oracle.kv.hadoop.hive.table;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.Set;

import oracle.kv.PasswordCredentials;
import oracle.kv.hadoop.table.TableInputSplit;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

/**
 * Concrete implementation of the InputSplit interface required by version 1
 * of MapReduce to support Hive queries. A RecordReader will take instances
 * of this class, where each such instance corresponds to data stored in
 * an Oracle NoSQL Database store via the Table API, and use those instances
 * to retrieve that data when performing a given Hive query against the
 * store's data.
 * 

* Note that the Hive infrastructure requires that even though the data * associated with instances of this class resides in a table in an Oracle * NoSQL Database store rather than an HDFS file, this class still must * subclass FileSplit. As a result, a Hadoop HDFS Path must be specified * for this class. *

* Also note that although this InputSplit is based on version 1 of MapReduce * (as requied by the Hive infrastructure), it wraps and delegates to a YARN * based (MapReduce version 2) InputSplit. This is done because the InputSplit * class Oracle NoSQL Database provides to support Hadoop integration is YARN * based, and this class wishes to exploit and reuse the functionality already * provided by the YARN based InputSplit class. */ public class TableHiveInputSplit extends FileSplit { private final TableInputSplit v2Split; private static final String[] EMPTY_STRING_ARRAY = new String[] { }; public TableHiveInputSplit() { super((Path) null, 0, 0, EMPTY_STRING_ARRAY); this.v2Split = new TableInputSplit(); } public TableHiveInputSplit(Path filePath, TableInputSplit v2Split) { super(filePath, 0, 0, EMPTY_STRING_ARRAY); this.v2Split = v2Split; } /** * Returns the HDFS Path associated with this split. * * @return the HDFS Path associated with this split */ @Override public Path getPath() { return super.getPath(); } /** * Get the size of the split, so that the input splits can be sorted by * size. * * @return the number of bytes in the split */ @Override public long getLength() { return v2Split.getLength(); } /** * Get the list of nodes by name where the data for the split would be * local. The locations do not need to be serialized. * * @return a new array of the node nodes. * @throws IOException if an I/O error occurs */ @Override public String[] getLocations() throws IOException { return v2Split.getLocations(); } /** * Serialize the fields of this object to out. * * @param out DataOuput to serialize this object into. * @throws IOException if an I/O error occurs */ @Override public void write(DataOutput out) throws IOException { super.write(out); v2Split.write(out); } /** * Deserialize the fields of this object from in. * *

For efficiency, implementations should attempt to re-use storage in * the existing object where possible.

* * @param in DataInput to deseriablize this object from. * @throws IOException if an I/O error occurs */ @Override public void readFields(DataInput in) throws IOException { super.readFields(in); v2Split.readFields(in); } /* * A well-defined equals, hashCode, and toString method must be provided * so that instances of this class can be compared, uniquely identified, * and stored in collections. */ @Override public boolean equals(Object obj) { if (!(obj instanceof TableHiveInputSplit)) { return false; } if (obj == this) { return true; } final TableHiveInputSplit obj1 = this; final TableHiveInputSplit obj2 = (TableHiveInputSplit) obj; final Path path1 = obj1.getPath(); final Path path2 = obj2.getPath(); if (path1 != null) { if (!path1.equals(path2)) { return false; } } else { if (path2 != null) { return false; } } return obj1.v2Split.equals(obj2.v2Split); } @Override public int hashCode() { int hc = 0; final Path filePath = getPath(); if (filePath != null) { hc = filePath.hashCode(); } return hc + v2Split.hashCode(); } @Override public String toString() { if (v2Split == null) { return super.toString(); } final StringBuilder buf = new StringBuilder(this.getClass().getSimpleName()); buf.append(": [path="); buf.append(getPath()); buf.append("], "); buf.append(v2Split.toString()); return buf.toString(); } public String getKVStoreName() { return v2Split.getKVStoreName(); } public String[] getKVHelperHosts() { return v2Split.getKVHelperHosts(); } public String getTableName() { return v2Split.getTableName(); } /** * Returns the version 2 split. This method is called by the method * TableHiveInputFormat.getRecordReader; which uses the * version 2 split returned by this method to create the version 2 * RecordReader that will be encapsulated by the version 1 * RecordReader used in Hive queries. */ TableInputSplit getV2Split() { return v2Split; } /** * Returns a List whose elements are Sets of * partitions; whose union is the set of all partitions in the store. 
*/ List> getPartitionSets() { return v2Split.getPartitionSets(); } public int getQueryBy() { return v2Split.getQueryBy(); } public String getWhereClause() { return v2Split.getWhereClause(); } public String getSecurityLogin() { return v2Split.getSecurityLogin(); } public PasswordCredentials getSecurityCredentials() { return v2Split.getSecurityCredentials(); } public String getSecurityTrust() { return v2Split.getSecurityTrust(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy