
/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.hadoop.table;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import oracle.kv.Consistency;
import oracle.kv.Direction;
import oracle.kv.PasswordCredentials;
import oracle.kv.impl.topo.RepGroupId;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
/**
* Concrete implementation of the abstract InputSplit class required to
* perform Hadoop MapReduce. A RecordReader takes instances of this
* class, where each instance corresponds to data stored in an Oracle
* NoSQL Database store, and uses those instances to retrieve that data
* when performing the MapReduce job.
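*
* <p>An illustrative sketch of how a split is consumed, assuming only
* the generic Hadoop InputFormat contract (the PrimaryKey/Row key and
* value types are those of the companion TableInputFormat):
* <pre>{@code
* InputFormat<PrimaryKey, Row> fmt = new TableInputFormat();
* for (InputSplit split : fmt.getSplits(jobContext)) {
*     RecordReader<PrimaryKey, Row> reader =
*         fmt.createRecordReader(split, taskAttemptContext);
*     // each map task then iterates reader.nextKeyValue()
* }
* }</pre>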
*
* @since 3.1
*/
public class TableInputSplit extends InputSplit implements Writable {
/*
* Use int rather than an enum for the query type, because of concerns
* about how Hadoop (split) serialization/deserialization handles enums.
*/
public static final int QUERY_BY_PRIMARY_ALL_PARTITIONS = 0;
public static final int QUERY_BY_PRIMARY_SINGLE_PARTITION = 1;
public static final int QUERY_BY_INDEX = 2;
public static final int QUERY_BY_ONQL_ALL_PARTITIONS = 3;
public static final int QUERY_BY_ONQL_SINGLE_PARTITION = 4;
public static final int QUERY_BY_ONQL_SHARDS = 5;
public static final String EMPTY_STR = "";
private int queryBy = QUERY_BY_PRIMARY_ALL_PARTITIONS;
private String onqlWhereClause = EMPTY_STR;
private String[] locations = new String[0];
private String kvStore;
private String[] kvHelperHosts;
private String tableName;
private String primaryKeyProperty;
/* For MultiRowOptions */
private String fieldRangeProperty;
/* For TableIteratorOptions */
private Direction direction;
private Consistency consistency;
private long timeout;
private TimeUnit timeoutUnit;
private int maxRequests;
private int batchSize;
private int maxBatches;
private String loginFlnm = null;
private PasswordCredentials pwdCredentials = null;
private String trustFlnm = null;
private List<Set<Integer>> partitionSets;
private Set<RepGroupId> shardSet;
/**
* No-arg constructor required by Hadoop semantics.
*/
public TableInputSplit() {
}
/**
* Get the size of the split so that the input splits can be sorted by
* size. Note that this implementation returns the number of partition
* sets in the split as a relative measure of size rather than a byte
* count.
*
* @return the number of partition sets in the split
*/
@Override
public long getLength() {
return partitionSets.size();
}
/**
* Get the list of nodes by name where the data for the split would be
* local. Although the Hadoop contract does not require the locations to
* be serialized, this class does serialize them in {@link #write}.
*
* @return an array of the node names
*/
@Override
public String[] getLocations() {
return locations;
}
TableInputSplit setLocations(String[] newLocations) {
this.locations = newLocations;
return this;
}
TableInputSplit setKVHelperHosts(String[] newHelperHosts) {
this.kvHelperHosts = newHelperHosts;
return this;
}
/**
* Returns a String array with elements of the form
* '&lt;hostname:port&gt;', where each element specifies the connection
* information for one of the store's helper hosts. Note that this method must
* be public so that instances of Hive InputSplit classes can wrap and
* delegate to an instance of this class.
*/
public String[] getKVHelperHosts() {
return kvHelperHosts;
}
TableInputSplit setKVStoreName(String newStoreName) {
this.kvStore = newStoreName;
return this;
}
/**
* Returns the name of the store containing the records associated with
* this split. Note that this method must be public so that instances of
* Hive InputSplit classes can wrap and delegate to an instance of this
* class.
*/
public String getKVStoreName() {
return kvStore;
}
public String getTableName() {
return tableName;
}
TableInputSplit setTableName(String newTableName) {
this.tableName = newTableName;
return this;
}
TableInputSplit setPrimaryKeyProperty(String newProperty) {
this.primaryKeyProperty = newProperty;
return this;
}
String getPrimaryKeyProperty() {
return primaryKeyProperty;
}
/* For MultiRowOptions */
TableInputSplit setFieldRangeProperty(String newProperty) {
this.fieldRangeProperty = newProperty;
return this;
}
String getFieldRangeProperty() {
return fieldRangeProperty;
}
/* For TableIteratorOptions */
TableInputSplit setDirection(Direction newDirection) {
this.direction = newDirection;
return this;
}
Direction getDirection() {
return direction;
}
TableInputSplit setConsistency(Consistency newConsistency) {
this.consistency = newConsistency;
return this;
}
Consistency getConsistency() {
return consistency;
}
TableInputSplit setTimeout(long newTimeout) {
this.timeout = newTimeout;
return this;
}
long getTimeout() {
return timeout;
}
TableInputSplit setTimeoutUnit(TimeUnit newTimeoutUnit) {
this.timeoutUnit = newTimeoutUnit;
return this;
}
TimeUnit getTimeoutUnit() {
return timeoutUnit;
}
TableInputSplit setMaxRequests(int newMaxRequests) {
this.maxRequests = newMaxRequests;
return this;
}
int getMaxRequests() {
return maxRequests;
}
TableInputSplit setBatchSize(int newBatchSize) {
this.batchSize = newBatchSize;
return this;
}
int getBatchSize() {
return batchSize;
}
TableInputSplit setMaxBatches(int newMaxBatches) {
this.maxBatches = newMaxBatches;
return this;
}
int getMaxBatches() {
return maxBatches;
}
TableInputSplit setPartitionSets(List<Set<Integer>> newPartitionSets) {
this.partitionSets = newPartitionSets;
return this;
}
/**
* Returns a <code>List</code> whose elements are <code>Set</code>s of
* partitions, whose union is the set of all partitions in the store.
* Note that this method is declared <code>public</code> so that the
* method <code>TableHiveInputSplit.getPartitionSets</code> can delegate
* to this method.
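*
* <p>For illustration (hypothetical partition ids), a six-partition
* store might be represented as <code>[[1, 2], [3, 4], [5, 6]]</code>,
* where each inner <code>Set</code> is iterated separately.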
*/
public List<Set<Integer>> getPartitionSets() {
return partitionSets;
}
public TableInputSplit setQueryInfo(int newQueryBy,
String whereClause) {
this.queryBy = newQueryBy;
this.onqlWhereClause = whereClause;
return this;
}
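/*
 * Illustrative usage of setQueryInfo; the query-type constant is
 * defined above, while the WHERE clause text is a hypothetical example:
 *
 *     split.setQueryInfo(QUERY_BY_ONQL_ALL_PARTITIONS,
 *                        "age > 21 AND lastName = 'Smith'");
 */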
public int getQueryBy() {
return queryBy;
}
public String getWhereClause() {
return onqlWhereClause;
}
TableInputSplit setShardSet(Set<RepGroupId> newShardSet) {
this.shardSet = newShardSet;
return this;
}
public Set<RepGroupId> getShardSet() {
return shardSet;
}
TableInputSplit setKVStoreSecurity(
final String loginFile,
final PasswordCredentials passwordCredentials,
final String trustFile) {
this.loginFlnm = loginFile;
this.pwdCredentials = passwordCredentials;
this.trustFlnm = trustFile;
return this;
}
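/*
 * Illustrative usage of setKVStoreSecurity; the file names and
 * credentials below are hypothetical placeholders:
 *
 *     split.setKVStoreSecurity(
 *         "login.properties",
 *         new PasswordCredentials("user", "password".toCharArray()),
 *         "client.trust");
 */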
public String getSecurityLogin() {
return loginFlnm;
}
public PasswordCredentials getSecurityCredentials() {
return pwdCredentials;
}
public String getSecurityTrust() {
return trustFlnm;
}
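/*
 * Round-trip sketch (assumed usage, following Hadoop's Writable
 * contract): every field written by write(DataOutput) is read back, in
 * the same order, by readFields(DataInput).
 *
 *     final ByteArrayOutputStream bos = new ByteArrayOutputStream();
 *     split.write(new DataOutputStream(bos));
 *     final TableInputSplit copy = new TableInputSplit();
 *     copy.readFields(new DataInputStream(
 *         new ByteArrayInputStream(bos.toByteArray())));
 */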
/**
* Serialize the fields of this object to <code>out</code>.
*
* @param out <code>DataOutput</code> to serialize this object into.
* @throws IOException if an I/O error occurs
*/
@Override
public void write(DataOutput out)
throws IOException {
out.writeInt(locations.length);
for (int i = 0; i < locations.length; i++) {
Text.writeString(out, locations[i]);
}
out.writeInt(kvHelperHosts.length);
for (int i = 0; i < kvHelperHosts.length; i++) {
Text.writeString(out, kvHelperHosts[i]);
}
Text.writeString(out, kvStore);
Text.writeString(out, tableName);
Text.writeString(out,
(primaryKeyProperty == null ? EMPTY_STR : primaryKeyProperty));
/* For MultiRowOptions */
Text.writeString(out,
(fieldRangeProperty == null ? EMPTY_STR : fieldRangeProperty));
/* For TableIteratorOptions */
Text.writeString(out, (direction == null ?
EMPTY_STR : direction.name()));
writeBytes(out, (consistency == null ?
null :
consistency.toByteArray()));
out.writeLong(timeout);
Text.writeString(out, (timeoutUnit == null ?
EMPTY_STR : timeoutUnit.name()));
out.writeInt(maxRequests);
out.writeInt(batchSize);
out.writeInt(maxBatches);
out.writeInt(partitionSets.size());
for (Set<Integer> partitions : partitionSets) {
out.writeInt(partitions.size());
for (Integer p : partitions) {
out.writeInt(p);
}
}
out.writeInt(queryBy);
out.writeInt(shardSet.size());
for (RepGroupId repGroupId : shardSet) {
out.writeInt(repGroupId.getGroupId());
}
/* Serialize empty string rather than null to avoid NPE from Hadoop. */
Text.writeString(out,
(onqlWhereClause == null ? EMPTY_STR : onqlWhereClause));
/*
* Write the name of the login file, the user credentials (username
* and password), and the name of the trust file. If name of the
* login file is null, leave the others null too.
*/
if (loginFlnm == null) {
Text.writeString(out, EMPTY_STR); /* login file name */
Text.writeString(out, EMPTY_STR); /* username */
Text.writeString(out, EMPTY_STR); /* password */
Text.writeString(out, EMPTY_STR); /* trust file name */
} else {
Text.writeString(out, loginFlnm);
Text.writeString(out, pwdCredentials.getUsername());
Text.writeString(
out, String.copyValueOf(pwdCredentials.getPassword()));
Text.writeString(out, trustFlnm);
}
}
/**
* Deserialize the fields of this object from <code>in</code>.
*
* For efficiency, implementations should attempt to re-use storage in
* the existing object where possible.
*
* @param in <code>DataInput</code> to deserialize this object from.
* @throws IOException if an I/O error occurs
*/
@Override
public void readFields(DataInput in)
throws IOException {
final int len = in.readInt();
locations = new String[len];
for (int i = 0; i < len; i++) {
locations[i] = Text.readString(in);
}
final int nHelperHosts = in.readInt();
kvHelperHosts = new String[nHelperHosts];
for (int i = 0; i < nHelperHosts; i++) {
kvHelperHosts[i] = Text.readString(in);
}
kvStore = Text.readString(in);
tableName = Text.readString(in);
primaryKeyProperty = Text.readString(in);
if (EMPTY_STR.equals(primaryKeyProperty)) {
primaryKeyProperty = null;
}
/* For MultiRowOptions */
fieldRangeProperty = Text.readString(in);
if (EMPTY_STR.equals(fieldRangeProperty)) {
fieldRangeProperty = null;
}
/* For TableIteratorOptions */
final String dirStr = Text.readString(in);
if (dirStr == null || EMPTY_STR.equals(dirStr)) {
direction = Direction.UNORDERED;
} else {
direction = Direction.valueOf(dirStr);
}
final byte[] consBytes = readBytes(in);
if (consBytes == null) {
consistency = null;
} else {
consistency = Consistency.fromByteArray(consBytes);
}
timeout = in.readLong();
final String tuStr = Text.readString(in);
if (tuStr == null || EMPTY_STR.equals(tuStr)) {
timeoutUnit = null;
} else {
timeoutUnit = TimeUnit.valueOf(tuStr);
}
maxRequests = in.readInt();
batchSize = in.readInt();
maxBatches = in.readInt();
int nSets = in.readInt();
partitionSets = new ArrayList<Set<Integer>>(nSets);
while (nSets > 0) {
int nPartitions = in.readInt();
final Set<Integer> partitions = new HashSet<Integer>(nPartitions);
partitionSets.add(partitions);
while (nPartitions > 0) {
partitions.add(in.readInt());
nPartitions--;
}
nSets--;
}
queryBy = in.readInt();
final int nShards = in.readInt();
shardSet = new HashSet<RepGroupId>(nShards);
for (int i = 0; i < nShards; i++) {
shardSet.add(new RepGroupId(in.readInt()));
}
/* Null where clause was serialized as empty string to avoid NPE. */
onqlWhereClause = Text.readString(in);
if (EMPTY_STR.equals(onqlWhereClause)) {
onqlWhereClause = null;
}
/*
* Read the name of the login file, the user credentials (username
* and password), and the name of the trust file.
*/
loginFlnm = Text.readString(in);
if (EMPTY_STR.equals(loginFlnm)) {
loginFlnm = null;
}
String userName = Text.readString(in);
if (EMPTY_STR.equals(userName)) {
userName = null;
}
char[] userPassword = null;
final String userPasswordStr = Text.readString(in);
if (userPasswordStr != null && !(EMPTY_STR.equals(userPasswordStr)) &&
userName != null) {
userPassword = userPasswordStr.toCharArray();
pwdCredentials = new PasswordCredentials(userName, userPassword);
}
trustFlnm = Text.readString(in);
if (EMPTY_STR.equals(trustFlnm)) {
trustFlnm = null;
}
}
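/*
 * Length-prefixed framing used for the Consistency bytes above: a zero
 * length stands in for null. This conflates null with a zero-length
 * array, which is assumed safe here because a serialized Consistency
 * is never empty.
 */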
private void writeBytes(DataOutput out, byte[] bytes)
throws IOException {
if (bytes == null) {
out.writeInt(0);
return;
}
out.writeInt(bytes.length);
out.write(bytes);
}
private byte[] readBytes(DataInput in)
throws IOException {
final int len = in.readInt();
if (len == 0) {
return null;
}
final byte[] ret = new byte[len];
in.readFully(ret);
return ret;
}
/*
* A well-defined equals, hashCode, and toString method should be provided
* so that instances of this class can be compared, uniquely identified,
* and stored in collections. These methods are required to support
* Hive integration.
*/
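/*
 * For example (a hedged sketch), a caller may rely on these methods to
 * de-duplicate splits in hash-based collections:
 *
 *     final Set<TableInputSplit> seen = new HashSet<TableInputSplit>();
 *     seen.add(split);
 *     final boolean duplicate = !seen.add(splitCopy);
 */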
@Override
public boolean equals(Object obj) {
if (!(obj instanceof TableInputSplit)) {
return false;
}
if (obj == this) {
return true;
}
final TableInputSplit obj1 = this;
final TableInputSplit obj2 = (TableInputSplit) obj;
final String obj2KvStoreName = obj2.getKVStoreName();
if (obj1.kvStore == null || obj2KvStoreName == null) {
return false;
}
if (!obj1.kvStore.equals(obj2KvStoreName)) {
return false;
}
if (obj1.locations == null || obj2.getLocations() == null) {
return false;
}
final List<String> obj1Locations = Arrays.asList(obj1.locations);
final List<String> obj2Locations = Arrays.asList(obj2.getLocations());
if (obj1Locations.size() != obj2Locations.size()) {
return false;
}
if (!obj1Locations.containsAll(obj2Locations)) {
return false;
}
if (obj1.kvHelperHosts == null || obj2.getKVHelperHosts() == null) {
return false;
}
final List<String> obj1KvHelperHosts =
Arrays.asList(obj1.kvHelperHosts);
final List<String> obj2KvHelperHosts =
Arrays.asList(obj2.getKVHelperHosts());
if (obj1KvHelperHosts.size() != obj2KvHelperHosts.size()) {
return false;
}
if (!obj1KvHelperHosts.containsAll(obj2KvHelperHosts)) {
return false;
}
if (obj1.tableName != null) {
if (!obj1.tableName.equals(obj2.getTableName())) {
return false;
}
}
if (obj1.primaryKeyProperty != null) {
if (!obj1.primaryKeyProperty.equals(
obj2.getPrimaryKeyProperty())) {
return false;
}
}
if (obj1.fieldRangeProperty != null) {
if (!obj1.fieldRangeProperty.equals(
obj2.getFieldRangeProperty())) {
return false;
}
}
if (obj1.queryBy != obj2.queryBy) {
return false;
}
/* Compare shard sets for all iteration types. */
if (obj1.shardSet != null) {
if (obj2.shardSet == null ||
obj1.shardSet.size() != obj2.shardSet.size() ||
!obj1.shardSet.containsAll(obj2.shardSet)) {
return false;
}
} else {
if (obj2.shardSet != null) {
return false;
}
}
/* Compare partition sets for all iteration types. */
if (obj1.partitionSets != null) {
if (obj2.partitionSets == null ||
obj1.partitionSets.size() != obj2.partitionSets.size() ||
!obj1.partitionSets.containsAll(obj2.partitionSets)) {
return false;
}
} else {
if (obj2.partitionSets != null) {
return false;
}
}
/* Compare predicates only if iteration is native based. */
if (queryBy != QUERY_BY_PRIMARY_ALL_PARTITIONS &&
queryBy != QUERY_BY_PRIMARY_SINGLE_PARTITION &&
queryBy != QUERY_BY_INDEX) {
if (obj1.onqlWhereClause != null) {
if (!obj1.onqlWhereClause.equals(obj2.onqlWhereClause)) {
return false;
}
} else {
if (obj2.onqlWhereClause != null) {
return false;
}
}
}
return true;
}
@Override
public int hashCode() {
final int pm = 37;
int hc = 11;
int hcSum = 0;
if (kvStore != null) {
hcSum = hcSum + kvStore.hashCode();
}
hcSum = hcSum + Arrays.hashCode(locations);
hcSum = hcSum + Arrays.hashCode(kvHelperHosts);
if (tableName != null) {
hcSum = hcSum + tableName.hashCode();
}
if (primaryKeyProperty != null) {
hcSum = hcSum + primaryKeyProperty.hashCode();
}
if (fieldRangeProperty != null) {
hcSum = hcSum + fieldRangeProperty.hashCode();
}
hcSum += queryBy;
if (partitionSets != null) {
hcSum += partitionSets.hashCode();
}
if (shardSet != null) {
hcSum += shardSet.hashCode();
}
/* Predicate contributes to hash only if iteration is native based. */
if (queryBy != QUERY_BY_PRIMARY_ALL_PARTITIONS &&
queryBy != QUERY_BY_PRIMARY_SINGLE_PARTITION &&
queryBy != QUERY_BY_INDEX) {
if (onqlWhereClause != null) {
hcSum += onqlWhereClause.hashCode();
}
}
hc = (pm * hc) + hcSum;
return hc;
}
@Override
public String toString() {
final StringBuilder buf =
new StringBuilder(this.getClass().getSimpleName());
buf.append(": [store=");
buf.append(kvStore);
buf.append(", dataNodeLocations=");
if (locations != null) {
buf.append(Arrays.asList(locations));
} else {
buf.append("null");
}
buf.append(", kvStoreHosts=");
if (kvHelperHosts != null) {
buf.append(Arrays.asList(kvHelperHosts));
} else {
buf.append("null");
}
if (tableName == null) {
buf.append(", tableName=NOT_SET_YET");
} else {
buf.append(", tableName=");
buf.append(tableName);
}
if (primaryKeyProperty != null) {
buf.append(", primaryKeyProperty=");
buf.append(primaryKeyProperty);
}
if (fieldRangeProperty != null) {
buf.append(", fieldRangeProperty=");
buf.append(fieldRangeProperty);
}
if (queryBy == QUERY_BY_INDEX) {
buf.append(", IndexKeyIteration");
} else if (queryBy == QUERY_BY_PRIMARY_ALL_PARTITIONS) {
buf.append(", PrimaryKeyIterationAllPartitions");
} else if (queryBy == QUERY_BY_PRIMARY_SINGLE_PARTITION) {
buf.append(", PrimaryKeyIterationSinglePartition");
} else {
if (queryBy == QUERY_BY_ONQL_ALL_PARTITIONS) {
buf.append(", NativeIterationAllPartitions");
} else if (queryBy == QUERY_BY_ONQL_SINGLE_PARTITION) {
buf.append(", NativeIterationSinglePartition");
} else if (queryBy == QUERY_BY_ONQL_SHARDS) {
buf.append(", NativeIterationByShards");
} else {
buf.append(", NativeIterationDefault");
}
buf.append(" [predicate=" + onqlWhereClause + "]");
}
if (shardSet == null) {
buf.append(", shardSet=null");
} else {
buf.append(", shardSet=");
buf.append(shardSet);
}
if (partitionSets == null) {
buf.append(", partitionSets=null");
} else {
/*
* The list of sets of partition ids can get quite large. Only
* display the whole list if its size is reasonably small;
* otherwise, elide most of the elements.
*/
final int maxListSize = 5;
final int maxSetSize = 3;
buf.append(", partitionSets=[");
/* Display only the 1st maxListSize sets in the list. */
int i = 0;
for (Set<Integer> partitionSet : partitionSets) {
if (i >= maxListSize) {
buf.append(", ...");
break;
}
if (i > 0) {
buf.append(", [");
} else {
buf.append("[");
}
i++;
bufPartitionIdSets(partitionSet, maxSetSize, buf);
buf.append("]");
}
buf.append("]");
}
buf.append("]");
return buf.toString();
}
/*
* Convenience method that encapsulates functionality employed in toString
* above. This method will append the partition ids contained in the given
* partitionSet to the given StringBuilder buffer in a way appropriate for
* logging or display. But if the number of elements in the given set
* exceeds the given maxSetSize, then only the first maxSetSize elements
* will be appended to the buffer.
*/
private void bufPartitionIdSets(Set<Integer> partitionSet, int maxSetSize,
StringBuilder buf) {
/* Display only 1st maxSetSize elements of the set. */
int i = 0;
for (Integer partitionId : partitionSet) {
if (i >= maxSetSize) {
buf.append(", ...");
break;
}
if (i > 0) {
buf.append(", " + partitionId);
} else {
buf.append(partitionId);
}
i++;
}
}
}