
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.io.orc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
 * A MapReduce/Hive input format for ORC files.
 * 

 * This class implements both the classic InputFormat, which stores the rows
 * directly, and AcidInputFormat, which stores a series of events with the
 * following schema:

 *   class AcidEvent<ROW> {
 *     enum ACTION {INSERT, UPDATE, DELETE}
 *     ACTION operation;
 *     long originalTransaction;
 *     int bucket;
 *     long rowId;
 *     long currentTransaction;
 *     ROW row;
 *   }
 * 
* Each AcidEvent object corresponds to an update event. The * originalTransaction, bucket, and rowId are the unique identifier for the row. * The operation and currentTransaction are the operation and the transaction * that added this event. Insert and update events include the entire row, while * delete events have null for row. */ public class OrcInputFormat implements InputFormat, InputFormatChecker, VectorizedInputFormatInterface, AcidInputFormat, CombineHiveInputFormat.AvoidSplitCombination { static enum SplitStrategyKind{ HYBRID, BI, ETL } private static final Log LOG = LogFactory.getLog(OrcInputFormat.class); private static boolean isDebugEnabled = LOG.isDebugEnabled(); static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); static final String MIN_SPLIT_SIZE = SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE"); static final String MAX_SPLIT_SIZE = SHIMS.getHadoopConfNames().get("MAPREDMAXSPLITSIZE"); private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; private static final PerfLogger perfLogger = PerfLogger.getPerfLogger(); private static final String CLASS_NAME = ReaderImpl.class.getName(); /** * When picking the hosts for a split that crosses block boundaries, * any drop any host that has fewer than MIN_INCLUDED_LOCATION of the * number of bytes available on the host with the most. * If host1 has 10MB of the split, host2 has 20MB, and host3 has 18MB the * split will contain host2 (100% of host2) and host3 (90% of host2). Host1 * with 50% will be dropped. */ private static final double MIN_INCLUDED_LOCATION = 0.80; @Override public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException { return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf); } private static class OrcRecordReader implements org.apache.hadoop.mapred.RecordReader, StatsProvidingRecordReader { private final RecordReader reader; private final long offset; private final long length; private final int numColumns; private float progress = 0.0f; private final Reader file; private final SerDeStats stats; OrcRecordReader(Reader file, Configuration conf, FileSplit split) throws IOException { List types = file.getTypes(); this.file = file; numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount(); this.offset = split.getStart(); this.length = split.getLength(); this.reader = createReaderFromFile(file, conf, offset, length); this.stats = new SerDeStats(); } @Override public boolean next(NullWritable key, OrcStruct value) throws IOException { if (reader.hasNext()) { reader.next(value); progress = reader.getProgress(); return true; } else { return false; } } @Override public NullWritable createKey() { return NullWritable.get(); } @Override public OrcStruct createValue() { return new OrcStruct(numColumns); } @Override public long getPos() throws IOException { return offset + (long) (progress * length); } @Override public void close() throws IOException { reader.close(); } @Override public float getProgress() throws IOException { return progress; } @Override public SerDeStats getStats() { stats.setRawDataSize(file.getRawDataSize()); stats.setRowCount(file.getNumberOfRows()); return stats; } } /** * Get the root column for the row. In ACID format files, it is offset by * the extra metadata columns. * @param isOriginal is the file in the original format? * @return the column number for the root of row. */ private static int getRootColumn(boolean isOriginal) { return isOriginal ? 
0 : (OrcRecordUpdater.ROW + 1); } public static RecordReader createReaderFromFile(Reader file, Configuration conf, long offset, long length ) throws IOException { Reader.Options options = new Reader.Options().range(offset, length); boolean isOriginal = isOriginal(file); List types = file.getTypes(); options.include(genIncludedColumns(types, conf, isOriginal)); setSearchArgument(options, types, conf, isOriginal); return file.rowsOptions(options); } public static boolean isOriginal(Reader file) { return !file.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME); } /** * Recurse down into a type subtree turning on all of the sub-columns. * @param types the types of the file * @param result the global view of columns that should be included * @param typeId the root of tree to enable * @param rootColumn the top column */ private static void includeColumnRecursive(List types, boolean[] result, int typeId, int rootColumn) { result[typeId - rootColumn] = true; OrcProto.Type type = types.get(typeId); int children = type.getSubtypesCount(); for(int i=0; i < children; ++i) { includeColumnRecursive(types, result, type.getSubtypes(i), rootColumn); } } public static boolean[] genIncludedColumns( List types, List included, boolean isOriginal) { int rootColumn = getRootColumn(isOriginal); int numColumns = types.size() - rootColumn; boolean[] result = new boolean[numColumns]; result[0] = true; OrcProto.Type root = types.get(rootColumn); for(int i=0; i < root.getSubtypesCount(); ++i) { if (included.contains(i)) { includeColumnRecursive(types, result, root.getSubtypes(i), rootColumn); } } return result; } /** * Take the configuration and figure out which columns we need to include. * @param types the types for the file * @param conf the configuration * @param isOriginal is the file in the original format? */ public static boolean[] genIncludedColumns( List types, Configuration conf, boolean isOriginal) { if (!ColumnProjectionUtils.isReadAllColumns(conf)) { List included = ColumnProjectionUtils.getReadColumnIDs(conf); return genIncludedColumns(types, included, isOriginal); } else { return null; } } public static String[] getSargColumnNames(String[] originalColumnNames, List types, boolean[] includedColumns, boolean isOriginal) { int rootColumn = getRootColumn(isOriginal); String[] columnNames = new String[types.size() - rootColumn]; int i = 0; for(int columnId: types.get(rootColumn).getSubtypesList()) { if (includedColumns == null || includedColumns[columnId - rootColumn]) { // this is guaranteed to be positive because types only have children // ids greater than their own id. 
columnNames[columnId - rootColumn] = originalColumnNames[i++]; } } return columnNames; } static void setSearchArgument(Reader.Options options, List types, Configuration conf, boolean isOriginal) { String columnNamesString = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); if (columnNamesString == null) { LOG.debug("No ORC pushdown predicate - no column names"); options.searchArgument(null, null); return; } SearchArgument sarg = SearchArgumentFactory.createFromConf(conf); if (sarg == null) { LOG.debug("No ORC pushdown predicate"); options.searchArgument(null, null); return; } LOG.info("ORC pushdown predicate: " + sarg); options.searchArgument(sarg, getSargColumnNames( columnNamesString.split(","), types, options.getInclude(), isOriginal)); } @Override public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList files ) throws IOException { if (Utilities.isVectorMode(conf)) { return new VectorizedOrcInputFormat().validateInput(fs, conf, files); } if (files.size() <= 0) { return false; } for (FileStatus file : files) { try { OrcFile.createReader(file.getPath(), OrcFile.readerOptions(conf).filesystem(fs)); } catch (IOException e) { return false; } } return true; } /** * Get the list of input {@link Path}s for the map-reduce job. * * @param conf The configuration of the job * @return the list of input {@link Path}s for the map-reduce job. */ static Path[] getInputPaths(Configuration conf) throws IOException { String dirs = conf.get("mapred.input.dir"); if (dirs == null) { throw new IOException("Configuration mapred.input.dir is not defined."); } String [] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; } /** * The global information about the split generation that we pass around to * the different worker threads. 
*/ static class Context { private final Configuration conf; private static Cache footerCache; private static ExecutorService threadPool = null; private final int numBuckets; private final long maxSize; private final long minSize; private final boolean footerInSplits; private final boolean cacheStripeDetails; private final AtomicInteger cacheHitCounter = new AtomicInteger(0); private final AtomicInteger numFilesCounter = new AtomicInteger(0); private ValidTxnList transactionList; private SplitStrategyKind splitStrategyKind; Context(Configuration conf) { this.conf = conf; minSize = conf.getLong(MIN_SPLIT_SIZE, DEFAULT_MIN_SPLIT_SIZE); maxSize = conf.getLong(MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE); String ss = conf.get(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname); if (ss == null || ss.equals(SplitStrategyKind.HYBRID.name())) { splitStrategyKind = SplitStrategyKind.HYBRID; } else { LOG.info("Enforcing " + ss + " ORC split strategy"); splitStrategyKind = SplitStrategyKind.valueOf(ss); } footerInSplits = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS); numBuckets = Math.max(conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0), 0); LOG.debug("Number of buckets specified by conf file is " + numBuckets); int cacheStripeDetailsSize = HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE); int numThreads = HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS); cacheStripeDetails = (cacheStripeDetailsSize > 0); synchronized (Context.class) { if (threadPool == null) { threadPool = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("ORC_GET_SPLITS #%d").build()); } if (footerCache == null && cacheStripeDetails) { footerCache = CacheBuilder.newBuilder().concurrencyLevel(numThreads) .initialCapacity(cacheStripeDetailsSize).softValues().build(); } } String value = conf.get(ValidTxnList.VALID_TXNS_KEY, Long.MAX_VALUE + ":"); transactionList = new ValidReadTxnList(value); } } interface SplitStrategy { List getSplits() throws IOException; } static final class SplitInfo extends ACIDSplitStrategy { private final Context context; private final FileSystem fs; private final FileStatus file; private final FileInfo fileInfo; private final boolean isOriginal; private final List deltas; private final boolean hasBase; SplitInfo(Context context, FileSystem fs, FileStatus file, FileInfo fileInfo, boolean isOriginal, List deltas, boolean hasBase, Path dir, boolean[] covered) throws IOException { super(dir, context.numBuckets, deltas, covered); this.context = context; this.fs = fs; this.file = file; this.fileInfo = fileInfo; this.isOriginal = isOriginal; this.deltas = deltas; this.hasBase = hasBase; } } /** * ETL strategy is used when spending little more time in split generation is acceptable * (split generation reads and caches file footers). 
*/ static final class ETLSplitStrategy implements SplitStrategy { Context context; FileSystem fs; List files; boolean isOriginal; List deltas; Path dir; boolean[] covered; public ETLSplitStrategy(Context context, FileSystem fs, Path dir, List children, boolean isOriginal, List deltas, boolean[] covered) { this.context = context; this.dir = dir; this.fs = fs; this.files = children; this.isOriginal = isOriginal; this.deltas = deltas; this.covered = covered; } private FileInfo verifyCachedFileInfo(FileStatus file) { context.numFilesCounter.incrementAndGet(); FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath()); if (fileInfo != null) { if (LOG.isDebugEnabled()) { LOG.debug("Info cached for path: " + file.getPath()); } if (fileInfo.modificationTime == file.getModificationTime() && fileInfo.size == file.getLen()) { // Cached copy is valid context.cacheHitCounter.incrementAndGet(); return fileInfo; } else { // Invalidate Context.footerCache.invalidate(file.getPath()); if (LOG.isDebugEnabled()) { LOG.debug("Meta-Info for : " + file.getPath() + " changed. CachedModificationTime: " + fileInfo.modificationTime + ", CurrentModificationTime: " + file.getModificationTime() + ", CachedLength: " + fileInfo.size + ", CurrentLength: " + file.getLen()); } } } else { if (LOG.isDebugEnabled()) { LOG.debug("Info not cached for path: " + file.getPath()); } } return null; } @Override public List getSplits() throws IOException { List result = Lists.newArrayList(); for (FileStatus file : files) { FileInfo info = null; if (context.cacheStripeDetails) { info = verifyCachedFileInfo(file); } // ignore files of 0 length if (file.getLen() > 0) { result.add(new SplitInfo(context, fs, file, info, isOriginal, deltas, true, dir, covered)); } } return result; } @Override public String toString() { return ETLSplitStrategy.class.getSimpleName() + " strategy for " + dir; } } /** * BI strategy is used when the requirement is to spend less time in split generation * as opposed to query execution (split generation does not read or cache file footers). */ static final class BISplitStrategy extends ACIDSplitStrategy { List fileStatuses; boolean isOriginal; List deltas; FileSystem fs; Context context; Path dir; public BISplitStrategy(Context context, FileSystem fs, Path dir, List fileStatuses, boolean isOriginal, List deltas, boolean[] covered) { super(dir, context.numBuckets, deltas, covered); this.context = context; this.fileStatuses = fileStatuses; this.isOriginal = isOriginal; this.deltas = deltas; this.fs = fs; this.dir = dir; } @Override public List getSplits() throws IOException { List splits = Lists.newArrayList(); for (FileStatus fileStatus : fileStatuses) { String[] hosts = SHIMS.getLocationsWithOffset(fs, fileStatus).firstEntry().getValue() .getHosts(); OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts, null, isOriginal, true, deltas, -1); splits.add(orcSplit); } // add uncovered ACID delta splits splits.addAll(super.getSplits()); return splits; } @Override public String toString() { return BISplitStrategy.class.getSimpleName() + " strategy for " + dir; } } /** * ACID split strategy is used when there is no base directory (when transactions are enabled). 
*/ static class ACIDSplitStrategy implements SplitStrategy { Path dir; List deltas; boolean[] covered; int numBuckets; public ACIDSplitStrategy(Path dir, int numBuckets, List deltas, boolean[] covered) { this.dir = dir; this.numBuckets = numBuckets; this.deltas = deltas; this.covered = covered; } @Override public List getSplits() throws IOException { // Generate a split for any buckets that weren't covered. // This happens in the case where a bucket just has deltas and no // base. List splits = Lists.newArrayList(); if (!deltas.isEmpty()) { for (int b = 0; b < numBuckets; ++b) { if (!covered[b]) { splits.add(new OrcSplit(dir, b, 0, new String[0], null, false, false, deltas, -1)); } } } return splits; } @Override public String toString() { return ACIDSplitStrategy.class.getSimpleName() + " strategy for " + dir; } } /** * Given a directory, get the list of files and blocks in those files. * To parallelize file generator use "mapreduce.input.fileinputformat.list-status.num-threads" */ static final class FileGenerator implements Callable { private final Context context; private final FileSystem fs; private final Path dir; FileGenerator(Context context, FileSystem fs, Path dir) { this.context = context; this.fs = fs; this.dir = dir; } @Override public SplitStrategy call() throws IOException { final SplitStrategy splitStrategy; AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, context.conf, context.transactionList); List deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories()); Path base = dirInfo.getBaseDirectory(); List original = dirInfo.getOriginalFiles(); boolean[] covered = new boolean[context.numBuckets]; boolean isOriginal = base == null; // if we have a base to work from if (base != null || !original.isEmpty()) { // find the base files (original or new style) List children = original; if (base != null) { children = SHIMS.listLocatedStatus(fs, base, AcidUtils.hiddenFileFilter); } long totalFileSize = 0; for (FileStatus child : children) { totalFileSize += child.getLen(); AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename (child.getPath(), context.conf); int b = opts.getBucket(); // If the bucket is in the valid range, mark it as covered. // I wish Hive actually enforced bucketing all of the time. if (b >= 0 && b < covered.length) { covered[b] = true; } } int numFiles = children.size(); long avgFileSize = totalFileSize / numFiles; switch(context.splitStrategyKind) { case BI: // BI strategy requested through config splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal, deltas, covered); break; case ETL: // ETL strategy requested through config splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas, covered); break; default: // HYBRID strategy if (avgFileSize > context.maxSize) { splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas, covered); } else { splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal, deltas, covered); } break; } } else { // no base, only deltas splitStrategy = new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered); } return splitStrategy; } } /** * Split the stripes of a given file into input splits. * A thread is used for each file. 
*/ static final class SplitGenerator implements Callable> { private final Context context; private final FileSystem fs; private final FileStatus file; private final long blockSize; private final TreeMap locations; private final FileInfo fileInfo; private List stripes; private ReaderImpl.FileMetaInfo fileMetaInfo; private Metadata metadata; private List types; private final boolean isOriginal; private final List deltas; private final boolean hasBase; private OrcFile.WriterVersion writerVersion; private long projColsUncompressedSize; private List deltaSplits; public SplitGenerator(SplitInfo splitInfo) throws IOException { this.context = splitInfo.context; this.fs = splitInfo.fs; this.file = splitInfo.file; this.blockSize = file.getBlockSize(); this.fileInfo = splitInfo.fileInfo; locations = SHIMS.getLocationsWithOffset(fs, file); this.isOriginal = splitInfo.isOriginal; this.deltas = splitInfo.deltas; this.hasBase = splitInfo.hasBase; this.projColsUncompressedSize = -1; this.deltaSplits = splitInfo.getSplits(); } Path getPath() { return file.getPath(); } @Override public String toString() { return "splitter(" + file.getPath() + ")"; } /** * Compute the number of bytes that overlap between the two ranges. * @param offset1 start of range1 * @param length1 length of range1 * @param offset2 start of range2 * @param length2 length of range2 * @return the number of bytes in the overlap range */ static long getOverlap(long offset1, long length1, long offset2, long length2) { long end1 = offset1 + length1; long end2 = offset2 + length2; if (end2 <= offset1 || end1 <= offset2) { return 0; } else { return Math.min(end1, end2) - Math.max(offset1, offset2); } } /** * Create an input split over the given range of bytes. The location of the * split is based on where the majority of the byte are coming from. ORC * files are unlikely to have splits that cross between blocks because they * are written with large block sizes. * @param offset the start of the split * @param length the length of the split * @param fileMetaInfo file metadata from footer and postscript * @throws IOException */ OrcSplit createSplit(long offset, long length, ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException { String[] hosts; Map.Entry startEntry = locations.floorEntry(offset); BlockLocation start = startEntry.getValue(); if (offset + length <= start.getOffset() + start.getLength()) { // handle the single block case hosts = start.getHosts(); } else { Map.Entry endEntry = locations.floorEntry(offset + length); BlockLocation end = endEntry.getValue(); //get the submap NavigableMap navigableMap = locations.subMap(startEntry.getKey(), true, endEntry.getKey(), true); // Calculate the number of bytes in the split that are local to each // host. 
Map sizes = new HashMap(); long maxSize = 0; for (BlockLocation block : navigableMap.values()) { long overlap = getOverlap(offset, length, block.getOffset(), block.getLength()); if (overlap > 0) { for(String host: block.getHosts()) { LongWritable val = sizes.get(host); if (val == null) { val = new LongWritable(); sizes.put(host, val); } val.set(val.get() + overlap); maxSize = Math.max(maxSize, val.get()); } } else { throw new IOException("File " + file.getPath().toString() + " should have had overlap on block starting at " + block.getOffset()); } } // filter the list of locations to those that have at least 80% of the // max long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION); List hostList = new ArrayList(); // build the locations in a predictable order to simplify testing for(BlockLocation block: navigableMap.values()) { for(String host: block.getHosts()) { if (sizes.containsKey(host)) { if (sizes.get(host).get() >= threshold) { hostList.add(host); } sizes.remove(host); } } } hosts = new String[hostList.size()]; hostList.toArray(hosts); } return new OrcSplit(file.getPath(), offset, length, hosts, fileMetaInfo, isOriginal, hasBase, deltas, projColsUncompressedSize); } /** * Divide the adjacent stripes in the file into input splits based on the * block size and the configured minimum and maximum sizes. */ @Override public List call() throws IOException { populateAndCacheStripeDetails(); List splits = Lists.newArrayList(); // figure out which stripes we need to read boolean[] includeStripe = null; // we can't eliminate stripes if there are deltas because the // deltas may change the rows making them match the predicate. if (deltas.isEmpty()) { Reader.Options options = new Reader.Options(); options.include(genIncludedColumns(types, context.conf, isOriginal)); setSearchArgument(options, types, context.conf, isOriginal); // only do split pruning if HIVE-8732 has been fixed in the writer if (options.getSearchArgument() != null && writerVersion != OrcFile.WriterVersion.ORIGINAL) { SearchArgument sarg = options.getSearchArgument(); List sargLeaves = sarg.getLeaves(); List stripeStats = metadata.getStripeStatistics(); int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves, options.getColumnNames(), getRootColumn(isOriginal)); if (stripeStats != null) { // eliminate stripes that doesn't satisfy the predicate condition includeStripe = new boolean[stripes.size()]; for (int i = 0; i < stripes.size(); ++i) { includeStripe[i] = (i >= stripeStats.size()) || isStripeSatisfyPredicate(stripeStats.get(i), sarg, filterColumns); if (LOG.isDebugEnabled() && !includeStripe[i]) { LOG.debug("Eliminating ORC stripe-" + i + " of file '" + file.getPath() + "' as it did not satisfy " + "predicate condition."); } } } } } // if we didn't have predicate pushdown, read everything if (includeStripe == null) { includeStripe = new boolean[stripes.size()]; Arrays.fill(includeStripe, true); } long currentOffset = -1; long currentLength = 0; int idx = -1; for (StripeInformation stripe : stripes) { idx++; if (!includeStripe[idx]) { // create split for the previous unfinished stripe if (currentOffset != -1) { splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); currentOffset = -1; } continue; } // if we are working on a stripe, over the min stripe size, and // crossed a block boundary, cut the input split here. 
if (currentOffset != -1 && currentLength > context.minSize && (currentOffset / blockSize != stripe.getOffset() / blockSize)) { splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); currentOffset = -1; } // if we aren't building a split, start a new one. if (currentOffset == -1) { currentOffset = stripe.getOffset(); currentLength = stripe.getLength(); } else { currentLength = (stripe.getOffset() + stripe.getLength()) - currentOffset; } if (currentLength >= context.maxSize) { splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); currentOffset = -1; } } if (currentOffset != -1) { splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); } // add uncovered ACID delta splits splits.addAll(deltaSplits); return splits; } private void populateAndCacheStripeDetails() throws IOException { Reader orcReader = OrcFile.createReader(file.getPath(), OrcFile.readerOptions(context.conf).filesystem(fs)); List projCols = ColumnProjectionUtils.getReadColumnNames(context.conf); projColsUncompressedSize = orcReader.getRawDataSizeOfColumns(projCols); if (fileInfo != null) { stripes = fileInfo.stripeInfos; fileMetaInfo = fileInfo.fileMetaInfo; metadata = fileInfo.metadata; types = fileInfo.types; writerVersion = fileInfo.writerVersion; // For multiple runs, in case sendSplitsInFooter changes if (fileMetaInfo == null && context.footerInSplits) { fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo(); fileInfo.metadata = orcReader.getMetadata(); fileInfo.types = orcReader.getTypes(); fileInfo.writerVersion = orcReader.getWriterVersion(); } } else { stripes = orcReader.getStripes(); metadata = orcReader.getMetadata(); types = orcReader.getTypes(); writerVersion = orcReader.getWriterVersion(); fileMetaInfo = context.footerInSplits ? ((ReaderImpl) orcReader).getFileMetaInfo() : null; if (context.cacheStripeDetails) { // Populate into cache. Context.footerCache.put(file.getPath(), new FileInfo(file.getModificationTime(), file.getLen(), stripes, metadata, types, fileMetaInfo, writerVersion)); } } } private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns) { List predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. 
truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); } } static List generateSplitsInfo(Configuration conf) throws IOException { // use threads to resolve directories into splits Context context = new Context(conf); List splits = Lists.newArrayList(); List> pathFutures = Lists.newArrayList(); List> splitFutures = Lists.newArrayList(); // multi-threaded file statuses and split strategy for (Path dir : getInputPaths(conf)) { FileSystem fs = dir.getFileSystem(conf); FileGenerator fileGenerator = new FileGenerator(context, fs, dir); pathFutures.add(context.threadPool.submit(fileGenerator)); } // complete path futures and schedule split generation try { for (Future pathFuture : pathFutures) { SplitStrategy splitStrategy = (SplitStrategy) pathFuture.get(); if (isDebugEnabled) { LOG.debug(splitStrategy); } if (splitStrategy instanceof ETLSplitStrategy) { List splitInfos = splitStrategy.getSplits(); for (SplitInfo splitInfo : splitInfos) { splitFutures.add(context.threadPool.submit(new SplitGenerator(splitInfo))); } } else { splits.addAll(splitStrategy.getSplits()); } } // complete split futures for (Future splitFuture : splitFutures) { splits.addAll((Collection) splitFuture.get()); } } catch (Exception e) { cancelFutures(pathFutures); cancelFutures(splitFutures); throw new RuntimeException("serious problem", e); } if (context.cacheStripeDetails) { LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/" + context.numFilesCounter.get()); } if (isDebugEnabled) { for (OrcSplit split : splits) { LOG.debug(split + " projected_columns_uncompressed_size: " + split.getProjectedColumnsUncompressedSize()); } } return splits; } private static void cancelFutures(List> futures) { for (Future future : futures) { future.cancel(true); } } @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS); List result = generateSplitsInfo(job); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS); return result.toArray(new InputSplit[result.size()]); } /** * FileInfo. * * Stores information relevant to split generation for an ORC File. * */ private static class FileInfo { long modificationTime; long size; List stripeInfos; ReaderImpl.FileMetaInfo fileMetaInfo; Metadata metadata; List types; private OrcFile.WriterVersion writerVersion; FileInfo(long modificationTime, long size, List stripeInfos, Metadata metadata, List types, ReaderImpl.FileMetaInfo fileMetaInfo, OrcFile.WriterVersion writerVersion) { this.modificationTime = modificationTime; this.size = size; this.stripeInfos = stripeInfos; this.fileMetaInfo = fileMetaInfo; this.metadata = metadata; this.types = types; this.writerVersion = writerVersion; } } @SuppressWarnings("unchecked") private org.apache.hadoop.mapred.RecordReader createVectorizedReader(InputSplit split, JobConf conf, Reporter reporter ) throws IOException { return (org.apache.hadoop.mapred.RecordReader) new VectorizedOrcInputFormat().getRecordReader(split, conf, reporter); } @Override public org.apache.hadoop.mapred.RecordReader getRecordReader(InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException { boolean vectorMode = Utilities.isVectorMode(conf); // if HiveCombineInputFormat gives us FileSplits instead of OrcSplits, // we know it is not ACID. 
(see a check in CombineHiveInputFormat.getSplits() that assures this) if (inputSplit.getClass() == FileSplit.class) { if (vectorMode) { return createVectorizedReader(inputSplit, conf, reporter); } return new OrcRecordReader(OrcFile.createReader( ((FileSplit) inputSplit).getPath(), OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit); } OrcSplit split = (OrcSplit) inputSplit; reporter.setStatus(inputSplit.toString()); Options options = new Options(conf).reporter(reporter); final RowReader inner = getReader(inputSplit, options); /*Even though there are no delta files, we still need to produce row ids so that an * UPDATE or DELETE statement would work on a table which didn't have any previous updates*/ if (split.isOriginal() && split.getDeltas().isEmpty()) { if (vectorMode) { return createVectorizedReader(inputSplit, conf, reporter); } else { return new NullKeyRecordReader(inner, conf); } } if (vectorMode) { return (org.apache.hadoop.mapred.RecordReader) new VectorizedOrcAcidRowReader(inner, conf, (FileSplit) inputSplit); } return new NullKeyRecordReader(inner, conf); } /** * Return a RecordReader that is compatible with the Hive 0.12 reader * with NullWritable for the key instead of RecordIdentifier. */ public static final class NullKeyRecordReader implements AcidRecordReader { private final RecordIdentifier id; private final RowReader inner; public RecordIdentifier getRecordIdentifier() { return id; } private NullKeyRecordReader(RowReader inner, Configuration conf) { this.inner = inner; id = inner.createKey(); } @Override public boolean next(NullWritable nullWritable, OrcStruct orcStruct) throws IOException { return inner.next(id, orcStruct); } @Override public NullWritable createKey() { return NullWritable.get(); } @Override public OrcStruct createValue() { return inner.createValue(); } @Override public long getPos() throws IOException { return inner.getPos(); } @Override public void close() throws IOException { inner.close(); } @Override public float getProgress() throws IOException { return inner.getProgress(); } } @Override public RowReader getReader(InputSplit inputSplit, Options options) throws IOException { final OrcSplit split = (OrcSplit) inputSplit; final Path path = split.getPath(); Path root; if (split.hasBase()) { if (split.isOriginal()) { root = path.getParent(); } else { root = path.getParent().getParent(); } } else { root = path; } final Path[] deltas = AcidUtils.deserializeDeltas(root, split.getDeltas()); final Configuration conf = options.getConfiguration(); final Reader reader; final int bucket; Reader.Options readOptions = new Reader.Options(); readOptions.range(split.getStart(), split.getLength()); if (split.hasBase()) { bucket = AcidUtils.parseBaseBucketFilename(split.getPath(), conf) .getBucket(); reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); final List types = reader.getTypes(); readOptions.include(genIncludedColumns(types, conf, split.isOriginal())); setSearchArgument(readOptions, types, conf, split.isOriginal()); } else { bucket = (int) split.getStart(); reader = null; } String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY, Long.MAX_VALUE + ":"); ValidTxnList validTxnList = new ValidReadTxnList(txnString); final OrcRawRecordMerger records = new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket, validTxnList, readOptions, deltas); return new RowReader() { OrcStruct innerRecord = records.createValue(); @Override public ObjectInspector getObjectInspector() { return ((StructObjectInspector) records.getObjectInspector()) 
.getAllStructFieldRefs().get(OrcRecordUpdater.ROW) .getFieldObjectInspector(); } @Override public boolean next(RecordIdentifier recordIdentifier, OrcStruct orcStruct) throws IOException { boolean result; // filter out the deleted records do { result = records.next(recordIdentifier, innerRecord); } while (result && OrcRecordUpdater.getOperation(innerRecord) == OrcRecordUpdater.DELETE_OPERATION); if (result) { // swap the fields with the passed in orcStruct orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord)); } return result; } @Override public RecordIdentifier createKey() { return records.createKey(); } @Override public OrcStruct createValue() { return new OrcStruct(records.getColumns()); } @Override public long getPos() throws IOException { return records.getPos(); } @Override public void close() throws IOException { records.close(); } @Override public float getProgress() throws IOException { return records.getProgress(); } }; } static Path findOriginalBucket(FileSystem fs, Path directory, int bucket) throws IOException { for(FileStatus stat: fs.listStatus(directory)) { String name = stat.getPath().getName(); String numberPart = name.substring(0, name.indexOf('_')); if (org.apache.commons.lang3.StringUtils.isNumeric(numberPart) && Integer.parseInt(numberPart) == bucket) { return stat.getPath(); } } throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + directory); } @Override public RawReader getRawReader(Configuration conf, boolean collapseEvents, int bucket, ValidTxnList validTxnList, Path baseDirectory, Path[] deltaDirectory ) throws IOException { Reader reader = null; boolean isOriginal = false; if (baseDirectory != null) { Path bucketFile; if (baseDirectory.getName().startsWith(AcidUtils.BASE_PREFIX)) { bucketFile = AcidUtils.createBucketFile(baseDirectory, bucket); } else { isOriginal = true; bucketFile = findOriginalBucket(baseDirectory.getFileSystem(conf), baseDirectory, bucket); } reader = OrcFile.createReader(bucketFile, OrcFile.readerOptions(conf)); } return new OrcRawRecordMerger(conf, collapseEvents, reader, isOriginal, bucket, validTxnList, new Reader.Options(), deltaDirectory); } }
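
The class javadoc above lays out the ACID event schema: every row-level change is recorded as an event carrying an operation, the (originalTransaction, bucket, rowId) identity of the row, the transaction that produced the event, and the row itself (null for deletes). The sketch below is a minimal, simplified model of how such an event stream collapses to the latest visible rows; it is not the actual OrcRawRecordMerger logic, and AcidEventSketch, RowKey, and latestVisibleRows are illustrative names only.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

// Simplified, illustrative model of the ACID event schema from the class javadoc.
// AcidEventSketch, RowKey and latestVisibleRows are hypothetical names; the real
// merging is done by OrcRawRecordMerger over sorted base and delta files.
public class AcidEventSketch {
  enum Action { INSERT, UPDATE, DELETE }

  /** The (originalTransaction, bucket, rowId) triple that identifies a row. */
  static final class RowKey {
    final long originalTransaction; final int bucket; final long rowId;
    RowKey(long originalTransaction, int bucket, long rowId) {
      this.originalTransaction = originalTransaction;
      this.bucket = bucket;
      this.rowId = rowId;
    }
    @Override public boolean equals(Object o) {
      if (!(o instanceof RowKey)) return false;
      RowKey k = (RowKey) o;
      return originalTransaction == k.originalTransaction
          && bucket == k.bucket && rowId == k.rowId;
    }
    @Override public int hashCode() {
      return Objects.hash(originalTransaction, bucket, rowId);
    }
  }

  static final class AcidEvent {
    final Action operation; final RowKey key;
    final long currentTransaction; final Object row;     // null for DELETE events
    AcidEvent(Action operation, RowKey key, long currentTransaction, Object row) {
      this.operation = operation; this.key = key;
      this.currentTransaction = currentTransaction; this.row = row;
    }
  }

  /** Replays events (assumed ordered by currentTransaction) and keeps only the
   *  latest visible version of each row; rows whose last event is DELETE vanish,
   *  mirroring the DELETE filtering in the RowReader returned by getReader(). */
  static Map<RowKey, Object> latestVisibleRows(List<AcidEvent> events) {
    Map<RowKey, Object> rows = new LinkedHashMap<>();
    for (AcidEvent e : events) {
      if (e.operation == Action.DELETE) {
        rows.remove(e.key);
      } else {                                 // INSERT or UPDATE carry the full row
        rows.put(e.key, e.row);
      }
    }
    return rows;
  }
}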
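
FileGenerator.call() chooses the split strategy per directory: an explicit BI or ETL setting in ConfVars.HIVE_ORC_SPLIT_STRATEGY is honored as-is, while the default HYBRID setting compares the directory's average file size against the maximum split size, so large files go to the ETL strategy (which reads and caches footers so files can be split by stripe) and small files go to the BI strategy (one split per file, no footer reads). A condensed, hypothetical helper expressing that decision is sketched below; chooseStrategy and Strategy are stand-ins, not members of OrcInputFormat.

// Condensed view of the choice made in FileGenerator.call(). The real code switches
// on context.splitStrategyKind and constructs the strategy objects directly.
public class SplitStrategyChoiceSketch {
  enum Strategy { HYBRID, BI, ETL }

  static Strategy chooseStrategy(Strategy configured, long totalFileSize,
                                 int numFiles, long maxSplitSize) {
    if (configured != Strategy.HYBRID) {
      return configured;                       // BI or ETL forced through the config
    }
    long avgFileSize = totalFileSize / Math.max(numFiles, 1);
    return avgFileSize > maxSplitSize ? Strategy.ETL : Strategy.BI;
  }
}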
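
Inside SplitGenerator.call(), the stripes that survive predicate pushdown are folded into splits: a running split is cut when it exceeds the minimum size and the next stripe falls in a different HDFS block, and it is closed as soon as it reaches the maximum size. The sketch below reproduces that accumulation over plain (offset, length) pairs; Range and coalesceStripes are assumed names, and the real code also flushes the running split whenever a stripe is skipped by predicate pushdown.

import java.util.ArrayList;
import java.util.List;

// Simplified version of the stripe-coalescing loop in SplitGenerator.call().
// Stripes are merged into splits bounded by minSize/maxSize and cut at HDFS
// block boundaries. Range and coalesceStripes are illustrative names only.
public class StripeCoalescingSketch {
  static final class Range {
    final long offset; final long length;
    Range(long offset, long length) { this.offset = offset; this.length = length; }
  }

  static List<Range> coalesceStripes(List<Range> stripes, long minSize, long maxSize,
                                     long blockSize) {
    List<Range> splits = new ArrayList<>();
    long currentOffset = -1;
    long currentLength = 0;
    for (Range stripe : stripes) {
      // Cut the running split if it is big enough and the next stripe starts
      // in a different HDFS block.
      if (currentOffset != -1 && currentLength > minSize
          && (currentOffset / blockSize != stripe.offset / blockSize)) {
        splits.add(new Range(currentOffset, currentLength));
        currentOffset = -1;
      }
      if (currentOffset == -1) {
        currentOffset = stripe.offset;
        currentLength = stripe.length;
      } else {
        currentLength = (stripe.offset + stripe.length) - currentOffset;
      }
      if (currentLength >= maxSize) {
        splits.add(new Range(currentOffset, currentLength));
        currentOffset = -1;
      }
    }
    if (currentOffset != -1) {
      splits.add(new Range(currentOffset, currentLength));
    }
    return splits;
  }
}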
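
When a split spans several HDFS blocks, SplitGenerator.createSplit() totals the bytes each host serves within the split and, per MIN_INCLUDED_LOCATION, reports only the hosts holding at least 80% of the best host's share. Below is a self-contained sketch of that weighting under the assumption that block locations arrive as simple (offset, length, hosts) tuples; Block and pickSplitHosts are illustrative names, not part of this class.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative re-implementation of the host selection in SplitGenerator.createSplit():
// sum the bytes of the split that each host serves, then keep hosts with at least
// MIN_INCLUDED_LOCATION (80%) of the maximum.
public class SplitHostSketch {
  static final double MIN_INCLUDED_LOCATION = 0.80;

  static final class Block {
    final long offset; final long length; final String[] hosts;
    Block(long offset, long length, String... hosts) {
      this.offset = offset; this.length = length; this.hosts = hosts;
    }
  }

  /** Bytes shared by [offset1, offset1+length1) and [offset2, offset2+length2). */
  static long overlap(long offset1, long length1, long offset2, long length2) {
    long end1 = offset1 + length1, end2 = offset2 + length2;
    return Math.max(0, Math.min(end1, end2) - Math.max(offset1, offset2));
  }

  static List<String> pickSplitHosts(long splitOffset, long splitLength, List<Block> blocks) {
    Map<String, Long> bytesPerHost = new HashMap<>();
    long max = 0;
    for (Block b : blocks) {
      long o = overlap(splitOffset, splitLength, b.offset, b.length);
      if (o == 0) continue;
      for (String host : b.hosts) {
        long total = bytesPerHost.merge(host, o, Long::sum);
        max = Math.max(max, total);
      }
    }
    long threshold = (long) (max * MIN_INCLUDED_LOCATION);
    List<String> hosts = new ArrayList<>();
    for (Map.Entry<String, Long> e : bytesPerHost.entrySet()) {
      if (e.getValue() >= threshold) {
        hosts.add(e.getKey());
      }
    }
    return hosts;
  }
}

With the example from the MIN_INCLUDED_LOCATION comment (10 MB on host1, 20 MB on host2, 18 MB on host3), the threshold is 16 MB, so host2 and host3 are kept and host1 is dropped.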



