All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.plugin.hive.fs.HiveFileIterator Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hive.fs;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.AbstractIterator;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;
import io.trino.metastore.Table;
import io.trino.spi.TrinoException;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;

import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_FILE_NOT_FOUND;
import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.FAIL;
import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.RECURSE;
import static java.util.Objects.requireNonNull;

/**
 * Iterates over the {@link TrinoFileStatus} entries listed beneath a location, skipping
 * entries that are considered hidden (a path part whose name starts with {@code '.'} or
 * {@code '_'}).
 * <p>
 * The listing itself is delegated to a {@link DirectoryLister}; any {@link IOException}
 * raised while listing is translated to a {@link TrinoException}.
 */
public class HiveFileIterator
        extends AbstractIterator<TrinoFileStatus>
{
    /**
     * Selects how the listing is built and how hidden entries are detected.
     * With {@link #RECURSE} the recursive listing is used as-is and every path part below
     * the listed location is checked for hidden names. With the other values the listing is
     * wrapped in a {@code DirectoryListingFilter} (which receives a flag that is {@code true}
     * only for {@link #FAIL}) and only the last path part is checked.
     */
    public enum NestedDirectoryPolicy
    {
        IGNORED,
        RECURSE,
        FAIL
    }

    private final Location location;
    private final NestedDirectoryPolicy nestedDirectoryPolicy;
    private final Iterator<TrinoFileStatus> remoteIterator;

    /**
     * Creates an iterator over the files listed under {@code location}.
     *
     * @param table table passed through to the directory lister
     * @param location root location to list; must not be null
     * @param fileSystem file system passed through to the directory lister
     * @param directoryLister lister used to obtain the underlying file listing
     * @param nestedDirectoryPolicy policy for nested directories; must not be null
     * @throws TrinoException if starting the listing fails with an {@link IOException}
     */
    public HiveFileIterator(
            Table table,
            Location location,
            TrinoFileSystem fileSystem,
            DirectoryLister directoryLister,
            NestedDirectoryPolicy nestedDirectoryPolicy)
    {
        this.location = requireNonNull(location, "location is null");
        this.nestedDirectoryPolicy = requireNonNull(nestedDirectoryPolicy, "nestedDirectoryPolicy is null");
        this.remoteIterator = new FileStatusIterator(table, location, fileSystem, directoryLister, nestedDirectoryPolicy);
    }

    /**
     * Returns the next non-hidden file status, or signals end of data once the underlying
     * listing is exhausted.
     */
    @Override
    protected TrinoFileStatus computeNext()
    {
        while (remoteIterator.hasNext()) {
            TrinoFileStatus status = remoteIterator.next();

            // Ignore hidden files and directories
            if (nestedDirectoryPolicy == RECURSE) {
                // Search the full sub-path under the listed prefix for hidden directories
                if (isHiddenOrWithinHiddenParentDirectory(Location.of(status.getPath()), location)) {
                    continue;
                }
            }
            else if (isHiddenFileOrDirectory(Location.of(status.getPath()))) {
                continue;
            }

            return status;
        }

        return endOfData();
    }

    /**
     * Checks whether the final path part of {@code location} starts with {@code '.'} or {@code '_'}.
     *
     * @param location location whose last path part is inspected
     * @return {@code true} if the last path part is hidden
     */
    @VisibleForTesting
    static boolean isHiddenFileOrDirectory(Location location)
    {
        // Only looks for the last part of the path
        String path = location.path();
        // lastIndexOf returns -1 when there is no separator, so the scan then starts at index 0
        int lastSeparator = path.lastIndexOf('/');
        return containsHiddenPathPartAfterIndex(path, lastSeparator + 1);
    }

    /**
     * Checks whether any path part of {@code path} below {@code rootLocation} starts with
     * {@code '.'} or {@code '_'}. Path parts belonging to the root prefix itself are not inspected.
     *
     * @param path full location of the listed entry
     * @param rootLocation location that was listed; its string form must be a prefix of {@code path}
     * @return {@code true} if the entry or one of its parent directories under the root is hidden
     * @throws IllegalArgumentException if {@code path} does not start with {@code rootLocation}
     */
    @VisibleForTesting
    static boolean isHiddenOrWithinHiddenParentDirectory(Location path, Location rootLocation)
    {
        String pathString = path.toString();
        String prefix = rootLocation.toString();
        checkArgument(pathString.startsWith(prefix), "path %s does not start with prefix %s", pathString, prefix);
        // When the prefix has no trailing separator, skip one extra character for the separator that follows it
        return containsHiddenPathPartAfterIndex(pathString, prefix.endsWith("/") ? prefix.length() : prefix.length() + 1);
    }

    /**
     * Scans the {@code '/'}-separated path parts of {@code pathString}, beginning with the part
     * that starts at {@code startFromIndex}, and reports whether any of them starts with
     * {@code '.'} or {@code '_'}.
     *
     * @param pathString path to scan
     * @param startFromIndex index of the first character of the first path part to inspect;
     *         an index at or beyond the end of the string yields {@code false}
     * @return {@code true} if a hidden path part was found
     */
    @VisibleForTesting
    static boolean containsHiddenPathPartAfterIndex(String pathString, int startFromIndex)
    {
        // Ignore hidden files and directories. Hive ignores files starting with _ and . as well.
        while (startFromIndex < pathString.length()) {
            char firstNameChar = pathString.charAt(startFromIndex);
            if (firstNameChar == '.' || firstNameChar == '_') {
                return true;
            }
            int nextSeparator = pathString.indexOf('/', startFromIndex);
            if (nextSeparator < 0) {
                break;
            }
            startFromIndex = nextSeparator + 1;
        }
        return false;
    }

    /**
     * Adapts a {@link RemoteIterator}, whose methods may throw {@link IOException}, to a plain
     * {@link Iterator} by translating those failures into {@link TrinoException}.
     */
    private static class FileStatusIterator
            implements Iterator<TrinoFileStatus>
    {
        private final Location location;
        private final RemoteIterator<TrinoFileStatus> fileStatusIterator;

        private FileStatusIterator(
                Table table,
                Location location,
                TrinoFileSystem fileSystem,
                DirectoryLister directoryLister,
                NestedDirectoryPolicy nestedDirectoryPolicy)
        {
            this.location = requireNonNull(location, "location is null");
            try {
                if (nestedDirectoryPolicy == RECURSE) {
                    this.fileStatusIterator = directoryLister.listFilesRecursively(fileSystem, table, location);
                }
                else {
                    // Non-recursive policies filter the recursive listing; the flag is true only for FAIL
                    this.fileStatusIterator = new DirectoryListingFilter(
                            location,
                            directoryLister.listFilesRecursively(fileSystem, table, location),
                            nestedDirectoryPolicy == FAIL);
                }
            }
            catch (IOException e) {
                throw processException(e);
            }
        }

        @Override
        public boolean hasNext()
        {
            try {
                return fileStatusIterator.hasNext();
            }
            catch (IOException e) {
                throw processException(e);
            }
        }

        @Override
        public TrinoFileStatus next()
        {
            try {
                return fileStatusIterator.next();
            }
            catch (IOException e) {
                throw processException(e);
            }
        }

        /**
         * Maps a listing failure to a {@link TrinoException}: {@link FileNotFoundException}
         * becomes {@code HIVE_FILE_NOT_FOUND}, anything else {@code HIVE_FILESYSTEM_ERROR}.
         * The original exception is kept as the cause in both cases.
         */
        private TrinoException processException(IOException exception)
        {
            if (exception instanceof FileNotFoundException) {
                return new TrinoException(HIVE_FILE_NOT_FOUND, "Partition location does not exist: " + location, exception);
            }
            return new TrinoException(HIVE_FILESYSTEM_ERROR, "Failed to list directory: " + location, exception);
        }
    }

    /**
     * Exception carrying the path of a nested directory that was not allowed.
     * NOTE(review): it is not thrown anywhere in this class; presumably raised by the
     * directory listing filter when nested directories must fail — confirm against that class.
     */
    public static class NestedDirectoryNotAllowedException
            extends RuntimeException
    {
        private final String nestedDirectoryPath;

        /**
         * @param nestedDirectoryPath path of the offending nested directory; must not be null
         */
        public NestedDirectoryNotAllowedException(String nestedDirectoryPath)
        {
            super("Nested sub-directories are not allowed: " + nestedDirectoryPath);
            this.nestedDirectoryPath = requireNonNull(nestedDirectoryPath, "nestedDirectoryPath is null");
        }

        /**
         * @return path of the nested directory that triggered this exception
         */
        public String getNestedDirectoryPath()
        {
            return nestedDirectoryPath;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy