All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.org.apache.hadoop.hbase.io.FileLink Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.fs.CanSetDropBehind;
import org.apache.hadoop.fs.CanSetReadahead;
import org.apache.hadoop.fs.CanUnbuffer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.AccessControlException;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The FileLink is a sort of hardlink, that allows access to a file given a set of locations.
 *
 * 

The Problem: *

    *
  • * HDFS doesn't have support for hardlinks, and this make impossible to referencing * the same data blocks using different names. *
  • *
  • * HBase store files in one location (e.g. table/region/family/) and when the file is not * needed anymore (e.g. compaction, region deletion, ...) moves it to an archive directory. *
  • *
* If we want to create a reference to a file, we need to remember that it can be in its * original location or in the archive folder. * The FileLink class tries to abstract this concept and given a set of locations * it is able to switch between them making this operation transparent for the user. * {@link HFileLink} is a more concrete implementation of the {@code FileLink}. * *

Back-references: * To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore} to keep track of * the links to a particular file, during the {@code FileLink} creation, a new file is placed * inside a back-reference directory. There's one back-reference directory for each file that * has links, and in the directory there's one file per link. * *

HFileLink Example *

    *
  • * /hbase/table/region-x/cf/file-k * (Original File) *
  • *
  • * /hbase/table-cloned/region-y/cf/file-k.region-x.table * (HFileLink to the original file) *
  • *
  • * /hbase/table-2nd-cloned/region-z/cf/file-k.region-x.table * (HFileLink to the original file) *
  • *
  • * /hbase/.archive/table/region-x/.links-file-k/region-y.table-cloned * (Back-reference to the link in table-cloned) *
  • *
  • * /hbase/.archive/table/region-x/.links-file-k/region-z.table-2nd-cloned * (Back-reference to the link in table-2nd-cloned) *
  • *
*/ @InterfaceAudience.Private public class FileLink { private static final Logger LOG = LoggerFactory.getLogger(FileLink.class); /** Define the Back-reference directory name prefix: .links-<hfile>/ */ public static final String BACK_REFERENCES_DIRECTORY_PREFIX = ".links-"; /** * FileLink InputStream that handles the switch between the original path * and the alternative locations, when the file is moved. */ private static class FileLinkInputStream extends InputStream implements Seekable, PositionedReadable, CanSetDropBehind, CanSetReadahead, CanUnbuffer { private FSDataInputStream in = null; private Path currentPath = null; private long pos = 0; private final FileLink fileLink; private final int bufferSize; private final FileSystem fs; public FileLinkInputStream(final FileSystem fs, final FileLink fileLink) throws IOException { this(fs, fileLink, CommonFSUtils.getDefaultBufferSize(fs)); } public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int bufferSize) throws IOException { this.bufferSize = bufferSize; this.fileLink = fileLink; this.fs = fs; this.in = tryOpen(); } @Override public int read() throws IOException { int res; try { res = in.read(); } catch (FileNotFoundException e) { res = tryOpen().read(); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() res = tryOpen().read(); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() res = tryOpen().read(); } if (res > 0) pos += 1; return res; } @Override public int read(byte[] b) throws IOException { return read(b, 0, b.length); } @Override public int read(byte[] b, int off, int len) throws IOException { int n; try { n = in.read(b, off, len); } catch (FileNotFoundException e) { n = tryOpen().read(b, off, len); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() n = tryOpen().read(b, off, len); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() n = tryOpen().read(b, off, len); } if (n > 0) pos += n; assert(in.getPos() == pos); return n; } @Override public int read(long position, byte[] buffer, int offset, int length) throws IOException { int n; try { n = in.read(position, buffer, offset, length); } catch (FileNotFoundException e) { n = tryOpen().read(position, buffer, offset, length); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() n = tryOpen().read(position, buffer, offset, length); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() n = tryOpen().read(position, buffer, offset, length); } return n; } @Override public void readFully(long position, byte[] buffer) throws IOException { readFully(position, buffer, 0, buffer.length); } @Override public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { try { in.readFully(position, buffer, offset, length); } catch (FileNotFoundException e) { tryOpen().readFully(position, buffer, offset, length); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() tryOpen().readFully(position, buffer, offset, length); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() tryOpen().readFully(position, buffer, offset, length); } } @Override public long skip(long n) throws IOException { long skipped; try { skipped = in.skip(n); } catch (FileNotFoundException e) { skipped = tryOpen().skip(n); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() skipped = tryOpen().skip(n); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() skipped = tryOpen().skip(n); } if (skipped > 0) pos += skipped; return skipped; } @Override public int available() throws IOException { try { return in.available(); } catch (FileNotFoundException e) { return tryOpen().available(); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() return tryOpen().available(); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() return tryOpen().available(); } } @Override public void seek(long pos) throws IOException { try { in.seek(pos); } catch (FileNotFoundException e) { tryOpen().seek(pos); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() tryOpen().seek(pos); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() tryOpen().seek(pos); } this.pos = pos; } @Override public long getPos() throws IOException { return pos; } @Override public boolean seekToNewSource(long targetPos) throws IOException { boolean res; try { res = in.seekToNewSource(targetPos); } catch (FileNotFoundException e) { res = tryOpen().seekToNewSource(targetPos); } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() res = tryOpen().seekToNewSource(targetPos); } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() res = tryOpen().seekToNewSource(targetPos); } if (res) pos = targetPos; return res; } @Override public void close() throws IOException { in.close(); } @Override public synchronized void mark(int readlimit) { } @Override public synchronized void reset() throws IOException { throw new IOException("mark/reset not supported"); } @Override public boolean markSupported() { return false; } @Override public void unbuffer() { if (in == null) { return; } in.unbuffer(); } /** * Try to open the file from one of the available locations. * * @return FSDataInputStream stream of the opened file link * @throws IOException on unexpected error, or file not found. */ private FSDataInputStream tryOpen() throws IOException { IOException exception = null; for (Path path: fileLink.getLocations()) { if (path.equals(currentPath)) continue; try { in = fs.open(path, bufferSize); if (pos != 0) in.seek(pos); assert(in.getPos() == pos) : "Link unable to seek to the right position=" + pos; if (LOG.isTraceEnabled()) { if (currentPath == null) { LOG.debug("link open path=" + path); } else { LOG.trace("link switch from path=" + currentPath + " to path=" + path); } } currentPath = path; return(in); } catch (FileNotFoundException | AccessControlException | RemoteException e) { exception = FileLink.handleAccessLocationException(fileLink, e, exception); } } throw exception; } @Override public void setReadahead(Long readahead) throws IOException, UnsupportedOperationException { in.setReadahead(readahead); } @Override public void setDropBehind(Boolean dropCache) throws IOException, UnsupportedOperationException { in.setDropBehind(dropCache); } } private Path[] locations = null; protected FileLink() { this.locations = null; } /** * @param originPath Original location of the file to link * @param alternativePaths Alternative locations to look for the linked file */ public FileLink(Path originPath, Path... alternativePaths) { setLocations(originPath, alternativePaths); } /** * @param locations locations to look for the linked file */ public FileLink(final Collection locations) { this.locations = locations.toArray(new Path[locations.size()]); } /** * @return the locations to look for the linked file. */ public Path[] getLocations() { return locations; } @Override public String toString() { StringBuilder str = new StringBuilder(getClass().getSimpleName()); str.append(" locations=["); for (int i = 0; i < locations.length; ++i) { if (i > 0) str.append(", "); str.append(locations[i].toString()); } str.append("]"); return str.toString(); } /** * @return true if the file pointed by the link exists */ public boolean exists(final FileSystem fs) throws IOException { for (int i = 0; i < locations.length; ++i) { if (fs.exists(locations[i])) { return true; } } return false; } /** * @return the path of the first available link. */ public Path getAvailablePath(FileSystem fs) throws IOException { for (int i = 0; i < locations.length; ++i) { if (fs.exists(locations[i])) { return locations[i]; } } throw new FileNotFoundException(toString()); } /** * Get the FileStatus of the referenced file. * * @param fs {@link FileSystem} on which to get the file status * @return InputStream for the hfile link. * @throws IOException on unexpected error. */ public FileStatus getFileStatus(FileSystem fs) throws IOException { IOException exception = null; for (int i = 0; i < locations.length; ++i) { try { return fs.getFileStatus(locations[i]); } catch (FileNotFoundException | AccessControlException e) { exception = handleAccessLocationException(this, e, exception); } } throw exception; } /** * Handle exceptions which are thrown when access locations of file link * @param fileLink the file link * @param newException the exception caught by access the current location * @param previousException the previous exception caught by access the other locations * @return return AccessControlException if access one of the locations caught, otherwise return * FileNotFoundException. The AccessControlException is threw if user scan snapshot * feature is enabled, see * {@link org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclController}. * @throws IOException if the exception is neither AccessControlException nor * FileNotFoundException */ private static IOException handleAccessLocationException(FileLink fileLink, IOException newException, IOException previousException) throws IOException { if (newException instanceof RemoteException) { newException = ((RemoteException) newException) .unwrapRemoteException(FileNotFoundException.class, AccessControlException.class); } if (newException instanceof FileNotFoundException) { // Try another file location if (previousException == null) { previousException = new FileNotFoundException(fileLink.toString()); } } else if (newException instanceof AccessControlException) { // Try another file location previousException = newException; } else { throw newException; } return previousException; } /** * Open the FileLink for read. *

* It uses a wrapper of FSDataInputStream that is agnostic to the location * of the file, even if the file switches between locations. * * @param fs {@link FileSystem} on which to open the FileLink * @return InputStream for reading the file link. * @throws IOException on unexpected error. */ public FSDataInputStream open(final FileSystem fs) throws IOException { return new FSDataInputStream(new FileLinkInputStream(fs, this)); } /** * Open the FileLink for read. *

* It uses a wrapper of FSDataInputStream that is agnostic to the location * of the file, even if the file switches between locations. * * @param fs {@link FileSystem} on which to open the FileLink * @param bufferSize the size of the buffer to be used. * @return InputStream for reading the file link. * @throws IOException on unexpected error. */ public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOException { return new FSDataInputStream(new FileLinkInputStream(fs, this, bufferSize)); } /** * NOTE: This method must be used only in the constructor! * It creates a List with the specified locations for the link. */ protected void setLocations(Path originPath, Path... alternativePaths) { assert this.locations == null : "Link locations already set"; List paths = new ArrayList<>(alternativePaths.length +1); if (originPath != null) { paths.add(originPath); } for (int i = 0; i < alternativePaths.length; i++) { if (alternativePaths[i] != null) { paths.add(alternativePaths[i]); } } this.locations = paths.toArray(new Path[0]); } /** * Get the directory to store the link back references * *

To simplify the reference count process, during the FileLink creation * a back-reference is added to the back-reference directory of the specified file. * * @param storeDir Root directory for the link reference folder * @param fileName File Name with links * @return Path for the link back references. */ public static Path getBackReferencesDir(final Path storeDir, final String fileName) { return new Path(storeDir, BACK_REFERENCES_DIRECTORY_PREFIX + fileName); } /** * Get the referenced file name from the reference link directory path. * * @param dirPath Link references directory path * @return Name of the file referenced */ public static String getBackReferenceFileName(final Path dirPath) { return dirPath.getName().substring(BACK_REFERENCES_DIRECTORY_PREFIX.length()); } /** * Checks if the specified directory path is a back reference links folder. * @param dirPath Directory path to verify * @return True if the specified directory is a link references folder */ public static boolean isBackReferencesDir(final Path dirPath) { if (dirPath == null) { return false; } return dirPath.getName().startsWith(BACK_REFERENCES_DIRECTORY_PREFIX); } @Override public boolean equals(Object obj) { if (obj == null) { return false; } // Assumes that the ordering of locations between objects are the same. This is true for the // current subclasses already (HFileLink, WALLink). Otherwise, we may have to sort the locations // or keep them presorted if (this.getClass().equals(obj.getClass())) { return Arrays.equals(this.locations, ((FileLink) obj).locations); } return false; } @Override public int hashCode() { return Arrays.hashCode(locations); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy