// org.apache.hadoop.hbase.io.FileLink (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileNotFoundException;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.fs.CanUnbuffer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.ipc.RemoteException;
/**
* The FileLink is a sort of hardlink, that allows access to a file given a set of locations.
*
* The Problem:
*
* -
* HDFS doesn't have support for hardlinks, and this makes it impossible to reference
* the same data blocks using different names.
*
* -
* HBase stores files in one location (e.g. table/region/family/) and when a file is not
* needed anymore (e.g. compaction, region deletion, ...) moves it to an archive directory.
*
*
* If we want to create a reference to a file, we need to remember that it can be in its
* original location or in the archive folder.
* The FileLink class tries to abstract this concept and given a set of locations
* it is able to switch between them making this operation transparent for the user.
* {@link HFileLink} is a more concrete implementation of the {@code FileLink}.
*
* Back-references:
* To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore} to keep track of
* the links to a particular file, during the {@code FileLink} creation, a new file is placed
* inside a back-reference directory. There's one back-reference directory for each file that
* has links, and in the directory there's one file per link.
*
*
* HFileLink Example:
*
* -
* /hbase/table/region-x/cf/file-k
* (Original File)
*
* -
* /hbase/table-cloned/region-y/cf/file-k.region-x.table
* (HFileLink to the original file)
*
* -
* /hbase/table-2nd-cloned/region-z/cf/file-k.region-x.table
* (HFileLink to the original file)
*
* -
* /hbase/.archive/table/region-x/.links-file-k/region-y.table-cloned
* (Back-reference to the link in table-cloned)
*
* -
* /hbase/.archive/table/region-x/.links-file-k/region-z.table-2nd-cloned
* (Back-reference to the link in table-2nd-cloned)
*
*
*/
@InterfaceAudience.Private
public class FileLink {
/** Logger shared by the link and its nested input stream. */
private static final Log LOG = LogFactory.getLog(FileLink.class);
/** Define the Back-reference directory name prefix: .links-<hfile>/ */
public static final String BACK_REFERENCES_DIRECTORY_PREFIX = ".links-";
/**
* FileLink InputStream that handles the switch between the original path
* and the alternative locations, when the file is moved.
*/
private static class FileLinkInputStream extends InputStream
implements Seekable, PositionedReadable, CanUnbuffer {
// Underlying stream currently in use; reassigned by tryOpen() each time a location is opened.
private FSDataInputStream in = null;
// Location the current stream was opened from; null until the first successful open.
private Path currentPath = null;
// Position tracked by this wrapper; tryOpen() seeks a newly opened stream to it when non-zero.
private long pos = 0;
// Link whose locations are probed by tryOpen().
private final FileLink fileLink;
// Buffer size passed to FileSystem.open() for every location.
private final int bufferSize;
// File system on which the link locations are opened.
private final FileSystem fs;
/**
 * Opens the link using the buffer size returned by
 * {@code FSUtils.getDefaultBufferSize(fs)}.
 *
 * @param fs file system on which the link locations are opened
 * @param fileLink link to read from
 * @throws IOException if no location of the link can be opened
 */
public FileLinkInputStream(final FileSystem fs, final FileLink fileLink)
throws IOException {
this(fs, fileLink, FSUtils.getDefaultBufferSize(fs));
}
/**
 * Creates the stream and immediately opens the first location of the link
 * that can be opened.
 *
 * @param fs file system on which the link locations are opened
 * @param fileLink link to read from
 * @param bufferSize buffer size used when opening a location
 * @throws IOException if no location of the link can be opened
 */
public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int bufferSize)
    throws IOException {
  this.fs = fs;
  this.fileLink = fileLink;
  this.bufferSize = bufferSize;
  // Must come last: tryOpen() reads the three fields assigned above.
  this.in = tryOpen();
}
/**
 * Reads a single byte, reopening the link from another location when the
 * current stream fails with one of the "file moved" symptoms handled below.
 *
 * @return the value returned by the underlying stream: the byte read (0-255),
 *         or -1 at end of stream
 * @throws IOException on read failure, or if no location of the link can be opened
 */
@Override
public int read() throws IOException {
  int res;
  try {
    res = in.read();
  } catch (FileNotFoundException e) {
    res = tryOpen().read();
  } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
    res = tryOpen().read();
  } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
    res = tryOpen().read();
  }
  // A single-byte read returns the byte value itself, so 0 is a valid byte (0x00)
  // and only a negative result means nothing was consumed. Testing "> 0" would
  // leave the tracked position one byte behind after every 0x00 byte.
  if (res >= 0) {
    pos += 1;
  }
  return res;
}
/**
 * Reads into the whole array; delegates to {@link #read(byte[], int, int)}
 * with offset 0 and length {@code b.length}.
 */
@Override
public int read(byte[] b) throws IOException {
return read(b, 0, b.length);
}
/**
 * Reads up to {@code len} bytes into {@code b} starting at {@code off}, retrying
 * once on a freshly opened location if the current stream fails. The tracked
 * position advances by the number of bytes read when that number is positive.
 *
 * @return the value returned by the underlying stream's read
 * @throws IOException on read failure, or if no location of the link can be opened
 */
@Override
public int read(byte[] b, int off, int len) throws IOException {
  int bytesRead;
  try {
    bytesRead = in.read(b, off, len);
  } catch (FileNotFoundException | NullPointerException | AssertionError e) {
    // NullPointerException / AssertionError: HDFS 1.x - DFSInputStream.getBlockAt()
    bytesRead = tryOpen().read(b, off, len);
  }
  if (bytesRead > 0) {
    pos += bytesRead;
  }
  assert(in.getPos() == pos);
  return bytesRead;
}
/**
 * Positional read: forwards to the underlying stream and retries once on a
 * freshly opened location if the current stream fails. The tracked position
 * of this wrapper is not modified by this method.
 *
 * @return the value returned by the underlying stream's positional read
 * @throws IOException on read failure, or if no location of the link can be opened
 */
@Override
public int read(long position, byte[] buffer, int offset, int length) throws IOException {
  try {
    return in.read(position, buffer, offset, length);
  } catch (FileNotFoundException | NullPointerException | AssertionError e) {
    // NullPointerException / AssertionError: HDFS 1.x - DFSInputStream.getBlockAt()
    return tryOpen().read(position, buffer, offset, length);
  }
}
/**
 * Positional read filling the whole array; delegates to
 * {@link #readFully(long, byte[], int, int)} with offset 0 and length
 * {@code buffer.length}.
 */
@Override
public void readFully(long position, byte[] buffer) throws IOException {
readFully(position, buffer, 0, buffer.length);
}
/**
 * Positional {@code readFully}: forwards to the underlying stream and retries
 * once on a freshly opened location if the current stream fails. The tracked
 * position of this wrapper is not modified by this method.
 *
 * @throws IOException on read failure, or if no location of the link can be opened
 */
@Override
public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
  try {
    in.readFully(position, buffer, offset, length);
  } catch (FileNotFoundException | NullPointerException | AssertionError e) {
    // NullPointerException / AssertionError: HDFS 1.x - DFSInputStream.getBlockAt()
    final FSDataInputStream reopened = tryOpen();
    reopened.readFully(position, buffer, offset, length);
  }
}
/**
 * Skips bytes on the underlying stream, retrying once on a freshly opened
 * location if the current stream fails. The tracked position advances by the
 * number of bytes skipped when that number is positive.
 *
 * @param n number of bytes requested to be skipped
 * @return the value returned by the underlying stream's skip
 * @throws IOException on failure, or if no location of the link can be opened
 */
@Override
public long skip(long n) throws IOException {
  long advanced;
  try {
    advanced = in.skip(n);
  } catch (FileNotFoundException | NullPointerException | AssertionError e) {
    // NullPointerException / AssertionError: HDFS 1.x - DFSInputStream.getBlockAt()
    advanced = tryOpen().skip(n);
  }
  if (advanced > 0) {
    pos += advanced;
  }
  return advanced;
}
/**
 * Reports the underlying stream's {@code available()} value, retrying once on a
 * freshly opened location if the current stream fails.
 *
 * @return the value returned by the underlying stream
 * @throws IOException on failure, or if no location of the link can be opened
 */
@Override
public int available() throws IOException {
  int remaining;
  try {
    remaining = in.available();
  } catch (FileNotFoundException | NullPointerException | AssertionError e) {
    // NullPointerException / AssertionError: HDFS 1.x - DFSInputStream.getBlockAt()
    remaining = tryOpen().available();
  }
  return remaining;
}
/**
 * Seeks the underlying stream to {@code pos}, retrying once on a freshly opened
 * location if the current stream fails, and then records {@code pos} as the
 * tracked position.
 *
 * @param pos target offset
 * @throws IOException on failure, or if no location of the link can be opened
 */
@Override
public void seek(long pos) throws IOException {
  try {
    in.seek(pos);
  } catch (FileNotFoundException | NullPointerException | AssertionError e) {
    // NullPointerException / AssertionError: HDFS 1.x - DFSInputStream.getBlockAt()
    final FSDataInputStream reopened = tryOpen();
    reopened.seek(pos);
  }
  // Only reached when one of the seeks above succeeded.
  this.pos = pos;
}
/**
 * @return the position tracked by this wrapper (not queried from the underlying stream)
 */
@Override
public long getPos() throws IOException {
return pos;
}
/**
 * Forwards {@code seekToNewSource} to the underlying stream, retrying once on a
 * freshly opened location if the current stream fails. The tracked position is
 * set to {@code targetPos} only when the underlying call returns {@code true}.
 *
 * @param targetPos target offset
 * @return the value returned by the underlying stream
 * @throws IOException on failure, or if no location of the link can be opened
 */
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
  boolean switched;
  try {
    switched = in.seekToNewSource(targetPos);
  } catch (FileNotFoundException | NullPointerException | AssertionError e) {
    // NullPointerException / AssertionError: HDFS 1.x - DFSInputStream.getBlockAt()
    switched = tryOpen().seekToNewSource(targetPos);
  }
  if (switched) {
    pos = targetPos;
  }
  return switched;
}
/**
 * Closes the underlying stream that is currently in use.
 */
@Override
public void close() throws IOException {
in.close();
}
/**
 * Does nothing: this stream reports {@code markSupported() == false}.
 */
@Override
public synchronized void mark(int readlimit) {
}
/**
 * Always fails, since mark/reset is not supported by this stream.
 *
 * @throws IOException always
 */
@Override
public synchronized void reset() throws IOException {
throw new IOException("mark/reset not supported");
}
/**
 * @return always {@code false}; mark/reset is not supported
 */
@Override
public boolean markSupported() {
return false;
}
/**
 * Forwards {@code unbuffer()} to the underlying stream, if there is one.
 */
@Override
public void unbuffer() {
  if (in != null) {
    in.unbuffer();
  }
}
/**
 * Try to open the file from one of the available locations.
 *
 * Locations are probed in the order returned by {@code fileLink.getLocations()},
 * skipping the one the stream is currently bound to. On success the new stream
 * is positioned at the tracked offset and becomes the current stream.
 *
 * @return FSDataInputStream stream of the opened file link
 * @throws IOException on unexpected error, or file not found.
 */
private FSDataInputStream tryOpen() throws IOException {
  for (Path path: fileLink.getLocations()) {
    // Skip the location the stream is currently bound to (none on the first open).
    if (path.equals(currentPath)) {
      continue;
    }
    try {
      in = fs.open(path, bufferSize);
      // Resume at the position the caller had reached on the previous stream.
      if (pos != 0) {
        in.seek(pos);
      }
      assert(in.getPos() == pos) : "Link unable to seek to the right position=" + pos;
      // Both messages are logged at TRACE so that the level matches the guard.
      if (LOG.isTraceEnabled()) {
        if (currentPath == null) {
          LOG.trace("link open path=" + path);
        } else {
          LOG.trace("link switch from path=" + currentPath + " to path=" + path);
        }
      }
      currentPath = path;
      return in;
    } catch (FileNotFoundException e) {
      // Try another file location
    } catch (RemoteException re) {
      // A remote FileNotFoundException is treated like a local one (try the next
      // location); any other remote failure is rethrown as-is.
      IOException ioe = re.unwrapRemoteException(FileNotFoundException.class);
      if (!(ioe instanceof FileNotFoundException)) {
        throw re;
      }
    }
  }
  throw new FileNotFoundException("Unable to open link: " + fileLink);
}
}
// Candidate locations of the linked file, scanned in array order by the lookup methods;
// null until assigned by a constructor or by setLocations().
private Path[] locations = null;
/**
 * Creates a link whose locations are not set yet (left {@code null}).
 */
protected FileLink() {
this.locations = null;
}
/**
 * Creates a link from an original location plus alternatives; the locations
 * are stored through {@code setLocations}.
 *
 * @param originPath Original location of the file to link
 * @param alternativePaths Alternative locations to look for the linked file
 */
public FileLink(Path originPath, Path... alternativePaths) {
setLocations(originPath, alternativePaths);
}
/**
 * Creates a link over the given locations, kept in the collection's iteration order.
 *
 * @param locations locations to look for the linked file
 */
public FileLink(final Collection<Path> locations) {
  // The element type must be declared: on a raw Collection, toArray(T[]) is erased
  // to return Object[], which cannot be assigned to the Path[] field.
  this.locations = locations.toArray(new Path[locations.size()]);
}
/**
 * Note that the internal array itself is returned, not a copy.
 *
 * @return the locations to look for the linked file.
 */
public Path[] getLocations() {
return locations;
}
/**
 * @return the runtime class name followed by the comma-separated list of locations
 */
@Override
public String toString() {
  final StringBuilder sb = new StringBuilder(getClass().getName());
  sb.append(" locations=[");
  String separator = "";
  for (Path location : locations) {
    sb.append(separator).append(location.toString());
    separator = ", ";
  }
  return sb.append("]").toString();
}
/**
 * Checks the locations in order and stops at the first one that exists.
 *
 * @param fs file system on which the locations are checked
 * @return true if the file pointed by the link exists
 * @throws IOException if the file system check fails
 */
public boolean exists(final FileSystem fs) throws IOException {
  for (Path location : locations) {
    if (fs.exists(location)) {
      return true;
    }
  }
  return false;
}
/**
 * Checks the locations in order and returns the first one that exists.
 *
 * @param fs file system on which the locations are checked
 * @return the path of the first available link.
 * @throws FileNotFoundException if none of the locations exists
 * @throws IOException if the file system check fails
 */
public Path getAvailablePath(FileSystem fs) throws IOException {
  for (Path candidate : locations) {
    if (fs.exists(candidate)) {
      return candidate;
    }
  }
  throw new FileNotFoundException("Unable to open link: " + this);
}
/**
 * Get the FileStatus of the referenced file.
 *
 * The locations are tried in order; a location that reports
 * {@link FileNotFoundException} is skipped in favour of the next one.
 *
 * @param fs {@link FileSystem} on which to get the file status
 * @return FileStatus of the first location that could be resolved.
 * @throws FileNotFoundException if no location could be resolved.
 * @throws IOException on unexpected error.
 */
public FileStatus getFileStatus(FileSystem fs) throws IOException {
  for (Path candidate : locations) {
    try {
      return fs.getFileStatus(candidate);
    } catch (FileNotFoundException e) {
      // Try another file location
    }
  }
  throw new FileNotFoundException("Unable to open link: " + this);
}
/**
 * Open the FileLink for read.
 *
 * The returned stream wraps a {@code FileLinkInputStream}, which reopens the
 * file from another location of the link when the current one stops working.
 *
 * @param fs {@link FileSystem} on which to open the FileLink
 * @return InputStream for reading the file link.
 * @throws IOException on unexpected error.
 */
public FSDataInputStream open(final FileSystem fs) throws IOException {
  final FileLinkInputStream linkStream = new FileLinkInputStream(fs, this);
  return new FSDataInputStream(linkStream);
}
/**
 * Open the FileLink for read, with an explicit buffer size.
 *
 * The returned stream wraps a {@code FileLinkInputStream}, which reopens the
 * file from another location of the link when the current one stops working.
 *
 * @param fs {@link FileSystem} on which to open the FileLink
 * @param bufferSize the size of the buffer to be used.
 * @return InputStream for reading the file link.
 * @throws IOException on unexpected error.
 */
public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOException {
  final FileLinkInputStream linkStream = new FileLinkInputStream(fs, this, bufferSize);
  return new FSDataInputStream(linkStream);
}
/**
 * NOTE: This method must be used only in the constructor!
 * It stores the specified locations for the link: the origin path first,
 * followed by the alternative paths in the given order. {@code null} entries
 * are skipped.
 *
 * @param originPath Original location of the file to link
 * @param alternativePaths Alternative locations to look for the linked file
 */
protected void setLocations(Path originPath, Path... alternativePaths) {
  assert this.locations == null : "Link locations already set";
  // The element type must be declared: on a raw List, toArray(T[]) is erased to
  // return Object[], which cannot be assigned to the Path[] field.
  List<Path> paths = new ArrayList<Path>(alternativePaths.length + 1);
  if (originPath != null) {
    paths.add(originPath);
  }
  for (Path alternative : alternativePaths) {
    if (alternative != null) {
      paths.add(alternative);
    }
  }
  this.locations = paths.toArray(new Path[0]);
}
/**
 * Get the directory to store the link back references
 *
 * The directory is a child of {@code storeDir} named with the back-reference
 * prefix followed by the file name.
 *
 * @param storeDir Root directory for the link reference folder
 * @param fileName File Name with links
 * @return Path for the link back references.
 */
public static Path getBackReferencesDir(final Path storeDir, final String fileName) {
  final String backRefDirName = BACK_REFERENCES_DIRECTORY_PREFIX + fileName;
  return new Path(storeDir, backRefDirName);
}
/**
 * Get the referenced file name from the reference link directory path.
 *
 * The name is obtained by dropping the first
 * {@code BACK_REFERENCES_DIRECTORY_PREFIX.length()} characters of the
 * directory name; the prefix itself is not verified here.
 *
 * @param dirPath Link references directory path
 * @return Name of the file referenced
 */
public static String getBackReferenceFileName(final Path dirPath) {
  final String dirName = dirPath.getName();
  final int prefixLength = BACK_REFERENCES_DIRECTORY_PREFIX.length();
  return dirName.substring(prefixLength);
}
/**
 * Checks if the specified directory path is a back reference links folder,
 * i.e. its last path component starts with the back-reference prefix.
 *
 * @param dirPath Directory path to verify, may be {@code null}
 * @return True if the specified directory is a link references folder
 */
public static boolean isBackReferencesDir(final Path dirPath) {
  return dirPath != null
      && dirPath.getName().startsWith(BACK_REFERENCES_DIRECTORY_PREFIX);
}
/**
 * Two links are equal when they have exactly the same runtime class and their
 * location arrays are equal element by element, in the same order.
 */
@Override
public boolean equals(Object obj) {
  if (obj == null || !this.getClass().equals(obj.getClass())) {
    return false;
  }
  // Assumes that the ordering of locations between objects are the same. This is true for the
  // current subclasses already (HFileLink, WALLink). Otherwise, we may have to sort the locations
  // or keep them presorted
  final FileLink other = (FileLink) obj;
  return Arrays.equals(this.locations, other.locations);
}
/**
 * @return hash computed from the locations array, the same data {@code equals} compares
 */
@Override
public int hashCode() {
return Arrays.hashCode(locations);
}
}