All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.regionserver.wal.SequenceFileLogReader Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver.wal;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.wal.WAL.Entry;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.Text;

/**
 * WAL reader for the legacy (pre-PB, pre-0.96 style) log format that is stored in a Hadoop
 * {@link SequenceFile}.  Entries handed to {@link #readNext(Entry)} must be keyed by
 * {@link HLogKey}.
 */
@InterfaceAudience.LimitedPrivate({HBaseInterfaceAudience.COPROC, HBaseInterfaceAudience.PHOENIX,
  HBaseInterfaceAudience.CONFIG})
public class SequenceFileLogReader extends ReaderBase {
  private static final Log LOG = LogFactory.getLog(SequenceFileLogReader.class);

  // Legacy stuff from pre-PB WAL metadata.
  private static final Text WAL_VERSION_KEY = new Text("version");
  // Let the version be 1.  Let absence of a version meta tag be old, version 0.
  // Set this version '1' to be the version that introduces compression,
  // the COMPRESSION_VERSION.
  private static final int COMPRESSION_VERSION = 1;
  private static final Text WAL_COMPRESSION_TYPE_KEY = new Text("compression.type");
  private static final Text DICTIONARY_COMPRESSION_TYPE = new Text("dictionary");

  /**
   * Hack just to set the correct file length up in SequenceFile.Reader.
   * See HADOOP-6307.  The below is all about setting the right length on the
   * file we are reading.  fs.getFileStatus(file).getLen() is passed down to
   * a private SequenceFile.Reader constructor.  This won't work.  Need to do
   * the available on the stream.  The below is ugly.  It makes getPos, the
   * first time it's called, return length of the file -- i.e. tell a lie -- just
   * so this line up in SF.Reader's constructor ends up with right answer:
   *
   *         this.end = in.getPos() + length;
   *
   */
  private static class WALReader extends SequenceFile.Reader {

    WALReader(final FileSystem fs, final Path p, final Configuration c)
    throws IOException {
      super(fs, p, c);
    }

    /**
     * Opens the file as usual, then wraps the resulting stream so that the first
     * {@code getPos()} call can be intercepted.
     */
    @Override
    protected FSDataInputStream openFile(FileSystem fs, Path file,
      int bufferSize, long length)
    throws IOException {
      final FSDataInputStream plainStream = super.openFile(fs, file, bufferSize, length);
      return new WALReaderFSDataInputStream(plainStream, length);
    }

    /**
     * Override just so can intercept first call to getPos.
     */
    static class WALReaderFSDataInputStream extends FSDataInputStream {
      /** True until {@link #getPos()} has been called once. */
      private boolean firstGetPosInvocation = true;
      /** File length as handed to {@code openFile}; may lag behind the real length. */
      private long length;

      WALReaderFSDataInputStream(final FSDataInputStream is, final long l)
      throws IOException {
        super(is);
        this.length = l;
      }

      // This section can be confusing.  It is specific to how HDFS works.
      // Let me try to break it down.  This is the problem:
      //
      //  1. HDFS DataNodes update the NameNode about a filename's length
      //     on block boundaries or when a file is closed. Therefore,
      //     if an RS dies, then the NN's fs.getLength() can be out of date
      //  2. this.in.available() would work, but it returns int &
      //     therefore breaks for files > 2GB (happens on big clusters)
      //  3. HdfsDataInputStream.getVisibleLength() is what we consult below
      //     to learn the real length
      //  4. The HdfsDataInputStream is the stream this class wraps
      //     (getWrappedStream())
      //
      // So, here we adjust getPos() using getVisibleLength() so the
      // SequenceFile.Reader constructor (aka: first invocation) comes out
      // with the correct end of the file:
      //         this.end = in.getPos() + length;
      @Override
      public long getPos() throws IOException {
        if (!this.firstGetPosInvocation) {
          return super.getPos();
        }
        // Only the very first call is adjusted; every later call is the plain position.
        this.firstGetPosInvocation = false;
        long adjust = 0;
        try {
          final String streamClassName = this.in.getClass().getName();
          if (streamClassName.endsWith("HdfsDataInputStream")
              || streamClassName.endsWith("DFSInputStream")) {
            final HdfsDataInputStream hdfsDataInputStream =
                (HdfsDataInputStream) this.getWrappedStream();
            final long realLength = hdfsDataInputStream.getVisibleLength();
            assert(realLength >= this.length);
            adjust = realLength - this.length;
          } else {
            LOG.info(
              "Input stream class: " + this.in.getClass().getName() + ", not adjusting length");
          }
        } catch (Exception e) {
          // Fail loudly rather than silently read with a stale length.
          LOG.warn("Error while trying to get accurate file length.  "
              + "Truncation / data loss may occur if RegionServers die.",
            e);
          throw new IOException(e);
        }
        return adjust + super.getPos();
      }
    }
  }

  // Protected for tests.
  protected SequenceFile.Reader reader;
  long entryStart = 0; // needed for logging exceptions

  public SequenceFileLogReader() {
    super();
  }

  /**
   * Closes the underlying SequenceFile reader, if there is one, and forgets it.
   * @throws IOException the close failure, decorated by
   *   {@link #addFileInfoToException(IOException)}
   */
  @Override
  public void close() throws IOException {
    try {
      if (reader != null) {
        this.reader.close();
        this.reader = null;
      }
    } catch (IOException ioe) {
      throw addFileInfoToException(ioe);
    }
  }

  /**
   * @return the current position of the underlying reader, or 0 when no reader is open
   */
  @Override
  public long getPosition() throws IOException {
    return reader != null ? reader.getPosition() : 0;
  }

  /**
   * Re-opens the file with a fresh reader.  The reader that was in use before, if any, is
   * closed so that its stream is not leaked.
   */
  @Override
  public void reset() throws IOException {
    // Resetting the reader lets us see newly added data if the file is being written to
    // We also keep the same compressionContext which was previously populated for this file
    final SequenceFile.Reader previous = this.reader;
    // Open the replacement first: if this throws, the previous reader stays in place untouched.
    this.reader = new WALReader(fs, path, conf);
    if (previous != null) {
      try {
        previous.close();
      } catch (IOException ioe) {
        // Best effort only; the new reader is already usable.
        LOG.warn("Failed closing previous reader while resetting " + path, ioe);
      }
    }
  }

  /**
   * Discards the passed stream (it is closed when non-null) and opens our own reader via
   * {@link #reset()}.
   * @return always null
   */
  @Override
  protected String initReader(FSDataInputStream stream) throws IOException {
    // We don't use the stream because we have to have the magic stream above.
    if (stream != null) {
      stream.close();
    }
    reset();
    return null;
  }

  @Override
  protected void initAfterCompression(String cellCodecClsName) throws IOException {
    // Nothing to do here
  }

  @Override
  protected void initAfterCompression() throws IOException {
    // Nothing to do here
  }

  /**
   * @return whether the metadata of the currently open reader says WAL compression is enabled
   */
  @Override
  protected boolean hasCompression() {
    return isWALCompressionEnabled(reader.getMetadata());
  }

  @Override
  protected boolean hasTagCompression() {
    // Tag compression not supported with old SequenceFileLog Reader/Writer
    return false;
  }

  /**
   * Call this method after init() has been executed
   * @param metadata the SequenceFile metadata to inspect
   * @return whether WAL compression is enabled
   */
  static boolean isWALCompressionEnabled(final Metadata metadata) {
    // A missing version tag means old, version 0, i.e. before compression existed.
    final Text version = metadata.get(WAL_VERSION_KEY);
    if (version == null || Integer.parseInt(version.toString()) < COMPRESSION_VERSION) {
      return false;
    }
    // Now check that compression type is present.  Currently only one value.
    final Text compressionType = metadata.get(WAL_COMPRESSION_TYPE_KEY);
    return compressionType != null && compressionType.equals(DICTIONARY_COMPRESSION_TYPE);
  }


  /**
   * Fill in the passed entry with the next key/value.
   * Note that because this format deals with our legacy storage, the provided
   * Entry MUST use an {@link HLogKey} for the key.
   * @param e the entry to populate
   * @return boolean indicating if the contents of Entry have been filled in.
   * @throws IllegalArgumentException if the key of the entry is not an {@link HLogKey}
   * @throws IOException a read failure, decorated by
   *   {@link #addFileInfoToException(IOException)}
   */
  @Override
  protected boolean readNext(Entry e) throws IOException {
    try {
      if (!(e.getKey() instanceof HLogKey)) {
        final IllegalArgumentException exception = new IllegalArgumentException(
            "SequenceFileLogReader only works when given entries that have HLogKey for keys. This" +
            " one had '" + e.getKey().getClass() + "'");
        LOG.error("We need to use the legacy SequenceFileLogReader to handle a " +
            " pre-0.96 style WAL, but HBase internals failed to use the deprecated HLogKey class." +
            " This is a bug; please file an issue or email the developer mailing list. You will " +
            "need the following exception details when seeking help from the HBase community.",
            exception);
        throw exception;
      }
      final boolean hasNext = this.reader.next((HLogKey) e.getKey(), e.getEdit());
      if (!hasNext) {
        return false;
      }
      // Scopes are probably in WAL edit, move to key
      final NavigableMap<byte[], Integer> scopes = e.getEdit().getAndRemoveScopes();
      if (scopes != null) {
        e.getKey().readOlderScopes(scopes);
      }
      return true;
    } catch (IOException ioe) {
      throw addFileInfoToException(ioe);
    }
  }

  /**
   * Seeks the underlying reader to the given position.
   * @throws IOException the seek failure, decorated by
   *   {@link #addFileInfoToException(IOException)}
   */
  @Override
  protected void seekOnFs(long pos) throws IOException {
    try {
      reader.seek(pos);
    } catch (IOException ioe) {
      throw addFileInfoToException(ioe);
    }
  }

  /**
   * Builds an exception of the same class as the passed one whose message carries the file
   * path, entry start, current position, the reader's idea of the end of file (when it can be
   * obtained) and the current edit.  The passed exception becomes the cause of the new one.
   * @param ioe the exception to decorate
   * @return the decorated exception, or {@code ioe} itself when a decorated copy could not be
   *   created
   */
  protected IOException addFileInfoToException(final IOException ioe)
  throws IOException {
    long pos = -1;
    try {
      pos = getPosition();
    } catch (IOException e) {
      LOG.warn("Failed getting position to add to throw", e);
    }

    // See what SequenceFile.Reader thinks is the end of the file
    // Long.MAX_VALUE doubles as the "could not find out" marker.
    long end = Long.MAX_VALUE;
    try {
      Field fEnd = SequenceFile.Reader.class.getDeclaredField("end");
      fEnd.setAccessible(true);
      end = fEnd.getLong(this.reader);
    } catch(NoSuchFieldException nfe) {
       /* reflection failure, keep going */
      if (LOG.isTraceEnabled()) LOG.trace(nfe);
    } catch(IllegalAccessException iae) {
       /* reflection failure, keep going */
      if (LOG.isTraceEnabled()) LOG.trace(iae);
    } catch(Exception e) {
       /* All other cases. Should we handle it more aggressively? */
       LOG.warn("Unexpected exception when accessing the end field", e);
    }

    String msg = (this.path == null? "": this.path.toString()) +
      ", entryStart=" + entryStart + ", pos=" + pos +
      ((end == Long.MAX_VALUE) ? "" : ", end=" + end) +
      ", edit=" + this.edit;

    // Enhance via reflection so we don't change the original class type
    try {
      return (IOException) ioe.getClass()
        .getConstructor(String.class)
        .newInstance(msg)
        .initCause(ioe);
    } catch(NoSuchMethodException nsme) {
       /* reflection failure, keep going */
      if (LOG.isTraceEnabled()) LOG.trace(nsme);
    } catch(IllegalAccessException iae) {
       /* reflection failure, keep going */
      if (LOG.isTraceEnabled()) LOG.trace(iae);
    } catch(Exception e) {
       /* All other cases. Should we handle it more aggressively? */
       LOG.warn("Unexpected exception when adding file info to the IOException", e);
    }
    // Could not build a decorated copy; hand back the original unchanged.
    return ioe;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy