All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.io.compress.DictionaryCache Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.compress;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder;
import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader;
import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache;

/**
 * A utility class for managing compressor/decompressor dictionary loading and caching of load
 * results. Useful for any codec that can support changing dictionaries at runtime, such as
 * ZStandard.
 */
@InterfaceAudience.Private
public final class DictionaryCache {

  public static final String DICTIONARY_MAX_SIZE_KEY = "hbase.io.compress.dictionary.max.size";
  public static final int DEFAULT_DICTIONARY_MAX_SIZE = 10 * 1024 * 1024;
  public static final String RESOURCE_SCHEME = "resource://";

  private static final Logger LOG = LoggerFactory.getLogger(DictionaryCache.class);
  private static volatile LoadingCache CACHE;

  private DictionaryCache() {
  }

  /**
   * Load a dictionary or return a previously cached load.
   * @param conf configuration
   * @param path the hadoop Path where the dictionary is located, as a String
   * @return the dictionary bytes if successful, null otherwise
   */
  public static byte[] getDictionary(final Configuration conf, final String path)
    throws IOException {
    if (path == null || path.isEmpty()) {
      return null;
    }
    // Create the dictionary loading cache if we haven't already
    if (CACHE == null) {
      synchronized (DictionaryCache.class) {
        if (CACHE == null) {
          final int maxSize = conf.getInt(DICTIONARY_MAX_SIZE_KEY, DEFAULT_DICTIONARY_MAX_SIZE);
          CACHE = CacheBuilder.newBuilder().maximumSize(100).expireAfterAccess(10, TimeUnit.MINUTES)
            .build(new CacheLoader() {
              @Override
              public byte[] load(String s) throws Exception {
                byte[] bytes;
                if (path.startsWith(RESOURCE_SCHEME)) {
                  bytes = loadFromResource(conf, path, maxSize);
                } else {
                  bytes = loadFromHadoopFs(conf, path, maxSize);
                }
                LOG.info("Loaded dictionary from {} (size {})", s, bytes.length);
                return bytes;
              }
            });
        }
      }
    }

    // Get or load the dictionary for the given path
    try {
      return CACHE.get(path);
    } catch (ExecutionException e) {
      throw new IOException(e);
    }
  }

  // Visible for testing
  public static byte[] loadFromResource(final Configuration conf, final String s, final int maxSize)
    throws IOException {
    if (!s.startsWith(RESOURCE_SCHEME)) {
      throw new IOException("Path does not start with " + RESOURCE_SCHEME);
    }
    final String path = s.substring(RESOURCE_SCHEME.length(), s.length());
    LOG.info("Loading resource {}", path);
    final InputStream in = DictionaryCache.class.getClassLoader().getResourceAsStream(path);
    if (in == null) {
      throw new FileNotFoundException("Resource " + path + " not found");
    }
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
      final byte[] buffer = new byte[8192];
      int n, len = 0;
      do {
        n = in.read(buffer);
        if (n > 0) {
          len += n;
          if (len > maxSize) {
            throw new IOException("Dictionary " + s + " is too large, limit=" + maxSize);
          }
          baos.write(buffer, 0, n);
        }
      } while (n > 0);
    } finally {
      in.close();
    }
    return baos.toByteArray();
  }

  private static byte[] loadFromHadoopFs(final Configuration conf, final String s,
    final int maxSize) throws IOException {
    final Path path = new Path(s);
    final FileSystem fs = FileSystem.get(path.toUri(), conf);
    LOG.info("Loading file {}", path);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final FSDataInputStream in = fs.open(path);
    try {
      final byte[] buffer = new byte[8192];
      int n, len = 0;
      do {
        n = in.read(buffer);
        if (n > 0) {
          len += n;
          if (len > maxSize) {
            throw new IOException("Dictionary " + s + " is too large, limit=" + maxSize);
          }
          baos.write(buffer, 0, n);
        }
      } while (n > 0);
    } finally {
      in.close();
    }
    return baos.toByteArray();
  }

  // Visible for testing
  public static boolean contains(String dictionaryPath) {
    if (CACHE != null) {
      return CACHE.asMap().containsKey(dictionaryPath);
    }
    return false;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy