All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.common.StringInternUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.common;

import org.apache.hadoop.fs.Path;

import java.lang.reflect.Field;
import java.net.URI;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

/**
 * Collection of utilities for string interning, common across Hive.
 * We use the standard String.intern() call, that performs very well
 * (no problems with PermGen overflowing, etc.) starting from JDK 7.
 */
public class StringInternUtils {

  // When a URI instance is initialized, it creates a bunch of private String
  // fields, never bothering about their possible duplication. It would be
  // best if we could tell URI constructor to intern these strings right away.
  // Without this option, we can only use reflection to "fix" strings in these
  // fields after a URI has been created.
  private static Class uriClass = URI.class;
  private static Field stringField, schemeField, authorityField, hostField, pathField,
      fragmentField, schemeSpecificPartField;

  static {
    try {
      stringField = uriClass.getDeclaredField("string");
      schemeField = uriClass.getDeclaredField("scheme");
      authorityField = uriClass.getDeclaredField("authority");
      hostField = uriClass.getDeclaredField("host");
      pathField = uriClass.getDeclaredField("path");
      fragmentField = uriClass.getDeclaredField("fragment");
      schemeSpecificPartField = uriClass.getDeclaredField("schemeSpecificPart");
    } catch (NoSuchFieldException e) {
      throw new RuntimeException(e);
    }

    // Note that the calls below will throw an exception if a Java SecurityManager
    // is installed and configured to forbid invoking setAccessible(). In practice
    // this is not a problem in Hive.
    stringField.setAccessible(true);
    schemeField.setAccessible(true);
    authorityField.setAccessible(true);
    hostField.setAccessible(true);
    pathField.setAccessible(true);
    fragmentField.setAccessible(true);
    schemeSpecificPartField.setAccessible(true);
  }

  /**
   * This method interns all the URI strings in place.
   * Goes over the URI strings, checks if each string element is already interned,
   * and if not it replaces each element with the interned copy.
   * Eventually returns the same URI.
   *
   * @param uri
   * @return
   */
  public static URI internStringsInUri(URI uri) {
    if (uri == null) return null;
    try {
      String string = (String) stringField.get(uri);
      if (string != null && string != string.intern()) stringField.set(uri, string.intern());
      String scheme = (String) schemeField.get(uri);
      if (scheme != null && scheme != scheme.intern()) schemeField.set(uri, scheme.intern());
      String authority = (String) authorityField.get(uri);
      if (authority != null && authority != authority.intern()) authorityField.set(uri, authority.intern());
      String host = (String) hostField.get(uri);
      if (host != null && host != host.intern()) hostField.set(uri, host.intern());
      String path = (String) pathField.get(uri);
      if (path != null && path != path.intern()) pathField.set(uri, path.intern());
      String fragment = (String) fragmentField.get(uri);
      if (fragment != null && fragment != fragment.intern()) fragmentField.set(uri, fragment.intern());
      String schemeSPart = (String) schemeSpecificPartField.get(uri);
      if (schemeSPart != null && schemeSPart != schemeSPart.intern()) schemeSpecificPartField.set(uri, schemeSPart.intern());
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return uri;
  }

  public static Path internUriStringsInPath(Path path) {
    if (path != null) internStringsInUri(path.toUri());
    return path;
  }

  public static Path[] internUriStringsInPathArray(Path[] paths) {
    if (paths != null) {
      for (Path path : paths) {
        internUriStringsInPath(path);
      }
    }
    return paths;
  }

  /**
   * This method interns all the strings in the given list in place. That is,
   * it iterates over the list, checks if each string element is already interned,
   * and if not it replaces each element with the interned copy.
   * Eventually returns the same list.
   *
   * Note that the provided List implementation should return an iterator
   * (via list.listIterator()) method, and that iterator should implement
   * the set(Object) method. That's what all List implementations in the JDK
   * provide. However, if some custom List implementation doesn't have this
   * functionality, this method will return without interning its elements.
   */
  public static List internStringsInList(List list) {
    if (list != null) {
      try {
        ListIterator it = list.listIterator();
        while (it.hasNext()) {
          String curr = it.next();
          // Intern values only when they are not part of the String pool already
          if (curr != curr.intern()) {
            it.set(curr.intern());
          }
        }
      } catch (UnsupportedOperationException e) { } // set() not implemented - ignore
    }
    return list;
  }

  /** Interns all the strings in the given array in place, returning the same array */
  public static String[] internStringsInArray(String[] strings) {
    for (int i = 0; i < strings.length; i++) {
      // Intern values only when they are not part of the String pool already
      if (strings[i] != null && strings[i] != strings[i].intern()) {
        strings[i] = strings[i].intern();
      }
    }
    return strings;
  }

  public static  Map internValuesInMap(Map map) {
    if (map != null) {
      for (Map.Entry entry : map.entrySet()) {
        String value = entry.getValue();
        // Intern values only when they are not part of the String pool already
        if (value != null && value != value.intern()) {
          map.put(entry.getKey(), value.intern());
        }
      }
    }
    return map;
  }

  public static String internIfNotNull(String s) {
    if (s != null) s = s.intern();
    return s;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy