com.fasterxml.aalto.util.UriCanonicalizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aalto-xml Show documentation
Show all versions of aalto-xml Show documentation
Ultra-high performance non-blocking XML processor (Stax/Stax2, SAX/SAX2)
package com.fasterxml.aalto.util;
import java.util.*;
/**
* This class is used for canonicalization of namespace URIs.
* It will act as a layer above String.intern(), trying to reduce
* calls to somewhat slow intern() method, and to do that as efficiently
* as possible considering that Strings in question are often
* longer than names in xml documents.
*/
public final class UriCanonicalizer
{
private BoundedHashMap mURIs = null;
public UriCanonicalizer() { }
private void init()
{
mURIs = new BoundedHashMap();
}
public synchronized String canonicalizeURI(char[] ch, int len)
{
CanonicalKey key = new CanonicalKey(ch, len);
if (mURIs == null) {
init();
} else {
String result = (String) mURIs.get(key);
if (result != null) {
return result;
}
}
/* Key we have is not yet stable, as the underlying array
* is shared and mutable. So:
*/
key = key.safeClone();
// Also, now we should intern() the URI
String uri = new String(ch, 0, len).intern();
mURIs.put(key, uri);
return uri;
}
/*
///////////////////////////////////////////////////
// Helper classes
///////////////////////////////////////////////////
*/
/**
* We'll use a bounded map, which should work well for most normal
* cases, but avoid excesses for degenerate cases (unique URIs
* used as idenfitiers etc).
*/
@SuppressWarnings("serial")
final static class BoundedHashMap
extends LinkedHashMap
{
/**
* Let's create cache big enough to usually have enough space for
* all/most entries for normal cases, but that won't grow
* indefinitely for degenerate cases
*/
private final static int DEFAULT_SIZE = 64;
private final static int MAX_SIZE = (int) (1023 * 0.7f); // 4k primary hash
public BoundedHashMap()
{
super(DEFAULT_SIZE, 0.7f, true);
}
@Override
public boolean removeEldestEntry(Map.Entry entry)
{
return (size() >= MAX_SIZE);
}
}
final static class CanonicalKey
{
/**
* Array containing characters of the canonicalized String.
*/
final char[] mChars;
/**
* Length of canonicalized String
*/
final int mLength;
/**
* Hash of the URI string, calculated using fast(er) hash
* function (compared to regular String).
*/
final int mHash;
public CanonicalKey(char[] buffer, int len)
{
mChars = buffer;
mLength = len;
mHash = calcKeyHash(buffer, len);
}
public CanonicalKey(char[] buffer, int len, int hashCode)
{
mChars = buffer;
mLength = len;
mHash = hashCode;
}
public CanonicalKey safeClone()
{
char[] newBuf = new char[mLength];
System.arraycopy(mChars, 0, newBuf, 0, mLength);
return new CanonicalKey(newBuf, mLength, mHash);
}
public static int calcKeyHash(char[] buffer, int len)
{
/* Short URIs are not common, but if they were to
* happen, let's just use regular String.hashCode();
* it's good one, and for short strings, fast enough
*/
if (len <= 8) { // we know it's at least one char, though
int hash = buffer[0];
// For these, let's use regular hashing method
for (int i = 1; i < len; ++i) {
hash = (hash * 31) + buffer[i];
}
return hash;
}
/* Ok, longer. So first let's use length xored with first char;
* usually first 4 will just be "http" anyways (and could
* just be skipped for good?)
*/
int hash = len ^ buffer[0];
/* Otherwise, let's start with length, xor with first char,
* then latter chars separated by larger and larger
* spaces. The idea is to severely limit time needed
* to calc hash code as URIs can get quite long.
* But let's ignore last 4 chars, for now (we'll use them
* all after the loop)
*/
int ix = 2; // start from 3rd char (buffer[2])
int dist = 2; // and skip 1 char first
int end = (len - 4);
while (ix < end) {
hash = (hash * 31) + buffer[ix];
ix += dist;
++dist; // will skip progressively longer spans
}
// And then last 4 chars...
hash = (hash * 31) ^ (buffer[end] << 2) + buffer[end+1];
hash = (hash * 31) + (buffer[end+2] << 2) ^ buffer[end+3];
return hash;
}
@Override
public String toString() { return "{URI, hash: 0x"+Integer.toHexString(mHash)+"}"; }
@Override
public int hashCode() { return mHash; }
@Override
public boolean equals(Object o)
{
if (o == this) return true;
if (o == null) return false;
if (o.getClass() != getClass()) return false;
CanonicalKey other = (CanonicalKey) o;
if (other.mLength != mLength) return false;
char[] c1 = mChars;
char[] c2 = other.mChars;
for (int i = 0, len = mLength; i < len; ++i) {
if (c1[i] != c2[i]) {
return false;
}
}
return true;
}
}
}