
com.ctc.wstx.util.ElementIdMap Maven / Gradle / Ivy


Woodstox is a high-performance XML processor that implements Stax (JSR-173) and SAX2 APIs
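
For orientation, here is a minimal sketch of reading XML through the standard StAX API. All types below are from javax.xml.stream; when Woodstox is on the classpath, XMLInputFactory.newInstance() typically resolves to its WstxInputFactory implementation, though that lookup detail is an assumption about the runtime setup rather than something this listing states.

import java.io.StringReader;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

public class StaxReadSketch
{
    public static void main(String[] args) throws XMLStreamException
    {
        // Standard StAX factory lookup; picks up Woodstox if it is on the classpath.
        XMLInputFactory f = XMLInputFactory.newInstance();
        XMLStreamReader sr = f.createXMLStreamReader(
                new StringReader("<root><child id='a1'>text</child></root>"));
        while (sr.hasNext()) {
            if (sr.next() == XMLStreamConstants.START_ELEMENT) {
                System.out.println("start element: " + sr.getLocalName());
            }
        }
        sr.close();
    }
}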

/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.util;

import javax.xml.stream.Location;


/**
 * This class is a specialized type-safe linked hash map used for
 * storing {@link ElementId} instances. {@link ElementId} instances
 * represent both id definitions (values of element attributes that
 * have type ID in DTD), and references (values of element attributes
 * of type IDREF and IDREFS). These definitions and references are
 * stored for the purpose of verifying
 * that all referenced id values are defined, and that none are defined
 * more than once.
 *<p>
 * Note: there are 2 somewhat distinct usage modes, by DTDValidator and
 * by MSV-based validators. DTDs pass raw character arrays, whereas
 * MSV-based validators operate on Strings. This is the main reason
 * for the 2 distinct sets of methods. A brief usage sketch is appended
 * after the end of this class.
 */
public final class ElementIdMap
{
    /**
     * Default initial table size; set so that usually it need not
     * be expanded.
     */
    protected static final int DEFAULT_SIZE = 128;

    protected static final int MIN_SIZE = 16;

    /**
     * Let's use 80% fill factor...
     */
    protected static final int FILL_PCT = 80;

    /*
    ////////////////////////////////////////
    // Actual hash table structure
    ////////////////////////////////////////
    */

    /**
     * Actual hash table area
     */
    protected ElementId[] mTable;

    /**
     * Current size (number of entries); needed to know if and when
     * to rehash.
     */
    protected int mSize;

    /**
     * Limit that indicates maximum size this instance can hold before
     * it needs to be expanded and rehashed. Calculated using the fill
     * factor passed in to the constructor.
     */
    protected int mSizeThreshold;

    /**
     * Mask used to get index from hash values; equal to
     * <code>mTable.length - 1</code>, when mTable.length is
     * a power of two.
     */
    protected int mIndexMask;

    /*
    ////////////////////////////////////////
    // Linked list info
    ////////////////////////////////////////
    */

    protected ElementId mHead;

    protected ElementId mTail;

    /*
    ////////////////////////////////////////
    // Life-cycle:
    ////////////////////////////////////////
    */

    public ElementIdMap() {
        this(DEFAULT_SIZE);
    }

    /**
     * This constructor is mainly used for testing, as it can be sized
     * appropriately to test rehashing etc.
     */
    public ElementIdMap(int initialSize)
    {
        int actual = MIN_SIZE;
        while (actual < initialSize) {
            actual += actual;
        }
        mTable = new ElementId[actual];
        // Mask is easy to calc for powers of two:
        mIndexMask = actual - 1;
        mSize = 0;
        mSizeThreshold = (actual * FILL_PCT) / 100;
        mHead = mTail = null;
    }

    /*
    ////////////////////////////////////////////////////
    // Public API
    ////////////////////////////////////////////////////
    */

    public ElementId getFirstUndefined()
    {
        /* Since the linked list is pruned to always start with
         * the first (in document order) undefined id, we can just
         * return the head:
         */
        return mHead;
    }

    /**
     * Method called when a reference to an id is encountered. We then
     * need to check whether the specified id entry (reference or definition)
     * already exists; and if not, add a reference marker.
     */
    public ElementId addReferenced(char[] buffer, int start, int len, int hash,
                                   Location loc, PrefixedName elemName, PrefixedName attrName)
    {
        int index = (hash & mIndexMask);
        ElementId id = mTable[index];
        while (id != null) {
            if (id.idMatches(buffer, start, len)) { // found existing one
                return id;
            }
            id = id.nextColliding();
        }
        // Not found, need to create a placeholder...
        // But first, do we need more room?
        if (mSize >= mSizeThreshold) {
            rehash();
            // Index changes for the new entry:
            index = (hash & mIndexMask);
        }
        ++mSize;

        // Ok, then, let's create the entry
        String idStr = new String(buffer, start, len);
        id = new ElementId(idStr, loc, false, elemName, attrName);

        // First, let's link it to the Map; all ids have to be connected
        id.setNextColliding(mTable[index]);
        mTable[index] = id;

        // And then add the undefined entry at the end of the list
        if (mHead == null) {
            mHead = mTail = id;
        } else {
            mTail.linkUndefined(id);
            mTail = id;
        }
        return id;
    }

    public ElementId addReferenced(String idStr, Location loc,
                                   PrefixedName elemName, PrefixedName attrName)
    {
        int hash = calcHash(idStr);
        int index = (hash & mIndexMask);
        ElementId id = mTable[index];
        while (id != null) {
            if (id.idMatches(idStr)) { // found existing one
                return id;
            }
            id = id.nextColliding();
        }
        // Not found, need to create a placeholder...
        // But first, do we need more room?
        if (mSize >= mSizeThreshold) {
            rehash();
            // Index changes for the new entry:
            index = (hash & mIndexMask);
        }
        ++mSize;

        // Ok, then, let's create the entry
        id = new ElementId(idStr, loc, false, elemName, attrName);

        // First, let's link it to the Map; all ids have to be connected
        id.setNextColliding(mTable[index]);
        mTable[index] = id;

        // And then add the undefined entry at the end of the list
        if (mHead == null) {
            mHead = mTail = id;
        } else {
            mTail.linkUndefined(id);
            mTail = id;
        }
        return id;
    }

    /**
     * Method called when an id definition is encountered. We then need
     * to check whether the specified id entry (reference or definition)
     * already exists. If not, we need to add the definition marker. If it
     * does exist, we need to 'upgrade' it if it was a reference marker;
     * otherwise we just return the old entry, and expect the caller to
     * check for duplicates and report the error.
     */
    public ElementId addDefined(char[] buffer, int start, int len, int hash,
                                Location loc, PrefixedName elemName, PrefixedName attrName)
    {
        int index = (hash & mIndexMask);
        ElementId id = mTable[index];
        while (id != null) {
            if (id.idMatches(buffer, start, len)) {
                break;
            }
            id = id.nextColliding();
        }

        /* Not found, can just add it to the Map; no need to add to the
         * linked list as it's not undefined
         */
        if (id == null) {
            // First, do we need more room?
            if (mSize >= mSizeThreshold) {
                rehash();
                index = (hash & mIndexMask);
            }
            ++mSize;
            String idStr = new String(buffer, start, len);
            id = new ElementId(idStr, loc, true, elemName, attrName);
            id.setNextColliding(mTable[index]);
            mTable[index] = id;
        } else {
            /* If already defined, nothing additional to do (we could
             * signal an error here, though... for now, we'll let the
             * caller do that)
             */
            if (id.isDefined()) {
                ;
            } else {
                /* Not defined yet: just need to upgrade it, and possibly
                 * remove it from the linked list.
                 */
                id.markDefined(loc);

                /* Ok; if it was the first undefined, need to unlink it, as
                 * well as potentially the next items.
                 */
                if (id == mHead) {
                    do {
                        mHead = mHead.nextUndefined();
                    } while (mHead != null && mHead.isDefined());
                    // Did we clear up all undefined ids?
                    if (mHead == null) {
                        mTail = null;
                    }
                }
            }
        }
        return id;
    }

    public ElementId addDefined(String idStr, Location loc,
                                PrefixedName elemName, PrefixedName attrName)
    {
        int hash = calcHash(idStr);
        int index = (hash & mIndexMask);
        ElementId id = mTable[index];
        while (id != null) {
            if (id.idMatches(idStr)) {
                break;
            }
            id = id.nextColliding();
        }

        /* Not found, can just add it to the Map; no need to add to the
         * linked list as it's not undefined
         */
        if (id == null) {
            if (mSize >= mSizeThreshold) { // need more room
                rehash();
                index = (hash & mIndexMask);
            }
            ++mSize;
            id = new ElementId(idStr, loc, true, elemName, attrName);
            id.setNextColliding(mTable[index]);
            mTable[index] = id;
        } else {
            /* If already defined, nothing additional to do (we could
             * signal an error here, though... for now, we'll let the
             * caller do that)
             */
            if (id.isDefined()) {
                ;
            } else {
                /* Not defined yet: just need to upgrade it, and possibly
                 * remove it from the linked list.
                 */
                id.markDefined(loc);

                /* Ok; if it was the first undefined, need to unlink it, as
                 * well as potentially the next items.
                 */
                if (id == mHead) {
                    do {
                        mHead = mHead.nextUndefined();
                    } while (mHead != null && mHead.isDefined());
                    if (mHead == null) { // cleared up all undefined ids?
                        mTail = null;
                    }
                }
            }
        }
        return id;
    }

    /**
     * Implementation of a hashing method for variable length
     * Strings. Most of the time the intention is that this calculation
     * is done by the caller during parsing, not here; however, sometimes
     * it needs to be done for an already-parsed String too.
     *<p>
     * Note: identical to {@link com.ctc.wstx.util.SymbolTable#calcHash},
     * although not required to be.
     *
     * @param len Length of the id; has to be at least 1 (caller guarantees
     *   this pre-condition)
     */
    public static int calcHash(char[] buffer, int start, int len)
    {
        int hash = (int) buffer[start];
        for (int i = 1; i < len; ++i) {
            hash = (hash * 31) + (int) buffer[start + i];
        }
        return hash;
    }

    public static int calcHash(String key)
    {
        int hash = (int) key.charAt(0);
        for (int i = 1, len = key.length(); i < len; ++i) {
            hash = (hash * 31) + (int) key.charAt(i);
        }
        return hash;
    }

    /*
    //////////////////////////////////////////////////////////
    // Internal methods
    //////////////////////////////////////////////////////////
    */

    /**
     * Method called when the size (number of entries) of the table grows
     * so big that the load factor is exceeded. Since the size has to remain
     * a power of two, the array is always grown by a power of two (here,
     * quadrupled). The main work is redistributing the old entries into
     * the new table.
     */
    private void rehash()
    {
        int size = mTable.length;
        /* Let's grow aggressively; this should minimize the number of
         * resizes, while adding to memory usage. But since these Maps
         * are never long-lived (only used during parsing and validation of
         * a single document), that shouldn't greatly matter.
         */
        int newSize = (size << 2);
        ElementId[] oldSyms = mTable;
        mTable = new ElementId[newSize];

        // Let's update the index mask and threshold now (needed for rehashing)
        mIndexMask = newSize - 1;
        mSizeThreshold <<= 2;

        int count = 0; // let's do a sanity check

        for (int i = 0; i < size; ++i) {
            for (ElementId id = oldSyms[i]; id != null; ) {
                ++count;
                int index = calcHash(id.getId()) & mIndexMask;
                ElementId nextIn = id.nextColliding();
                id.setNextColliding(mTable[index]);
                mTable[index] = id;
                id = nextIn;
            }
        }

        if (count != mSize) {
            ExceptionUtil.throwInternal("on rehash(): had "+mSize+" entries; now have "+count+".");
        }
    }
}
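
As a usage sketch (not part of the Woodstox sources): a validator drives the map roughly as follows, recording IDREF values as they are seen and upgrading entries when the matching ID definition arrives. The two-argument PrefixedName(prefix, localName) constructor and the stub Location below are assumptions made for illustration; real validation code passes the stream reader's current Location.

import javax.xml.stream.Location;

import com.ctc.wstx.util.ElementIdMap;
import com.ctc.wstx.util.PrefixedName;

public class ElementIdMapSketch
{
    // Stand-in Location for illustration; real code uses the reader's location.
    static Location loc(final int line) {
        return new Location() {
            public int getLineNumber() { return line; }
            public int getColumnNumber() { return -1; }
            public int getCharacterOffset() { return -1; }
            public String getPublicId() { return null; }
            public String getSystemId() { return null; }
        };
    }

    public static void main(String[] args)
    {
        ElementIdMap ids = new ElementIdMap();
        // Assumption: PrefixedName(prefix, localName); null prefix for unprefixed names.
        PrefixedName elem = new PrefixedName(null, "chapter");
        PrefixedName refAttr = new PrefixedName(null, "ref");
        PrefixedName idAttr = new PrefixedName(null, "id");

        // An IDREF seen before its ID is defined: stored as an undefined placeholder.
        ids.addReferenced("intro", loc(3), elem, refAttr);
        System.out.println("first undefined: " + ids.getFirstUndefined().getId());

        // The matching ID definition upgrades the entry and prunes the undefined list.
        ids.addDefined("intro", loc(9), elem, idAttr);
        System.out.println("all references resolved: " + (ids.getFirstUndefined() == null));
    }
}

After the document has been processed, a non-null getFirstUndefined() identifies the first dangling IDREF in document order, which is what the validator reports as an error.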




