All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.aalto.in.CharBasedPNameTable Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/* Woodstox Lite ("wool") XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.in;

import com.fasterxml.aalto.util.NameTable;

/**
 * This is a symbol table implementation used for storing byte-based
 * PNames, specifically, instances of
 * ({@link PNameC}).
 */
public class CharBasedPNameTable
    extends NameTable
{
    final static int MIN_HASH_SIZE = 16;

    protected static final float DEFAULT_FILL_FACTOR = 0.75f;

    /*
    /**********************************************************************
    /* Actual symbol table data
    /**********************************************************************
     */

    /**
     * Primary matching symbols; it's expected most match occur from
     * here.
     */
    protected PNameC[] _symbols;

    /**
     * Overflow buckets; if primary doesn't match, lookup is done
     * from here.
     *

* Note: Number of buckets is half of number of symbol entries, on * assumption there's less need for buckets. */ protected Bucket[] _buckets; /** * Current size (number of entries); needed to know if and when * rehash. */ protected int _size; /** * Limit that indicates maximum size this instance can hold before * it needs to be expanded and rehashed. Calculated using fill * factor passed in to constructor. */ protected int _sizeThreshold; /** * Mask used to get index from hash values; equal to * mBuckets.length - 1, when mBuckets.length is * a power of two. */ protected int _indexMask; /* /********************************************************************** /* Information about concurrency /********************************************************************** */ /** * Flag that indicates if any changes have been made to the data; * used to both determine if bucket array needs to be copied when * (first) change is made, and potentially if updated bucket list * is to be resync'ed back to master instance. */ protected boolean _dirty; /* /********************************************************************** /* Life-cycle /********************************************************************** */ /** * Main method for constructing a master symbol table instance; will * be called by other public constructors. * * @param initialSize Minimum initial size for bucket array; internally * will always use a power of two equal to or bigger than this value. */ public CharBasedPNameTable(int initialSize) { // Let's set flags so no copying of buckets is needed: _dirty = true; // No point in requesting funny initial sizes... if (initialSize < 1) { throw new IllegalArgumentException("Can not use negative/zero initial size: "+initialSize); } { int currSize = MIN_HASH_SIZE; while (currSize < initialSize) { currSize += currSize; } initialSize = currSize; } _symbols = new PNameC[initialSize]; _buckets = new Bucket[initialSize >> 1]; // Mask is easy to calc for powers of two. _indexMask = initialSize - 1; _size = 0; /* Let's use 3/4 fill factor... */ _sizeThreshold = (initialSize * 3 + 3) >> 2; } CharBasedPNameTable(CharBasedPNameTable parent) { _symbols = parent._symbols; _buckets = parent._buckets; _size = parent._size; _sizeThreshold = parent._sizeThreshold; _indexMask = parent._indexMask; // Need to make copies of arrays, if/when adding new entries _dirty = false; } /** * Method that allows contents of child table to potentially be * "merged in" with contents of this symbol table. *

* Note that caller has to make sure symbol table passed in is * really a child or sibling of this symbol table. */ public synchronized void mergeFromChild(CharBasedPNameTable child) { // Let's do a basic sanity check first: if (child.size() <= size()) { // nothing to add return; } // Okie dokie, let's get the data in! _symbols = child._symbols; _buckets = child._buckets; _size = child._size; _sizeThreshold = child._sizeThreshold; _indexMask = child._indexMask; // Dirty flag... well, let's just clear it, to force copying just // in case. Shouldn't really matter, for master tables. _dirty = false; /* However, we have to mark child as dirty, so that it will not * be modifying arrays we "took over" (since child may have * returned an updated table before it stopped fully using * the SymbolTable: for example, it may still use it for * parsing PI targets in epilog) */ child._dirty = false; } /* /********************************************************************** // Public API, generic accessors: /********************************************************************** */ @Override public int size() { return _size; } @Override public boolean maybeDirty() { return _dirty; } /* /********************************************************************** /* Public API, accessing symbols /********************************************************************** */ public PNameC findSymbol(char[] buffer, int start, int len, int hash) { int index = hash & _indexMask; PNameC sym = _symbols[index]; // Optimal case; checking existing primary symbol for hash index: if (sym != null) { if (sym.equalsPName(buffer, start, len, hash)) { return sym; } // How about collision bucket? Bucket b = _buckets[index >> 1]; if (b != null) { sym = b.find(buffer, start, len, hash); if (sym != null) { return sym; } } } return null; } public PNameC addSymbol(char[] buffer, int start, int len, int hash) { String newStr = new String(buffer, start, len).intern(); PNameC pname = PNameC.construct(newStr, hash); int index = hash & _indexMask; boolean primary; // First... let's see if we can add it without a collision. if (null == _symbols[index]) { primary = true; // yup, we are good then } else { /* Otherwise, better check if we need to expand; after * which there may be a slot in primary hash? */ if (_size >= _sizeThreshold) { rehash(); // Need to recalc hash index index = hash & _indexMask; primary = (null == _symbols[index]); } else { // nope: need a bucket primary = false; } } // good, can just update primary hash table if (!_dirty) { // need to do copy-on-write? copyArrays(); } ++_size; if (primary) { _symbols[index] = pname; } else { // Ok, all right: need to add to a bucket int bix = (index >> 1); _buckets[bix] = new Bucket(pname, _buckets[bix]); } return pname; } /* /********************************************************************** /* Internal methods /********************************************************************** */ /** * Method called when copy-on-write is needed; generally when first * change is made to a derived symbol table. */ private void copyArrays() { PNameC[] oldSyms = _symbols; int size = oldSyms.length; _symbols = new PNameC[size]; System.arraycopy(oldSyms, 0, _symbols, 0, size); Bucket[] oldBuckets = _buckets; size = oldBuckets.length; _buckets = new Bucket[size]; System.arraycopy(oldBuckets, 0, _buckets, 0, size); _dirty = true; } /** * Method called when size (number of entries) of symbol table grows * so big that load factor is exceeded. Since size has to remain * power of two, arrays will then always be doubled. Main work * is really redistributing old entries into new String/Bucket * entries. */ private void rehash() { int size = _symbols.length; int newSize = size + size; PNameC[] oldSyms = _symbols; Bucket[] oldBuckets = _buckets; _symbols = new PNameC[newSize]; _buckets = new Bucket[newSize >> 1]; // Let's update index mask, threshold, now (needed for rehashing) _indexMask = newSize - 1; _sizeThreshold += _sizeThreshold; int count = 0; // let's do sanity check /* Need to do two loops, unfortunately, since spillover area is * only half the size: */ for (int i = 0; i < size; ++i) { PNameC symbol = oldSyms[i]; if (symbol != null) { ++count; int index = symbol.getCustomHash() & _indexMask; if (_symbols[index] == null) { _symbols[index] = symbol; } else { int bix = index >> 1; _buckets[bix] = new Bucket(symbol, _buckets[bix]); } } } size >>= 1; for (int i = 0; i < size; ++i) { Bucket b = oldBuckets[i]; while (b != null) { ++count; PNameC symbol = b.getSymbol(); int index = symbol.getCustomHash() & _indexMask; if (_symbols[index] == null) { _symbols[index] = symbol; } else { int bix = index >> 1; _buckets[bix] = new Bucket(symbol, _buckets[bix]); } b = b.getNext(); } } if (count != _size) { throw new Error("Internal error on SymbolTable.rehash(): had "+_size+" entries; now have "+count+"."); } } /* /********************************************************************** /* Bucket class /********************************************************************** */ /** * This class is a symbol table entry. Each entry acts as a node * in a linked list. */ final static class Bucket { private final PNameC mSymbol; private final Bucket mNext; public Bucket(PNameC symbol, Bucket next) { mSymbol = symbol; mNext = next; } public PNameC getSymbol() { return mSymbol; } public Bucket getNext() { return mNext; } public PNameC find(char[] buf, int start, int len, int hash) { Bucket b = this; do { PNameC sym = b.mSymbol; if (sym.equalsPName(buf, start, len, hash)) { return sym; } b = b.getNext(); } while (b != null); return null; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy