All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.htmlunit.xpath.xml.dtm.ref.DTMDefaultBase Maven / Gradle / Ivy

There is a newer version: 4.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the  "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.htmlunit.xpath.xml.dtm.ref;

// for dumpDTM
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;

import javax.xml.transform.Source;

import org.htmlunit.xpath.objects.XString;
import org.htmlunit.xpath.res.XPATHErrorResources;
import org.htmlunit.xpath.res.XPATHMessages;
import org.htmlunit.xpath.xml.dtm.DTM;
import org.htmlunit.xpath.xml.dtm.DTMAxisTraverser;
import org.htmlunit.xpath.xml.dtm.DTMManager;
import org.htmlunit.xpath.xml.utils.SuballocatedIntVector;

/**
 * The DTMDefaultBase class serves as a helper base for DTMs. It sets up structures for
 * navigation and type, while leaving data management and construction to the derived classes.
 */
public abstract class DTMDefaultBase implements DTM {
  static final boolean JJK_DEBUG = false;

  // This constant is likely to be removed in the future. Use the
  // getDocument() method instead of ROOTNODE to get at the root
  // node of a DTM.
  /** The identity of the root node. */
  public static final int ROOTNODE = 0;

  /** The number of nodes, which is also used to determine the next node index. */
  protected int m_size = 0;

  /** The expanded names, one array element for each node. */
  protected final SuballocatedIntVector m_exptype;

  /** First child values, one array element for each node. */
  protected final SuballocatedIntVector m_firstch;

  /** Next sibling values, one array element for each node. */
  protected final SuballocatedIntVector m_nextsib;

  /** Previous sibling values, one array element for each node. */
  protected SuballocatedIntVector m_prevsib;

  /** Previous sibling values, one array element for each node. */
  protected final SuballocatedIntVector m_parent;

  /** Vector of SuballocatedIntVectors of NS decl sets */
  protected List m_namespaceDeclSets = null;

  /** SuballocatedIntVector of elements at which corresponding namespaceDeclSets were defined */
  protected SuballocatedIntVector m_namespaceDeclSetElements = null;

  /**
   * These hold indexes to elements based on namespace and local name. The base lookup is the the
   * namespace. The second lookup is the local name, and the last array contains the the first free
   * element at the start, and the list of element handles following.
   */
  protected int[][][] m_elemIndexes;

  /** The default block size of the node arrays */
  public static final int DEFAULT_BLOCKSIZE = 512; // favor small docs.

  /** The number of blocks for the node arrays */
  public static final int DEFAULT_NUMBLOCKS = 32;

  /** The number of blocks used for small documents & RTFs */
  public static final int DEFAULT_NUMBLOCKS_SMALL = 4;

  /* The block size of the node arrays */
  // protected final int m_blocksize;

  /** The value to use when the information has not been built yet. */
  protected static final int NOTPROCESSED = DTM.NULL - 1;

  /** The DTM manager who "owns" this DTM. */
  public final DTMManager m_mgr;

  /** m_mgr cast to DTMManagerDefault, or null if it isn't an instance (Efficiency hook) */
  protected DTMManagerDefault m_mgrDefault = null;

  /**
   * The document identity number(s). If we have overflowed the addressing range of the first that
   * was assigned to us, we may add others.
   */
  protected final SuballocatedIntVector m_dtmIdent;

  /*
   * The mask for the identity. %REVIEW% Should this really be set to the
   * _DEFAULT? What if a particular DTM wanted to use another value?
   */
  // protected final static int m_mask = DTMManager.IDENT_NODE_DEFAULT;

  /** The base URI for this document. */
  protected final String m_documentBaseURI;

  /**
   * The table for exandedNameID lookups. This may or may not be the same table as is contained in
   * the DTMManagerDefault.
   */
  protected final ExpandedNameTable m_expandedNameTable;

  /** true if indexing is turned on. */
  protected final boolean m_indexing;

  /**
   * Construct a DTMDefaultBase object using the default block size.
   *
   * @param mgr The DTMManager who owns this DTM.
   * @param source The object that is used to specify the construction source.
   * @param dtmIdentity The DTM identity ID for this DTM.
   * @param doIndexing true if the caller considers it worth it to use indexing schemes.
   */
  public DTMDefaultBase(
      final DTMManager mgr, final Source source, final int dtmIdentity, final boolean doIndexing) {
    this(mgr, source, dtmIdentity, doIndexing, DEFAULT_BLOCKSIZE, true);
  }

  /**
   * Construct a DTMDefaultBase object from a DOM node.
   *
   * @param mgr The DTMManager who owns this DTM.
   * @param source The object that is used to specify the construction source.
   * @param dtmIdentity The DTM identity ID for this DTM.
   * @param doIndexing true if the caller considers it worth it to use indexing schemes.
   * @param blocksize The block size of the DTM.
   * @param usePrevsib true if we want to build the previous sibling node array.
   */
  public DTMDefaultBase(
      final DTMManager mgr,
      final Source source,
      final int dtmIdentity,
      final boolean doIndexing,
      final int blocksize,
      final boolean usePrevsib) {
    // Use smaller sizes for the internal node arrays if the block size
    // is small.
    final int numblocks;
    if (blocksize <= 64) {
      numblocks = DEFAULT_NUMBLOCKS_SMALL;
      m_dtmIdent = new SuballocatedIntVector(4, 1);
    }
    else {
      numblocks = DEFAULT_NUMBLOCKS;
      m_dtmIdent = new SuballocatedIntVector(32);
    }

    m_exptype = new SuballocatedIntVector(blocksize, numblocks);
    m_firstch = new SuballocatedIntVector(blocksize, numblocks);
    m_nextsib = new SuballocatedIntVector(blocksize, numblocks);
    m_parent = new SuballocatedIntVector(blocksize, numblocks);

    // Only create the m_prevsib array if the usePrevsib flag is true.
    // Some DTM implementations (e.g. SAXImpl) do not need this array.
    // We can save the time to build it in those cases.
    if (usePrevsib) {
      m_prevsib = new SuballocatedIntVector(blocksize, numblocks);
    }

    m_mgr = mgr;
    if (mgr instanceof DTMManagerDefault) {
      m_mgrDefault = (DTMManagerDefault) mgr;
    }

    m_documentBaseURI = (null != source) ? source.getSystemId() : null;
    m_dtmIdent.setElementAt(dtmIdentity, 0);
    m_indexing = doIndexing;

    if (doIndexing) {
      m_expandedNameTable = new ExpandedNameTable();
    }
    else {
      // Note that this fails if we aren't talking to an instance of
      // DTMManagerDefault
      m_expandedNameTable = m_mgrDefault.getExpandedNameTable();
    }
  }

  /**
   * Ensure that the size of the element indexes can hold the information.
   *
   * @param namespaceID Namespace ID index.
   * @param LocalNameID Local name ID.
   */
  protected void ensureSizeOfIndex(final int namespaceID, final int LocalNameID) {

    if (null == m_elemIndexes) {
      m_elemIndexes = new int[namespaceID + 20][][];
    }
    else if (m_elemIndexes.length <= namespaceID) {
      final int[][][] indexes = m_elemIndexes;

      m_elemIndexes = new int[namespaceID + 20][][];

      System.arraycopy(indexes, 0, m_elemIndexes, 0, indexes.length);
    }

    int[][] localNameIndex = m_elemIndexes[namespaceID];

    if (null == localNameIndex) {
      localNameIndex = new int[LocalNameID + 100][];
      m_elemIndexes[namespaceID] = localNameIndex;
    }
    else if (localNameIndex.length <= LocalNameID) {
      final int[][] indexes = localNameIndex;

      localNameIndex = new int[LocalNameID + 100][];

      System.arraycopy(indexes, 0, localNameIndex, 0, indexes.length);

      m_elemIndexes[namespaceID] = localNameIndex;
    }

    int[] elemHandles = localNameIndex[LocalNameID];

    if (null == elemHandles) {
      elemHandles = new int[128];
      localNameIndex[LocalNameID] = elemHandles;
      elemHandles[0] = 1;
    }
    else if (elemHandles.length <= elemHandles[0] + 1) {
      final int[] indexes = elemHandles;

      elemHandles = new int[elemHandles[0] + 1024];

      System.arraycopy(indexes, 0, elemHandles, 0, indexes.length);

      localNameIndex[LocalNameID] = elemHandles;
    }
  }

  /**
   * Add a node to the element indexes. The node will not be added unless it's an element.
   *
   * @param expandedTypeID The expanded type ID of the node.
   * @param identity The node identity index.
   */
  protected void indexNode(final int expandedTypeID, final int identity) {

    final ExpandedNameTable ent = m_expandedNameTable;
    final short type = ent.getType(expandedTypeID);

    if (DTM.ELEMENT_NODE == type) {
      final int namespaceID = ent.getNamespaceID(expandedTypeID);
      final int localNameID = ent.getLocalNameID(expandedTypeID);

      ensureSizeOfIndex(namespaceID, localNameID);

      final int[] index = m_elemIndexes[namespaceID][localNameID];

      index[index[0]] = identity;

      index[0]++;
    }
  }

  /**
   * Find the first index that occurs in the list that is greater than or equal to the given value.
   *
   * @param list A list of integers.
   * @param start The start index to begin the search.
   * @param len The number of items to search.
   * @param value Find the slot that has a value that is greater than or identical to this argument.
   * @return The index in the list of the slot that is higher or identical to the identity argument,
   *     or -1 if no node is higher or equal.
   */
  protected int findGTE(final int[] list, final int start, final int len, final int value) {

    int low = start;
    int high = start + (len - 1);
    final int end = high;

    while (low <= high) {
      final int mid = (low + high) >>> 1;
      final int c = list[mid];

      if (c > value) {
        high = mid - 1;
      }
      else if (c < value) {
        low = mid + 1;
      }
      else {
        return mid;
      }
    }

    return (low <= end && list[low] > value) ? low : -1;
  }

  /**
   * Find the first matching element from the index at or after the given node.
   *
   * @param nsIndex The namespace index lookup.
   * @param lnIndex The local name index lookup.
   * @param firstPotential The first potential match that is worth looking at.
   * @return The first node that is greater than or equal to the firstPotential argument, or
   *     DTM.NOTPROCESSED if not found.
   */
  int findElementFromIndex(final int nsIndex, final int lnIndex, final int firstPotential) {

    final int[][][] indexes = m_elemIndexes;

    if (null != indexes && nsIndex < indexes.length) {
      final int[][] lnIndexs = indexes[nsIndex];

      if (null != lnIndexs && lnIndex < lnIndexs.length) {
        final int[] elems = lnIndexs[lnIndex];

        if (null != elems) {
          final int pos = findGTE(elems, 1, elems[0], firstPotential);

          if (pos > -1) {
            return elems[pos];
          }
        }
      }
    }

    return NOTPROCESSED;
  }

  /**
   * Get the next node identity value in the list, and call the iterator if it hasn't been added
   * yet.
   *
   * @param identity The node identity (index).
   * @return identity+1, or DTM.NULL.
   */
  protected abstract int getNextNodeIdentity(int identity);

  /**
   * This method should try and build one or more nodes in the table.
   *
   * @return The true if a next node is found or false if there are no more nodes.
   */
  protected abstract boolean nextNode();

  /** Stateless axis traversers, lazely built. */
  protected DTMAxisTraverser[] m_traversers;

  /**
   * Get the simple type ID for the given node identity.
   *
   * @param identity The node identity.
   * @return The simple type ID, or DTM.NULL.
   */
  protected short _type(final int identity) {

    final int info = _exptype(identity);

    if (NULL != info) {
      return m_expandedNameTable.getType(info);
    }
    return NULL;
  }

  /**
   * Get the expanded type ID for the given node identity.
   *
   * @param identity The node identity.
   * @return The expanded type ID, or DTM.NULL.
   */
  protected int _exptype(final int identity) {
    if (identity == DTM.NULL) {
      return NULL;
    }
    // Reorganized test and loop into single flow
    // Tiny performance improvement, saves a few bytes of code, clearer.
    // %OPT% Other internal getters could be treated simliarly
    while (identity >= m_size) {
      if (!nextNode() && identity >= m_size) {
        return NULL;
      }
    }
    return m_exptype.elementAt(identity);
  }

  /**
   * Get the level in the tree for the given node identity.
   *
   * @param identity The node identity.
   * @return The tree level, or DTM.NULL.
   */
  protected int _level(int identity) {
    while (identity >= m_size) {
      final boolean isMore = nextNode();
      if (!isMore && identity >= m_size) {
        return NULL;
      }
    }

    int i = 0;
    while (NULL != (identity = _parent(identity))) {
      ++i;
    }
    return i;
  }

  /**
   * Get the first child for the given node identity.
   *
   * @param identity The node identity.
   * @return The first child identity, or DTM.NULL.
   */
  protected int _firstch(final int identity) {

    // Boiler-plate code for each of the _xxx functions, except for the array.
    int info = (identity >= m_size) ? NOTPROCESSED : m_firstch.elementAt(identity);

    // Check to see if the information requested has been processed, and,
    // if not, advance the iterator until we the information has been
    // processed.
    while (info == NOTPROCESSED) {
      final boolean isMore = nextNode();

      if (identity >= m_size && !isMore) {
        return NULL;
      }
      info = m_firstch.elementAt(identity);
      if (info == NOTPROCESSED && !isMore) {
        return NULL;
      }
    }

    return info;
  }

  /**
   * Get the next sibling for the given node identity.
   *
   * @param identity The node identity.
   * @return The next sibling identity, or DTM.NULL.
   */
  protected int _nextsib(final int identity) {
    // Boiler-plate code for each of the _xxx functions, except for the array.
    int info = (identity >= m_size) ? NOTPROCESSED : m_nextsib.elementAt(identity);

    // Check to see if the information requested has been processed, and,
    // if not, advance the iterator until we the information has been
    // processed.
    while (info == NOTPROCESSED) {
      final boolean isMore = nextNode();

      if (identity >= m_size && !isMore) {
        return NULL;
      }
      info = m_nextsib.elementAt(identity);
      if (info == NOTPROCESSED && !isMore) {
        return NULL;
      }
    }

    return info;
  }

  /**
   * Get the previous sibling for the given node identity.
   *
   * @param identity The node identity.
   * @return The previous sibling identity, or DTM.NULL.
   */
  protected int _prevsib(final int identity) {

    if (identity < m_size) {
      return m_prevsib.elementAt(identity);
    }

    // Check to see if the information requested has been processed, and,
    // if not, advance the iterator until we the information has been
    // processed.
    while (true) {
      final boolean isMore = nextNode();

      if (identity >= m_size && !isMore) {
        return NULL;
      }
      else if (identity < m_size) {
        return m_prevsib.elementAt(identity);
      }
    }
  }

  /**
   * Get the parent for the given node identity.
   *
   * @param identity The node identity.
   * @return The parent identity, or DTM.NULL.
   */
  protected int _parent(final int identity) {

    if (identity < m_size) {
      return m_parent.elementAt(identity);
    }

    // Check to see if the information requested has been processed, and,
    // if not, advance the iterator until we the information has been
    // processed.
    while (true) {
      final boolean isMore = nextNode();

      if (identity >= m_size && !isMore) {
        return NULL;
      }
      else if (identity < m_size) {
        return m_parent.elementAt(identity);
      }
    }
  }

  /** Diagnostics function to dump the DTM. */
  public void dumpDTM(OutputStream os) {
    try {
      if (os == null) {
        final File f = new File("DTMDump" + this.hashCode() + ".txt");
        System.err.println("Dumping... " + f.getAbsolutePath());
        os = Files.newOutputStream(f.toPath());
      }
      final PrintStream ps = new PrintStream(os, false, "UTF-8");

      while (nextNode()) {
      }

      final int nRecords = m_size;

      ps.println("Total nodes: " + nRecords);

      for (int index = 0; index < nRecords; ++index) {
        final int i = makeNodeHandle(index);
        ps.println("=========== index=" + index + " handle=" + i + " ===========");
        ps.println("NodeName: " + getNodeName(i));
        ps.println("NodeNameX: " + getNodeNameX(i));
        ps.println("LocalName: " + getLocalName(i));
        ps.println("NamespaceURI: " + getNamespaceURI(i));
        ps.println("Prefix: " + getPrefix(i));

        final int exTypeID = _exptype(index);

        ps.println("Expanded Type ID: " + Integer.toHexString(exTypeID));

        final int type = _type(index);
        final String typestring;

        switch (type) {
          case DTM.ATTRIBUTE_NODE:
            typestring = "ATTRIBUTE_NODE";
            break;
          case DTM.CDATA_SECTION_NODE:
            typestring = "CDATA_SECTION_NODE";
            break;
          case DTM.COMMENT_NODE:
            typestring = "COMMENT_NODE";
            break;
          case DTM.DOCUMENT_FRAGMENT_NODE:
            typestring = "DOCUMENT_FRAGMENT_NODE";
            break;
          case DTM.DOCUMENT_NODE:
            typestring = "DOCUMENT_NODE";
            break;
          case DTM.DOCUMENT_TYPE_NODE:
            typestring = "DOCUMENT_NODE";
            break;
          case DTM.ELEMENT_NODE:
            typestring = "ELEMENT_NODE";
            break;
          case DTM.ENTITY_NODE:
            typestring = "ENTITY_NODE";
            break;
          case DTM.ENTITY_REFERENCE_NODE:
            typestring = "ENTITY_REFERENCE_NODE";
            break;
          case DTM.NAMESPACE_NODE:
            typestring = "NAMESPACE_NODE";
            break;
          case DTM.NOTATION_NODE:
            typestring = "NOTATION_NODE";
            break;
          case DTM.NULL:
            typestring = "NULL";
            break;
          case DTM.PROCESSING_INSTRUCTION_NODE:
            typestring = "PROCESSING_INSTRUCTION_NODE";
            break;
          case DTM.TEXT_NODE:
            typestring = "TEXT_NODE";
            break;
          default:
            typestring = "Unknown!";
            break;
        }

        ps.println("Type: " + typestring);

        final int firstChild = _firstch(index);

        if (DTM.NULL == firstChild) {
          ps.println("First child: DTM.NULL");
        }
        else if (NOTPROCESSED == firstChild) {
          ps.println("First child: NOTPROCESSED");
        }
        else {
          ps.println("First child: " + firstChild);
        }

        if (m_prevsib != null) {
          final int prevSibling = _prevsib(index);

          if (DTM.NULL == prevSibling) {
            ps.println("Prev sibling: DTM.NULL");
          }
          else if (NOTPROCESSED == prevSibling) {
            ps.println("Prev sibling: NOTPROCESSED");
          }
          else {
            ps.println("Prev sibling: " + prevSibling);
          }
        }

        final int nextSibling = _nextsib(index);

        if (DTM.NULL == nextSibling) {
          ps.println("Next sibling: DTM.NULL");
        }
        else if (NOTPROCESSED == nextSibling) {
          ps.println("Next sibling: NOTPROCESSED");
        }
        else {
          ps.println("Next sibling: " + nextSibling);
        }

        final int parent = _parent(index);

        if (DTM.NULL == parent) {
          ps.println("Parent: DTM.NULL");
        }
        else if (NOTPROCESSED == parent) {
          ps.println("Parent: NOTPROCESSED");
        }
        else {
          ps.println("Parent: " + parent);
        }

        final int level = _level(index);

        ps.println("Level: " + level);
        ps.println("Node Value: " + getNodeValue(i));
        ps.println("String Value: " + getStringValue(i));
      }
    }
    catch (final IOException ioe) {
      ioe.printStackTrace(System.err);
      throw new RuntimeException(ioe.getMessage());
    }
  }

  /**
   * Diagnostics function to dump a single node.
   *
   * 

%REVIEW% KNOWN GLITCH: If you pass it a node index rather than a node handle, it works just * fine... but the displayed identity number before the colon is different, which complicates * comparing it with nodes printed the other way. We could always OR the DTM ID into the value, to * suppress that distinction... * *

%REVIEW% This might want to be moved up to DTMDefaultBase, or possibly DTM itself, since * it's a useful diagnostic and uses only DTM's public APIs. */ public String dumpNode(final int nodeHandle) { if (nodeHandle == DTM.NULL) { return "[null]"; } final String typestring; switch (getNodeType(nodeHandle)) { case DTM.ATTRIBUTE_NODE: typestring = "ATTR"; break; case DTM.CDATA_SECTION_NODE: typestring = "CDATA"; break; case DTM.COMMENT_NODE: typestring = "COMMENT"; break; case DTM.DOCUMENT_FRAGMENT_NODE: typestring = "DOC_FRAG"; break; case DTM.DOCUMENT_NODE: typestring = "DOC"; break; case DTM.DOCUMENT_TYPE_NODE: typestring = "DOC_TYPE"; break; case DTM.ELEMENT_NODE: typestring = "ELEMENT"; break; case DTM.ENTITY_NODE: typestring = "ENTITY"; break; case DTM.ENTITY_REFERENCE_NODE: typestring = "ENT_REF"; break; case DTM.NAMESPACE_NODE: typestring = "NAMESPACE"; break; case DTM.NOTATION_NODE: typestring = "NOTATION"; break; case DTM.NULL: typestring = "null"; break; case DTM.PROCESSING_INSTRUCTION_NODE: typestring = "PI"; break; case DTM.TEXT_NODE: typestring = "TEXT"; break; default: typestring = "Unknown!"; break; } return "[" + nodeHandle + ": " + typestring + "(0x" + Integer.toHexString(getExpandedTypeID(nodeHandle)) + ") " + getNodeNameX(nodeHandle) + " {" + getNamespaceURI(nodeHandle) + "}" + "=\"" + getNodeValue(nodeHandle) + "\"]"; } // ========= Document Navigation Functions ========= /** * Given a node identity, return a node handle. If extended addressing has been used (multiple DTM * IDs), we need to map the high bits of the identity into the proper DTM ID. * *

This has been made FINAL to facilitate inlining, since we do not expect any subclass of * DTMDefaultBase to ever change the algorithm. (I don't really like doing so, and would love to * have an excuse not to...) * *

%REVIEW% Is it worth trying to specialcase small documents? %REVIEW% Should this be exposed * at the package/public layers? * * @param nodeIdentity Internal offset to this node's records. * @return NodeHandle (external representation of node) */ public final int makeNodeHandle(final int nodeIdentity) { if (NULL == nodeIdentity) { return NULL; } return m_dtmIdent.elementAt(nodeIdentity >>> DTMManager.IDENT_DTM_NODE_BITS) + (nodeIdentity & DTMManager.IDENT_NODE_DEFAULT); } /** * Given a node handle, return a node identity. If extended addressing has been used (multiple DTM * IDs), we need to map the high bits of the identity into the proper DTM ID and thence find the * proper offset to add to the low bits of the identity * *

This has been made FINAL to facilitate inlining, since we do not expect any subclass of * DTMDefaultBase to ever change the algorithm. (I don't really like doing so, and would love to * have an excuse not to...) * *

%OPT% Performance is critical for this operation. * *

%REVIEW% Should this be exposed at the package/public layers? * * @param nodeHandle (external representation of node) * @return nodeIdentity Internal offset to this node's records. */ public final int makeNodeIdentity(final int nodeHandle) { if (NULL == nodeHandle) { return NULL; } if (m_mgrDefault != null) { // Optimization: use the DTMManagerDefault's fast DTMID-to-offsets // table. I'm not wild about this solution but this operation // needs need extreme speed. final int whichDTMindex = nodeHandle >>> DTMManager.IDENT_DTM_NODE_BITS; // %REVIEW% Wish I didn't have to perform the pre-test, but // someone is apparently asking DTMs whether they contain nodes // which really don't belong to them. That's probably a bug // which should be fixed, but until it is: if (m_mgrDefault.m_dtms[whichDTMindex] != this) { return NULL; } return m_mgrDefault.m_dtm_offsets[whichDTMindex] | (nodeHandle & DTMManager.IDENT_NODE_DEFAULT); } final int whichDTMid = m_dtmIdent.indexOf(nodeHandle & DTMManager.IDENT_DTM_DEFAULT); return (whichDTMid == NULL) ? NULL : (whichDTMid << DTMManager.IDENT_DTM_NODE_BITS) + (nodeHandle & DTMManager.IDENT_NODE_DEFAULT); } /** {@inheritDoc} */ @Override public int getFirstChild(final int nodeHandle) { final int identity = makeNodeIdentity(nodeHandle); final int firstChild = _firstch(identity); return makeNodeHandle(firstChild); } /** {@inheritDoc} */ @Override public int getLastChild(final int nodeHandle) { final int identity = makeNodeIdentity(nodeHandle); int child = _firstch(identity); int lastChild = DTM.NULL; while (child != DTM.NULL) { lastChild = child; child = _nextsib(child); } return makeNodeHandle(lastChild); } /** {@inheritDoc} */ @Override public abstract int getAttributeNode(int nodeHandle, String namespaceURI, String name); /** {@inheritDoc} */ @Override public int getFirstAttribute(final int nodeHandle) { final int nodeID = makeNodeIdentity(nodeHandle); return makeNodeHandle(getFirstAttributeIdentity(nodeID)); } /** * Given a node identity, get the index of the node's first attribute. * * @param identity int identity of the node. * @return Identity of first attribute, or DTM.NULL to indicate none exists. */ protected int getFirstAttributeIdentity(int identity) { int type = _type(identity); if (DTM.ELEMENT_NODE == type) { // Assume that attributes and namespaces immediately follow the element. while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { // Assume this can not be null. type = _type(identity); if (type == DTM.ATTRIBUTE_NODE) { return identity; } else if (DTM.NAMESPACE_NODE != type) { break; } } } return DTM.NULL; } /** {@inheritDoc} */ @Override public int getNextSibling(final int nodeHandle) { if (nodeHandle == DTM.NULL) { return DTM.NULL; } return makeNodeHandle(_nextsib(makeNodeIdentity(nodeHandle))); } /** {@inheritDoc} */ @Override public int getPreviousSibling(final int nodeHandle) { if (nodeHandle == DTM.NULL) { return DTM.NULL; } if (m_prevsib != null) { return makeNodeHandle(_prevsib(makeNodeIdentity(nodeHandle))); } // If the previous sibling array is not built, we get at // the previous sibling using the parent, firstch and // nextsib arrays. final int nodeID = makeNodeIdentity(nodeHandle); final int parent = _parent(nodeID); int node = _firstch(parent); int result = DTM.NULL; while (node != nodeID) { result = node; node = _nextsib(node); } return makeNodeHandle(result); } /** {@inheritDoc} */ @Override public int getNextAttribute(final int nodeHandle) { final int nodeID = makeNodeIdentity(nodeHandle); if (_type(nodeID) == DTM.ATTRIBUTE_NODE) { return makeNodeHandle(getNextAttributeIdentity(nodeID)); } return DTM.NULL; } /** * Given a node identity for an attribute, advance to the next attribute. * * @param identity int identity of the attribute node. This must be an attribute * node. * @return int DTM node-identity of the resolved attr, or DTM.NULL to indicate none exists. */ protected int getNextAttributeIdentity(int identity) { // Assume that attributes and namespace nodes immediately follow the element while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { final int type = _type(identity); if (type == DTM.ATTRIBUTE_NODE) { return identity; } else if (type != DTM.NAMESPACE_NODE) { break; } } return DTM.NULL; } /** * Build table of namespace declaration locations during DTM construction. Table is a Vector of * SuballocatedIntVectors containing the namespace node HANDLES declared at that ID, plus an * SuballocatedIntVector of the element node INDEXES at which these declarations appeared. * *

NOTE: Since this occurs during model build, nodes will be encountered in doucment order and * thus the table will be ordered by element, permitting binary-search as a possible retrieval * optimization. * *

%REVIEW% Directly managed arrays rather than vectors? %REVIEW% Handles or IDs? Given usage, * I think handles. */ protected void declareNamespaceInContext( final int elementNodeIndex, final int namespaceNodeIndex) { SuballocatedIntVector nsList = null; if (m_namespaceDeclSets == null) { // First m_namespaceDeclSetElements = new SuballocatedIntVector(32); m_namespaceDeclSetElements.addElement(elementNodeIndex); m_namespaceDeclSets = new ArrayList<>(); nsList = new SuballocatedIntVector(32); m_namespaceDeclSets.add(nsList); } else { // Most recent. May be -1 (none) if DTM was pruned. // %OPT% Is there a lastElement() method? Should there be? final int last = m_namespaceDeclSetElements.size() - 1; if (last >= 0 && elementNodeIndex == m_namespaceDeclSetElements.elementAt(last)) { nsList = m_namespaceDeclSets.get(last); } } if (nsList == null) { m_namespaceDeclSetElements.addElement(elementNodeIndex); final SuballocatedIntVector inherited = findNamespaceContext(_parent(elementNodeIndex)); if (inherited != null) { // %OPT% Count-down might be faster, but debuggability may // be better this way, and if we ever decide we want to // keep this ordered by expanded-type... final int isize = inherited.size(); // Base the size of a new namespace list on the // size of the inherited list - but within reason! nsList = new SuballocatedIntVector(Math.max(Math.min(isize + 16, 2048), 32)); for (int i = 0; i < isize; ++i) { nsList.addElement(inherited.elementAt(i)); } } else { nsList = new SuballocatedIntVector(32); } m_namespaceDeclSets.add(nsList); } // Handle overwriting inherited. // %OPT% Keep sorted? (By expanded-name rather than by doc order...) // Downside: Would require insertElementAt if not found, // which has recopying costs. But these are generally short lists... final int newEType = _exptype(namespaceNodeIndex); for (int i = nsList.size() - 1; i >= 0; --i) { if (newEType == getExpandedTypeID(nsList.elementAt(i))) { nsList.setElementAt(makeNodeHandle(namespaceNodeIndex), i); return; } } nsList.addElement(makeNodeHandle(namespaceNodeIndex)); } /** * Retrieve list of namespace declaration locations active at this node. List is an * SuballocatedIntVector whose entries are the namespace node HANDLES declared at that ID. * *

%REVIEW% Directly managed arrays rather than vectors? %REVIEW% Handles or IDs? Given usage, * I think handles. */ protected SuballocatedIntVector findNamespaceContext(final int elementNodeIndex) { if (null != m_namespaceDeclSetElements) { // %OPT% Is binary-search really saving us a lot versus linear? // (... It may be, in large docs with many NS decls.) int wouldBeAt = findInSortedSuballocatedIntVector(m_namespaceDeclSetElements, elementNodeIndex); if (wouldBeAt >= 0) { // Found it return m_namespaceDeclSets.get(wouldBeAt); } if (wouldBeAt == -1) { // -1-wouldbeat == 0 return null; // Not after anything; definitely not found } // Not found, but we know where it should have been. // Search back until we find an ancestor or run out. wouldBeAt = -1 - wouldBeAt; // Decrement wouldBeAt to find last possible ancestor int candidate = m_namespaceDeclSetElements.elementAt(--wouldBeAt); int ancestor = _parent(elementNodeIndex); // Special case: if the candidate is before the given node, and // is in the earliest possible position in the document, it // must have the namespace declarations we're interested in. if (wouldBeAt == 0 && candidate < ancestor) { final int rootHandle = getDocumentRoot(makeNodeHandle(elementNodeIndex)); final int rootID = makeNodeIdentity(rootHandle); final int uppermostNSCandidateID; if (getNodeType(rootHandle) == DTM.DOCUMENT_NODE) { final int ch = _firstch(rootID); uppermostNSCandidateID = (ch != DTM.NULL) ? ch : rootID; } else { uppermostNSCandidateID = rootID; } if (candidate == uppermostNSCandidateID) { return m_namespaceDeclSets.get(wouldBeAt); } } while (wouldBeAt >= 0 && ancestor > 0) { if (candidate == ancestor) { // Found ancestor in list return m_namespaceDeclSets.get(wouldBeAt); } else if (candidate < ancestor) { // Too deep in tree do { ancestor = _parent(ancestor); } while (candidate < ancestor); } else if (wouldBeAt > 0) { // Too late in list candidate = m_namespaceDeclSetElements.elementAt(--wouldBeAt); } else { break; } } } return null; // No namespaces known at this node } /** * Subroutine: Locate the specified node within m_namespaceDeclSetElements, or the last element * which preceeds it in document order * *

%REVIEW% Inlne this into findNamespaceContext? Create SortedSuballocatedIntVector type? * * @param vector the vector * @param lookfor the lookfor * @return If positive or zero, the index of the found item. If negative, index of the point at * which it would have appeared, encoded as -1-index and hence reconvertable by subtracting it * from -1. (Encoding because I don't want to recompare the strings but don't want to burn * bytes on a datatype to hold a flagged value.) */ protected int findInSortedSuballocatedIntVector( final SuballocatedIntVector vector, final int lookfor) { // Binary search int i = 0; if (vector != null) { int first = 0; int last = vector.size() - 1; while (first <= last) { i = (first + last) / 2; final int test = lookfor - vector.elementAt(i); if (test == 0) { return i; // Name found } else if (test < 0) { last = i - 1; // looked too late } else { first = i + 1; // looked ot early } } if (first > i) { i = first; // Clean up at loop end } } return -1 - i; // not-found has to be encoded. } /** {@inheritDoc} */ @Override public int getFirstNamespaceNode(final int nodeHandle, final boolean inScope) { if (inScope) { final int identity = makeNodeIdentity(nodeHandle); if (_type(identity) == DTM.ELEMENT_NODE) { final SuballocatedIntVector nsContext = findNamespaceContext(identity); if (nsContext == null || nsContext.size() < 1) { return NULL; } return nsContext.elementAt(0); } return NULL; } // Assume that attributes and namespaces immediately // follow the element. // // %OPT% Would things be faster if all NS nodes were built // before all Attr nodes? Some costs at build time for 2nd // pass... int identity = makeNodeIdentity(nodeHandle); if (_type(identity) == DTM.ELEMENT_NODE) { while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { final int type = _type(identity); if (type == DTM.NAMESPACE_NODE) { return makeNodeHandle(identity); } else if (DTM.ATTRIBUTE_NODE != type) { break; } } return NULL; } return NULL; } /** {@inheritDoc} */ @Override public int getNextNamespaceNode( final int baseHandle, final int nodeHandle, final boolean inScope) { if (inScope) { // Since we've been given the base, try direct lookup // (could look from nodeHandle but this is at least one // comparison/get-parent faster) // SuballocatedIntVector nsContext=findNamespaceContext(nodeHandle & m_mask); final SuballocatedIntVector nsContext = findNamespaceContext(makeNodeIdentity(baseHandle)); if (nsContext == null) { return NULL; } final int i = 1 + nsContext.indexOf(nodeHandle); if (i <= 0 || i == nsContext.size()) { return NULL; } return nsContext.elementAt(i); } // Assume that attributes and namespace nodes immediately follow the element. int identity = makeNodeIdentity(nodeHandle); while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { final int type = _type(identity); if (type == DTM.NAMESPACE_NODE) { return makeNodeHandle(identity); } else if (type != DTM.ATTRIBUTE_NODE) { break; } } return DTM.NULL; } /** {@inheritDoc} */ @Override public int getParent(final int nodeHandle) { final int identity = makeNodeIdentity(nodeHandle); if (identity > 0) { return makeNodeHandle(_parent(identity)); } return DTM.NULL; } /** {@inheritDoc} */ @Override public int getDocument() { return m_dtmIdent.elementAt(0); // makeNodeHandle(0) } /** {@inheritDoc} */ @Override public int getOwnerDocument(final int nodeHandle) { if (DTM.DOCUMENT_NODE == getNodeType(nodeHandle)) { return DTM.NULL; } return getDocumentRoot(nodeHandle); } /** {@inheritDoc} */ @Override public int getDocumentRoot(final int nodeHandle) { return getManager().getDTM(nodeHandle).getDocument(); } /** {@inheritDoc} */ @Override public abstract XString getStringValue(int nodeHandle); /** {@inheritDoc} */ @Override public int getExpandedTypeID(final int nodeHandle) { // %REVIEW% This _should_ only be null if someone asked the wrong DTM about the // node... // which one would hope would never happen... final int id = makeNodeIdentity(nodeHandle); if (id == NULL) { return NULL; } return _exptype(id); } /** {@inheritDoc} */ @Override public int getExpandedTypeID(final String namespace, final String localName, final int type) { return m_expandedNameTable.getExpandedTypeID(namespace, localName, type); } /** {@inheritDoc} */ @Override public abstract String getNodeName(int nodeHandle); /** {@inheritDoc} */ @Override public String getNodeNameX(final int nodeHandle) { /* @todo: implement this org.apache.xml.dtm.DTMDefaultBase abstract method */ throw new RuntimeException( XPATHMessages.createXPATHMessage(XPATHErrorResources.ER_METHOD_NOT_SUPPORTED, null)); } /** {@inheritDoc} */ @Override public abstract String getLocalName(int nodeHandle); /** {@inheritDoc} */ @Override public abstract String getPrefix(int nodeHandle); /** {@inheritDoc} */ @Override public abstract String getNamespaceURI(int nodeHandle); /** {@inheritDoc} */ @Override public abstract String getNodeValue(int nodeHandle); /** {@inheritDoc} */ @Override public short getNodeType(final int nodeHandle) { if (nodeHandle == DTM.NULL) { return DTM.NULL; } return m_expandedNameTable.getType(_exptype(makeNodeIdentity(nodeHandle))); } /** {@inheritDoc} */ @Override public abstract int getElementById(String elementId); /** {@inheritDoc} */ @Override public boolean isNodeAfter(final int nodeHandle1, final int nodeHandle2) { // These return NULL if the node doesn't belong to this document. final int index1 = makeNodeIdentity(nodeHandle1); final int index2 = makeNodeIdentity(nodeHandle2); return index1 != NULL && index2 != NULL && index1 <= index2; } /** {@inheritDoc} */ @Override public org.w3c.dom.Node getNode(final int nodeHandle) { return new DTMNodeProxy(this, nodeHandle); } /** * Query which DTMManager this DTM is currently being handled by. * *

%REVEW% Should this become part of the base DTM API? * * @return a DTMManager, or null if this is a "stand-alone" DTM. */ public DTMManager getManager() { return m_mgr; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy