com.marklogic.tree.CompressedTreeDecoder Maven / Gradle / Ivy
/*
* Copyright (c) 2020 MarkLogic Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.marklogic.tree;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.util.zip.Inflater;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.marklogic.dom.NodeImpl;
import com.marklogic.io.BiendianDataInputStream;
import com.marklogic.io.Decoder;
/**
* Decoder of Compressed Tree.
*
* @author jchen
*/
public class CompressedTreeDecoder {
public static final Log LOG = LogFactory.getLog(
CompressedTreeDecoder.class);
private static final Charset UTF8 = Charset.forName("UTF8");
private static final byte xmlURIBytes[] =
"http://www.w3.org/XML/1998/namespace".getBytes(UTF8);
private static final byte xsiURIBytes[] =
"http://www.w3.org/2001/XMLSchema-instance".getBytes(UTF8);
private static final byte spaceBytes[] = "space".getBytes(UTF8);
private static final byte langBytes[] = "lang".getBytes(UTF8);
private static final byte baseBytes[] = "base".getBytes(UTF8);
private static final byte typeBytes[] = "type".getBytes(UTF8);
static final int MAX_BINARY_BYTES = 512<<20; // 512 MB
private static final int xmlSpaceAttrPresentFlag = 0x01;
private static final int xmlLangAttrPresentFlag = 0x02;
private static final int xmlBaseAttrPresentFlag = 0x04;
private static final int xsiTypeAttrPresentFlag = 0x08;
public String utf8(String s) {
byte b[] = s.getBytes(UTF8);
StringBuilder buf = new StringBuilder();
for (byte value : b) {
buf.append(String.format("%02x", value & 0xff));
}
return buf.toString();
}
private void decodeText(ExpandedTree rep, Decoder decoder, int atomLimit)
throws IOException {
if (atomLimit == 0) return;
int numAtoms = decoder.decodeUnsigned();
int index = rep.numTextReps;
int minSize = rep.numTextReps + numAtoms + 1;
if (rep.textReps == null) {
int size = Math.max(rep.atomLimit*16, minSize);
rep.textReps = new int[size];
} else if (rep.textReps.length < minSize) {
int size = Math.max(rep.textReps.length*2, minSize);
int textReps[] = new int[size];
System.arraycopy(rep.textReps, 0, textReps, 0, index);
rep.textReps = textReps;
}
rep.textReps[index++] = numAtoms;
rep.numTextReps += numAtoms+1;
for (int j = 0; j < numAtoms; j++) {
int atom = decoder.decodeUnsigned();
assert (atom < atomLimit);
rep.textReps[index++] = atom;
}
}
private void addText(ExpandedTree rep, int numKeys)
throws IOException {
if (numKeys == 0) return;
int index = rep.numTextReps;
int minSize = rep.numTextReps + numKeys + 1;
if (rep.textReps == null) {
int size = Math.max(rep.atomLimit*16, minSize);
rep.textReps = new int[size];
} else if (rep.textReps.length < minSize) {
int size = Math.max(rep.textReps.length*2, minSize);
int textReps[] = new int[size];
System.arraycopy(rep.textReps, 0, textReps, 0, index);
rep.textReps = textReps;
}
}
private int pow2ceil(int x) {
int y=8;
while (y> 8) & 0xff);
rep.atomData[j++] = (byte)((word >> 16) & 0xff);
rep.atomData[j++] = (byte)((word >> 24) & 0xff);
if (LOG.isTraceEnabled()) {
LOG.trace(String.format(" atomData[%d] %08x", i, word));
LOG.trace(String.format(
" atomData[%d] %02x %02x %02x %02x",
i, rep.atomData[i*4], rep.atomData[i*4+1],
rep.atomData[i*4+2], rep.atomData[i*4+3]));
}
}
}
rep.atomLimit = decoder.decodeUnsigned();
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("atomLimit %d", rep.atomLimit));
}
if (rep.atomLimit == 0) {
rep.atomIndex = null;
} else {
rep.atomIndex = new int[rep.atomLimit + 1];
int j = 0;
for (int i = 0; i < rep.atomLimit; ++i) {
rep.atomIndex[i] = j;
if (LOG.isTraceEnabled())
LOG.trace(String.format(" atomIndex[%d] %08x", i,
rep.atomIndex[i]));
if (rep.atomData != null) while (rep.atomData[j++] != 0);
}
rep.atomIndex[rep.atomLimit] = j;
}
for (int i = 0; i < rep.atomLimit; ++i) {
if (LOG.isTraceEnabled())
LOG.trace(String.format(" atomString[%d] %s", i,
rep.atomString(i)));
}
// node names
int numNodeNameReps = decoder.decodeUnsigned();
if (LOG.isTraceEnabled())
LOG.trace(String.format("numNodeNameReps %d", numNodeNameReps));
if (numNodeNameReps == 0) {
rep.nodeNameNameAtom = null;
rep.nodeNameNamespaceAtom = null;
} else {
rep.nodeNameNameAtom = new int[numNodeNameReps];
rep.nodeNameNamespaceAtom = new int[numNodeNameReps];
}
int xmlSpaceNodeNameRepID = Integer.MAX_VALUE;
int xmlLangNodeNameRepID = Integer.MAX_VALUE;
int xmlBaseNodeNameRepID = Integer.MAX_VALUE;
int xsiTypeNodeNameRepID = Integer.MAX_VALUE;
for (int j = 0; j < numNodeNameReps; j++) {
rep.nodeNameNameAtom[j] = decoder.decodeUnsigned();
if (LOG.isTraceEnabled())
LOG.trace(String.format(" nodeNameNameAtom[%d] %d", j,
rep.nodeNameNameAtom[j]));
assert (rep.nodeNameNameAtom[j] < rep.atomLimit);
rep.nodeNameNamespaceAtom[j] = decoder.decodeUnsigned();
if (LOG.isTraceEnabled())
LOG.trace(String.format(" nodeNameNamespaceAtom[%d] %d", j,
rep.nodeNameNamespaceAtom[j]));
assert (rep.nodeNameNamespaceAtom[j] < rep.atomLimit);
if (rep.atomEquals(rep.nodeNameNamespaceAtom[j], xmlURIBytes)) {
if (rep.atomEquals(rep.nodeNameNameAtom[j], spaceBytes))
xmlSpaceNodeNameRepID = j;
else if (rep.atomEquals(rep.nodeNameNameAtom[j], langBytes)) {
xmlLangNodeNameRepID = j;
} else if (rep.atomEquals(rep.nodeNameNameAtom[j], baseBytes))
xmlBaseNodeNameRepID = j;
} else if (rep.atomEquals(rep.nodeNameNameAtom[j], xsiURIBytes)) {
if (rep.atomEquals(rep.nodeNameNameAtom[j], typeBytes))
xsiTypeNodeNameRepID = j;
}
}
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("xmlSpaceNodeNameRepID %d",
xmlSpaceNodeNameRepID));
LOG.trace(String.format("xmlLangNodeNameRepID %d",
xmlLangNodeNameRepID));
LOG.trace(String.format("xmlBaseNodeNameRepID %d",
xmlBaseNodeNameRepID));
LOG.trace(String.format("xsiTypeNodeNameRepID %d",
xsiTypeNodeNameRepID));
}
int numElemNodeReps = 0;
int numAttrNodeReps = 0;
int numDocNodeReps = 0;
int numPINodeReps = 0;
int numArrayNodeReps = 0;
int numDoubles = 0;
// node counts
rep.numNodeReps = decoder.decodeUnsigned();
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("numNodeReps %d", rep.numNodeReps));
}
if (rep.numNodeReps==0) {
int version = decoder.decodeUnsigned();
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("version %d", version));
}
assert(version<=1);
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("version %d",
version));
}
if (version > 1) {
throw new IOException("Unexpected tree version:" + version);
}
rep.numNodeReps = decoder.decodeUnsigned();
if (version == 1) { // tree with metadata
rep.numMetadata = decoder.decodeUnsigned();
numElemNodeReps = decoder.decodeUnsigned();
numAttrNodeReps = decoder.decodeUnsigned();
rep.numLinkNodeReps = decoder.decodeUnsigned()*4/3;
numPINodeReps = decoder.decodeUnsigned();
rep.numNSNodeReps = decoder.decodeUnsigned();
} else { // json tree
rep.numMetadata = 0;
numElemNodeReps=0;
numAttrNodeReps=0;
numPINodeReps=0;
rep.numLinkNodeReps=0;
rep.numNSNodeReps=0;
}
numArrayNodeReps = decoder.decodeUnsigned();
numDoubles = decoder.decodeUnsigned();
numDocNodeReps = decoder.decodeUnsigned();
} else { // old format
rep.numMetadata = 0;
numArrayNodeReps=0;
numDoubles=0;
numElemNodeReps = decoder.decodeUnsigned();
numAttrNodeReps = decoder.decodeUnsigned();
rep.numLinkNodeReps = decoder.decodeUnsigned()*4/3;
numDocNodeReps = decoder.decodeUnsigned();
numPINodeReps = decoder.decodeUnsigned();
rep.numNSNodeReps = decoder.decodeUnsigned();
}
rep.numPermNodeReps = decoder.decodeUnsigned();
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("rep.numNodeReps %d",
rep.numNodeReps));
LOG.trace(String.format("rep.numMetadata %d",
rep.numMetadata));
LOG.trace(String.format("numElemNodeReps %d",
numElemNodeReps));
LOG.trace(String.format("numAttrNodeReps %d",
numAttrNodeReps));
LOG.trace(String.format("rep.numLinkNodeReps %d",
rep.numLinkNodeReps));
LOG.trace(String.format("numPINodeReps %d",
numPINodeReps));
LOG.trace(String.format("rep.numNSNodeReps %d",
rep.numNSNodeReps));
LOG.trace(String.format("rep.numArrayNodeReps %d",
numArrayNodeReps));
LOG.trace(String.format("numDoubles %d", numDoubles));
LOG.trace(String.format("numDocNodeReps %d", numDocNodeReps));
LOG.trace(String.format("rep.numPermNodeReps %d",
rep.numPermNodeReps));
}
if (rep.numNodeReps > 0) {
rep.nodes = new NodeImpl[rep.numNodeReps];
rep.nodeOrdinal = new long[rep.numNodeReps];
rep.nodeKind = new byte[rep.numNodeReps];
rep.nodeRepID = new int[rep.numNodeReps];
rep.nodeParentNodeRepID = new int[rep.numNodeReps];
}
if (numArrayNodeReps > 0) {
rep.arrayNodeTextRepID = new int[numArrayNodeReps];
rep.arrayNodeChildNodeRepID = new int[numArrayNodeReps];
rep.arrayNodeNumChildren = new int[numArrayNodeReps];
}
if (numDoubles > 0) {
rep.doubles = new double[numDoubles];
}
if (numDocNodeReps > 0) {
rep.docNodeTextRepID = new int[numDocNodeReps];
rep.docNodeChildNodeRepID = new int[numDocNodeReps];
rep.docNodeNumChildren = new int[numDocNodeReps];
}
if (numElemNodeReps > 0) {
rep.elemNodeNodeNameRepID = new int[numElemNodeReps];
rep.elemNodeAttrNodeRepID = new int[numElemNodeReps];
rep.elemNodeChildNodeRepID = new int[numElemNodeReps];
rep.elemNodeElemDeclRepID = new int[numElemNodeReps];
rep.elemNodeNumAttributes = new int[numElemNodeReps];
rep.elemNodeNumDefaultAttrs = new int[numElemNodeReps];
rep.elemNodeNumChildren = new int[numElemNodeReps];
rep.elemNodeFlags = new int[numElemNodeReps];
}
if (numAttrNodeReps > 0) {
rep.attrNodeNodeNameRepID = new int[numAttrNodeReps];
rep.attrNodeTextRepID = new int[numAttrNodeReps];
rep.attrNodeAttrDeclRepID = new int[numAttrNodeReps];
}
if (rep.numLinkNodeReps > 0) {
rep.linkNodeKey = new long[rep.numLinkNodeReps];
rep.linkNodeNodeCount = new long[rep.numLinkNodeReps];
rep.linkNodeNodeNameRepID = new int[rep.numLinkNodeReps];
rep.linkNodeNodeRepID = new int[rep.numLinkNodeReps];
}
if (numDocNodeReps > 0) {
rep.docNodeTextRepID = new int[numDocNodeReps];
rep.docNodeChildNodeRepID = new int[numDocNodeReps];
rep.docNodeNumChildren = new int[numDocNodeReps];
}
if (numPINodeReps > 0) {
rep.piNodeTargetAtom = new int[numPINodeReps];
rep.piNodeTextRepID = new int[numPINodeReps];
}
if (rep.numNSNodeReps > 0) {
rep.nsNodeOrdinal = new long[rep.numNSNodeReps];
rep.nsNodePrevNSNodeRepID = new int[rep.numNSNodeReps];
rep.nsNodePrefixAtom = new int[rep.numNSNodeReps];
rep.nsNodeUriAtom = new int[rep.numNSNodeReps];
}
if (rep.numPermNodeReps > 0) {
rep.permNodeOrdinal = new long[rep.numPermNodeReps];
rep.permNodePrevPermNodeRepID = new int[rep.numPermNodeReps];
rep.permNodeCapability = new Capability[rep.numPermNodeReps];
rep.permNodeRoleId = new long[rep.numPermNodeReps];
}
// uri atoms
rep.uriTextRepID = 0;
decodeText(rep, decoder, rep.atomLimit);
// collection atoms
rep.colsTextRepID = rep.numTextReps;
decodeText(rep, decoder, rep.atomLimit);
// metadata
rep.metaKeys = new int[rep.numMetadata];
rep.metaVals = new int[rep.numMetadata];
// meta
for (int i = 0; i < rep.numMetadata; ++i)
rep.metaKeys[i] = decoder.decodeUnsigned();
for (int i = 0; i < rep.numMetadata; ++i) {
rep.metaVals[i] = rep.numTextReps;
decodeText(rep, decoder, rep.atomLimit);
}
// nodes
int nextDocNodeRep = 0;
int nextElemNodeRep = 0;
int nextAttrNodeRep = 0;
int nextPINodeRep = 0;
int nextNSNodeRep = 0;
int nextPermNodeRep = 0;
int parentNodeRepID = 0;
int nextArrayNodeRep = 0;
int nextDouble = 0;
long lastNSNodeRepOrdinal = 0;
long lastPermNodeRepOrdinal = 0;
for (int i = 0; i < rep.numNodeReps; i++) {
rep.nodeKind[i] = (byte)decoder.decodeUnsigned(4);
if (LOG.isTraceEnabled())
LOG.trace(String.format(" nodeKind[%d] %s", i,
rep.nodeKind[i]));
//assert (rep.nodeKind[i] != NodeKind.NULL);
parentNodeRepID += decoder.decodeUnsigned();
if (LOG.isTraceEnabled())
LOG.trace(String.format(" parentNodeRepID[%d] %d", i,
parentNodeRepID));
assert (parentNodeRepID <= i);
if (parentNodeRepID == i)
rep.nodeParentNodeRepID[i] = Integer.MAX_VALUE;
else {
rep.nodeParentNodeRepID[i] = parentNodeRepID;
assert (rep.nodeKind[parentNodeRepID] == NodeKind.ELEM ||
rep.nodeKind[parentNodeRepID] == NodeKind.DOC ||
rep.nodeKind[parentNodeRepID] == NodeKind.ARRAY ||
rep.nodeKind[parentNodeRepID] == NodeKind.OBJECT ||
rep.nodeKind[parentNodeRepID] == NodeKind.LINK);
int parentRepID = rep.nodeRepID[parentNodeRepID];
switch (rep.nodeKind[parentNodeRepID]) {
case NodeKind.ELEM: {
switch (rep.nodeKind[i]) {
case NodeKind.ATTR:
if (rep.elemNodeAttrNodeRepID[parentRepID] ==
Integer.MAX_VALUE)
rep.elemNodeAttrNodeRepID[parentRepID] = i;
assert (rep.elemNodeAttrNodeRepID[parentRepID] +
rep.elemNodeNumAttributes[parentRepID] == i);
++rep.elemNodeNumAttributes[parentRepID];
break;
default:
if (rep.elemNodeChildNodeRepID[parentRepID] ==
Integer.MAX_VALUE)
rep.elemNodeChildNodeRepID[parentRepID] = i;
assert (rep.elemNodeChildNodeRepID[parentRepID] +
rep.elemNodeNumChildren[parentRepID] == i);
++rep.elemNodeNumChildren[parentRepID];
}
break;
}
case NodeKind.DOC: {
if (rep.docNodeChildNodeRepID[parentNodeRepID] ==
Integer.MAX_VALUE)
rep.docNodeChildNodeRepID[parentNodeRepID] = i;
assert (rep.docNodeChildNodeRepID[parentNodeRepID] +
rep.docNodeNumChildren[parentNodeRepID] == i);
++rep.docNodeNumChildren[parentNodeRepID];
break;
}
case NodeKind.ARRAY:
case NodeKind.OBJECT: {
if (rep.arrayNodeChildNodeRepID[parentRepID] ==
Integer.MAX_VALUE)
rep.arrayNodeChildNodeRepID[parentRepID] = i;
assert (rep.arrayNodeChildNodeRepID[parentRepID] +
rep.arrayNodeNumChildren[parentRepID] == i);
++rep.arrayNodeNumChildren[parentRepID];
break;
}
default:
break;
}
}
switch (rep.nodeKind[i]) {
case NodeKind.ELEM: {
int j = nextElemNodeRep++;
rep.nodeRepID[i] = j;
assert (j < numElemNodeReps);
rep.elemNodeNodeNameRepID[j] = decoder.decodeUnsigned();
rep.elemNodeAttrNodeRepID[j] = Integer.MAX_VALUE;
rep.elemNodeChildNodeRepID[j] = Integer.MAX_VALUE;
rep.elemNodeElemDeclRepID[j] = Integer.MAX_VALUE;
rep.elemNodeNumAttributes[j] = 0;
rep.elemNodeNumDefaultAttrs[j] = 0;
rep.elemNodeNumChildren[j] = 0;
rep.elemNodeFlags[j] = 0;
if (rep.elemNodeNodeNameRepID[j] >= numNodeNameReps) {
rep.elemNodeNumDefaultAttrs[j] =
rep.elemNodeNodeNameRepID[j] / numNodeNameReps;
rep.elemNodeNodeNameRepID[j] =
rep.elemNodeNodeNameRepID[j] % numNodeNameReps;
}
break;
}
case NodeKind.ATTR: {
assert (parentNodeRepID < i);
assert (rep.nodeKind[parentNodeRepID] == NodeKind.ELEM);
rep.nodeRepID[i] = nextAttrNodeRep++;
assert (rep.nodeRepID[i] < numAttrNodeReps);
rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] =
decoder.decodeUnsigned();
assert (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]]
< numNodeNameReps);
if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] ==
xmlSpaceNodeNameRepID)
rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |=
xmlSpaceAttrPresentFlag;
else if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] ==
xmlLangNodeNameRepID)
rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |=
xmlLangAttrPresentFlag;
else if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] ==
xmlBaseNodeNameRepID)
rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |=
xmlBaseAttrPresentFlag;
else if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] ==
xsiTypeNodeNameRepID)
rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |=
xsiTypeAttrPresentFlag;
rep.attrNodeTextRepID[rep.nodeRepID[i]] = rep.numTextReps;
decodeText(rep, decoder, rep.atomLimit);
rep.attrNodeAttrDeclRepID[rep.nodeRepID[i]] =
Integer.MAX_VALUE;
break;
}
case NodeKind.TEXT: {
rep.nodeRepID[i] = rep.numTextReps;
decodeText(rep, decoder, rep.atomLimit);
break;
}
case NodeKind.BINARY: {
rep.nodeRepID[i] = 0;
int nbytes = decoder.decodeUnsigned();
if (nbytes > MAX_BINARY_BYTES) { // large binary
rep.binaryKey = decoder.decode64bits();
rep.binaryOffset = decoder.decodeUnsignedLong();
rep.binarySize = decoder.decodeUnsignedLong();
rep.binaryOrigLen = decoder.decodeUnsignedLong();
rep.binaryPathAtom = decoder.decodeUnsigned();
} else {
decodeBinary(decoder, rep, nbytes);
}
break;
}
case NodeKind.PI: {
int piNodeRep = rep.nodeRepID[i] = nextPINodeRep++;
assert (piNodeRep < numPINodeReps);
int targetAtom = rep.piNodeTargetAtom[piNodeRep] =
decoder.decodeUnsigned();
assert (targetAtom < rep.atomLimit);
rep.piNodeTextRepID[piNodeRep] = rep.numTextReps;
decodeText(rep, decoder, rep.atomLimit);
break;
}
case NodeKind.LINK: {
long key = decoder.decode64bits();
int linkNodeRep = (int)remainderUnsigned(key,
rep.numLinkNodeReps);
while (true) {
if (rep.linkNodeKey[linkNodeRep] == 0) {
rep.nodeRepID[i] = linkNodeRep;
rep.linkNodeKey[linkNodeRep] = key;
rep.linkNodeNodeCount[linkNodeRep] =
decoder.decodeUnsignedLong();
rep.linkNodeNodeNameRepID[linkNodeRep] =
decoder.decodeUnsigned();
assert (rep.linkNodeNodeNameRepID[linkNodeRep] <
numNodeNameReps);
rep.linkNodeNodeRepID[linkNodeRep] = i;
break;
}
linkNodeRep = hashWrap(linkNodeRep + 1,
rep.numLinkNodeReps);
}
break;
}
case NodeKind.COMMENT: {
rep.nodeRepID[i] = rep.numTextReps;
decodeText(rep, decoder, rep.atomLimit);
break;
}
case NodeKind.DOC: {
int docNode = rep.nodeRepID[i] = nextDocNodeRep++;
assert (docNode < numDocNodeReps);
rep.docNodeTextRepID[i] = rep.numTextReps;
decodeText(rep, decoder, rep.atomLimit);
rep.docNodeChildNodeRepID[docNode] = Integer.MAX_VALUE;
rep.docNodeNumChildren[docNode] = 0;
break;
}
case NodeKind.NS: {
int nsNode = rep.nodeRepID[i] = nextNSNodeRep++;
assert (nsNode < rep.numNSNodeReps);
lastNSNodeRepOrdinal = rep.nsNodeOrdinal[nsNode] =
lastNSNodeRepOrdinal + decoder.decodeUnsignedLong();
rep.nsNodePrevNSNodeRepID[nsNode] = rep.nodeRepID[i] -
decoder.decodeUnsigned() - 1;
assert (rep.nsNodePrevNSNodeRepID[nsNode] < rep.numNSNodeReps
|| rep.nsNodePrevNSNodeRepID[nsNode] ==
Integer.MAX_VALUE);
rep.nsNodePrefixAtom[nsNode] = decoder.decodeUnsigned() - 1;
assert (rep.nsNodePrefixAtom[nsNode] < rep.atomLimit ||
rep.nsNodePrefixAtom[nsNode] == Integer.MAX_VALUE);
rep.nsNodeUriAtom[nsNode] = decoder.decodeUnsigned() - 1;
assert (rep.nsNodeUriAtom[nsNode] < rep.atomLimit ||
rep.nsNodeUriAtom[nsNode] == Integer.MAX_VALUE);
break;
}
case NodeKind.PERM: {
int permNode = rep.nodeRepID[i] = nextPermNodeRep++;
assert (permNode < rep.numPermNodeReps);
lastPermNodeRepOrdinal = rep.permNodeOrdinal[permNode] =
lastPermNodeRepOrdinal
+ decoder.decodeUnsignedLong();
long prevPermNode = rep.permNodePrevPermNodeRepID[permNode] =
permNode - decoder.decodeUnsigned() - 1;
assert (prevPermNode < rep.numPermNodeReps ||
prevPermNode == Integer.MAX_VALUE);
Capability capability = rep.permNodeCapability[permNode] =
Capability.values()[decoder.decodeUnsigned(4)];
assert (capability != Capability.NULL);
long roleId = rep.permNodeRoleId[permNode] =
decoder.decode64bits();
assert (roleId < Long.MAX_VALUE);
break;
}
case NodeKind.NULL: {
switch (decoder.decodeUnsigned(3)) {
case 1: {
rep.nodeKind[i] = NodeKind.BOOLEAN;
rep.nodeRepID[i] = 0;
break;
}
case 2: {
rep.nodeKind[i] = NodeKind.BOOLEAN;
rep.nodeRepID[i] = 1;
break;
}
case 3: {
rep.nodeKind[i] = NodeKind.NUMBER;
rep.nodeRepID[i] = nextDouble++;
assert(rep.nodeRepID[i] < numDoubles);
rep.doubles[rep.nodeRepID[i]] = decoder.decodeDouble();
break;
}
case 4: {
rep.nodeKind[i] = NodeKind.ARRAY;
rep.nodeRepID[i] = nextArrayNodeRep++;
assert(rep.nodeRepID[i] < numArrayNodeReps);
rep.arrayNodeTextRepID[rep.nodeRepID[i]] =
Integer.MAX_VALUE;
rep.arrayNodeChildNodeRepID[rep.nodeRepID[i]] =
Integer.MAX_VALUE;
rep.arrayNodeNumChildren[rep.nodeRepID[i]] = 0;
break;
}
case 5: {
rep.nodeKind[i] = NodeKind.OBJECT;
rep.nodeRepID[i] = nextArrayNodeRep++;
assert(rep.nodeRepID[i] < numArrayNodeReps);
rep.arrayNodeTextRepID[rep.nodeRepID[i]] = rep.numTextReps;
rep.arrayNodeChildNodeRepID[rep.nodeRepID[i]] =
Integer.MAX_VALUE;
rep.arrayNodeNumChildren[rep.nodeRepID[i]] = 0;
int numKeys = decoder.decodeUnsigned();
addText(rep,numKeys);
int atomLimit = rep.atomLimit;
for (int j=0; j=atomLimit) {
bad="atom";
if (LOG.isTraceEnabled())
LOG.trace(String.format(
"bad atom %d atomLimit %d",
atom,atomLimit));
}
rep.textReps[rep.numTextReps++] = atom;
}
break;
}
default:
break;
}
break;
}
default:
break;
}
}
if (rep.numNodeReps > 0) {
assignOrdinals(rep);
}
return rep;
}
static long remainderUnsigned(long dividend, int divisor) {
if (dividend > 0 && divisor > 0) { // signed comparisons
return dividend % divisor;
}
if (Long.compare(dividend + Long.MIN_VALUE, divisor + Long.MIN_VALUE)
< 0) {
return dividend;
}
return toUnsignedBigInteger(dividend).
remainder(toUnsignedBigInteger(divisor)).longValue();
}
static BigInteger toUnsignedBigInteger(long i) {
if (i >= 0L) {
return BigInteger.valueOf(i);
}
int upper = (int) (i >>> 32);
int lower = (int) i;
// return (upper << 32) + lower
return (BigInteger.valueOf(((long)upper) & 0xffffffffL)).shiftLeft(32).
add(BigInteger.valueOf(((long)lower) & 0xffffffffL));
}
private void decodeBinary(Decoder decoder, ExpandedTree rep, int nbytes)
throws IOException {
int nwords = ((nbytes+3)/4);
if (nwords <= 0) {
LOG.error("nbytes=" + nbytes + ", nwords=" + nwords);
}
rep.binaryData = new int[nwords];
decoder.decode(rep.binaryData, nwords);
}
private void assignOrdinals(ExpandedTree rep) {
long ordinal = 0;
int nodeID = 0;
if (rep.nodeKind[0] == NodeKind.LINK) {
rep.ordinal = rep.linkNodeNodeCount[rep.nodeRepID[0]];
rep.nodeOrdinal[0] = 0;
nodeID = 1;
}
while (nodeID != Integer.MAX_VALUE) {
rep.nodeOrdinal[nodeID] = ordinal++;
switch (rep.nodeKind[nodeID]) {
case NodeKind.ELEM: {
int elemID = rep.nodeRepID[nodeID];
for (int i = 0; i < rep.elemNodeNumAttributes[elemID]; i++) {
int attrNodeID = rep.elemNodeAttrNodeRepID[elemID] + i;
rep.nodeOrdinal[attrNodeID] = ordinal++;
}
int childNodeID = rep.elemNodeChildNodeRepID[elemID];
if (childNodeID != Integer.MAX_VALUE) {
nodeID = childNodeID;
continue;
}
break;
}
case NodeKind.LINK: {
int linkID = rep.nodeRepID[nodeID];
ordinal += rep.linkNodeNodeCount[linkID] - 1;
break;
}
case NodeKind.DOC: {
int docID = rep.nodeRepID[nodeID];
int childNodeID = rep.docNodeChildNodeRepID[docID];
if (childNodeID != Integer.MAX_VALUE) {
nodeID = childNodeID;
continue;
}
break;
}
case NodeKind.ARRAY:
case NodeKind.OBJECT: {
int docID = rep.nodeRepID[nodeID];
int childNodeID = rep.arrayNodeChildNodeRepID[docID];
if (childNodeID != Integer.MAX_VALUE) {
nodeID = childNodeID;
continue;
}
break;
}
default:
break;
}
int parentNodeID = rep.nodeParentNodeRepID[nodeID];
for (;;) {
if (parentNodeID == Integer.MAX_VALUE) {
nodeID = Integer.MAX_VALUE;
break;
}
if (rep.nodeKind[parentNodeID] == NodeKind.ELEM) {
int elemID = rep.nodeRepID[parentNodeID];
if (++nodeID < rep.elemNodeChildNodeRepID[elemID] +
rep.elemNodeNumChildren[elemID])
break;
} else if (rep.nodeKind[parentNodeID] == NodeKind.DOC) {
int docID = rep.nodeRepID[parentNodeID];
if (++nodeID < rep.docNodeChildNodeRepID[docID] +
rep.docNodeNumChildren[docID])
break;
} else if (rep.nodeKind[parentNodeID] == NodeKind.ARRAY ||
rep.nodeKind[parentNodeID] == NodeKind.OBJECT) {
int docID = rep.nodeRepID[parentNodeID];
if (++nodeID < rep.arrayNodeChildNodeRepID[docID] +
rep.arrayNodeNumChildren[docID])
break;
}
nodeID = parentNodeID;
parentNodeID = rep.nodeParentNodeRepID[nodeID];
}
}
for (int j = rep.numNodeReps - rep.numNSNodeReps - rep.numPermNodeReps;
j < rep.numNodeReps;
++j)
rep.nodeOrdinal[j] = ordinal++;
for (int k = rep.numNodeReps - rep.numPermNodeReps;
k < rep.numNodeReps;
++k)
rep.nodeOrdinal[k] = ordinal++;
// TODO: compared performance
if (Boolean.getBoolean("xcc.decode.atoms")) {
for (int x = 0; x < rep.atomLimit; ++x) rep.atomString(x);
}
}
public static int hashWrap(int x, int y) {
return (x < y) ? x : x - y;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy