// com.marklogic.tree.ExpandedTree Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of marklogic-mapreduce2 Show documentation
// Show all versions of marklogic-mapreduce2 Show documentation
// MarkLogic Connector for Hadoop MapReduce
/*
* Copyright 2003-2019 MarkLogic Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.marklogic.tree;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.w3c.dom.Node;
import com.marklogic.dom.AttrImpl;
import com.marklogic.dom.CommentImpl;
import com.marklogic.dom.DocumentImpl;
import com.marklogic.dom.ElementImpl;
import com.marklogic.dom.NodeImpl;
import com.marklogic.dom.ProcessingInstructionImpl;
import com.marklogic.dom.TextImpl;
/**
* Java equivalent of ExpandedTreeRep in Tree.h
*
* @author jchen
*/
public class ExpandedTree implements Writable {
// Logger shared by all instances of this class.
public static final Log LOG = LogFactory.getLog(ExpandedTree.class);
// Charset used by atomString(int) to decode atom bytes.
private static final Charset UTF8 = Charset.forName("UTF8");
// DOM wrappers created on demand by node(int); a null slot means "not
// created yet" or "kind has no wrapper".
NodeImpl nodes[]; // NodeRep*
public long ordinal; // uint64_t
public long uriKey; // uint64_t
public long uniqKey; // uint64_t
public long linkKey; // uint64_t
public long keys[]; // uint64_t*
// Raw atom bytes. atomIndex[i] is the offset of atom i inside atomData and
// atomIndex[i + 1] bounds it (see atomString(int)).
public byte atomData[]; // char*
// Cache of decoded atoms, filled lazily by atomString(int).
public String atomString[];
public int atomIndex[]; // unsigned*
// Per-node arrays, all indexed by node index.
public long nodeOrdinal[];
public byte nodeKind[];
// Kind-specific index of each node: putNode uses it to index the docNode*
// and arrayNode* arrays, doubles, or as a text rep index for text nodes.
public int nodeRepID[];
public int nodeParentNodeRepID[];
public int docNodeTextRepID[]; // unsigned DocNodeRep::textRepID
public int docNodeChildNodeRepID[]; // unsigned DocNodeRep::childNodeRepID
public int docNodeNumChildren[]; // unsigned DocNodeRep::numChildren
public int elemNodeNodeNameRepID[]; // unsigned ElemNodeRep::nodeNameRepID
public int elemNodeAttrNodeRepID[]; // unsigned ElemNodeRep::attrNodeRepID
public int elemNodeChildNodeRepID[]; // unsigned ElemNodeRep::childNodeRepID
public int elemNodeElemDeclRepID[]; // unsigned ElemNodeRep::elemDeclRepID
public int elemNodeNumAttributes[]; // unsigned ElemNodeRep::numAttributes:24
public int elemNodeNumDefaultAttrs[]; // unsigned ElemNodeRep::numDefaultAttrs:8
public int elemNodeNumChildren[]; // unsigned ElemNodeRep::numChildren:28
public int elemNodeFlags[]; // unsigned ElemNodeRep::flags
public int attrNodeNodeNameRepID[];
public int attrNodeTextRepID[];
public int attrNodeAttrDeclRepID[];
public int piNodeTargetAtom[];
public int piNodeTextRepID[];
public long linkNodeKey[];
public long linkNodeNodeCount[];
public int linkNodeNodeNameRepID[];
public int linkNodeNodeRepID[];
// Parallel arrays: name atom and namespace atom of each node name.
public int nodeNameNameAtom[];
public int nodeNameNamespaceAtom[];
public long nsNodeOrdinal[];
public int nsNodePrevNSNodeRepID[];
public int nsNodePrefixAtom[];
public int nsNodeUriAtom[];
// Permission nodes are not written or read by write()/readFields().
public long permNodeOrdinal[];
public int permNodePrevPermNodeRepID[];
public Capability permNodeCapability[];
public long permNodeRoleId[];
public int arrayNodeTextRepID[]; // unsigned ArrayNodeRep.textRepID
public int arrayNodeChildNodeRepID[]; // unsigned ArrayNodeRep.childNodeRepID
public int arrayNodeNumChildren[]; // unsigned ArrayNodeRep.numChildren
// Values of number nodes, indexed by nodeRepID (see putNode).
public double doubles[];
public long binaryKey; // uint64_t BinaryNodeRep.binaryKey
public long binaryOffset; // uint64_t BinaryNodeRep.offset
public long binarySize; // uint64_t BinaryNodeRep.size
public long binaryOrigLen; // uint64_t BinaryNodeRep.originalLength
public int binaryPathAtom; // unsigned BinaryNodeRep.pathAtom
public int numTextReps; // not in ExpandedTreeRep
// Text representations: textReps[id] is an atom count, followed by that
// many atom numbers (see getText(int) and getCollections()).
public int textReps[]; // unsigned*
public int binaryData[]; // unsigned*
public int atomLimit; // unsigned
public int numKeys; // unsigned
public int numNodeReps; // unsigned
public int numNSNodeReps; // unsigned
public int numPermNodeReps; // unsigned
public int numLinkNodeReps; // unsigned
public int uriTextRepID; // unsigned
public int colsTextRepID; // unsigned
// Metadata: metaKeys[i] is an atom number, metaVals[i] a text rep index
// (see getMetadata()).
public int[] metaKeys; // unsigned*
public int[] metaVals; // unsigned*
public int schemaRepUID; // unsigned
public long schemaTimestamp; // uint64_t
public int numMetadata; // unsigned
// Exposed through getters/setters only; not written by write().
private long fragmentOrdinal;
private int quality;
/**
 * Tests whether the atom's text is exactly the given bytes.
 *
 * The atom starts at {@code atomIndex[atom]} with a header that is skipped
 * the same way {@link #atomString(int)} skips it: one byte when the first
 * byte is non-negative, two bytes otherwise. The text that follows is
 * treated as terminated by a zero byte.
 *
 * @param atom  atom number, an index into {@code atomIndex}
 * @param value bytes to compare against (without a trailing zero byte)
 * @return {@code true} only if every byte matches and the atom ends where
 *         {@code value} ends
 */
public boolean atomEquals(int atom, byte value[]) {
    final int start = atomIndex[atom];
    // Test the sign rather than using ">>> 7": the byte is sign-extended to
    // int before shifting, so a negative header byte would yield 0x01FFFFFF
    // instead of 1 and push the index far out of range.
    int i = start + (atomData[start] < 0 ? 2 : 1);
    for (int p = 0; p < value.length; p++, i++) {
        if (i >= atomData.length) {
            return false; // ran off the atom data before value was exhausted
        }
        byte b = atomData[i];
        if (LOG.isTraceEnabled()) {
            LOG.trace(String.format("%02x %02x", b, value[p]));
        }
        if ((b == 0) || (b != value[p])) {
            return false;
        }
    }
    // All of value matched; the atom must end here too, otherwise value is
    // only a prefix of the atom.
    return i >= atomData.length || atomData[i] == 0;
}
/**
 * Returns the text of atom {@code i}, decoding it from {@code atomData} as
 * UTF-8 on first use and caching the result in the {@code atomString} array.
 *
 * @param i atom number; negative values and {@code Integer.MAX_VALUE} mean
 *          "no atom"
 * @return the atom text, or {@code null} when {@code i} denotes no atom
 */
public String atomString(int i) {
    if (i < 0 || i == Integer.MAX_VALUE) {
        return null;
    }
    if (atomString == null) {
        atomString = new String[atomIndex.length];
    } else if (i < atomString.length && atomString[i] != null) {
        return atomString[i];
    }
    // Skip the atom header: one byte if the first byte is non-negative,
    // two bytes otherwise.
    final int headerAt = atomIndex[i];
    final int textStart = headerAt + (atomData[headerAt] >= 0 ? 1 : 2);
    // The last byte before the next atom is not part of the text.
    final int textLength = atomIndex[i + 1] - textStart - 1;
    final String decoded = new String(atomData, textStart, textLength, UTF8);
    atomString[i] = decoded;
    return decoded;
}
/**
 * Builds the string for one text representation.
 *
 * {@code textReps[index]} holds the number of atoms; the following entries
 * are the atom numbers whose strings are concatenated in order.
 *
 * @param index position of the text representation in {@code textReps}
 * @return the concatenated text, or {@code null} if this tree has no
 *         {@code textReps}
 */
public String getText(int index) {
    if (textReps == null) {
        return null;
    }
    final boolean trace = LOG.isTraceEnabled();
    StringBuilder buf = new StringBuilder();
    int pos = index;
    for (int remaining = textReps[pos++]; remaining > 0; --remaining) {
        final int atom = textReps[pos++];
        // Decode once and reuse for both logging and appending.
        final String piece = atomString(atom);
        if (trace) {
            // Null-safe: tracing must not throw when the atom has no string.
            LOG.trace("atom " + atom + " [" + piece + "] length " +
                (piece == null ? 0 : piece.length()));
        }
        // As before, a null piece is appended as the text "null".
        buf.append(piece);
    }
    final String text = buf.toString();
    if (trace) {
        LOG.trace("getText(" + index + ") returning [" + text +
            "] length " + buf.length());
    }
    return text;
}
/**
 * Returns the collection names stored in the text representation at
 * {@code colsTextRepID}: a count followed by one atom number per name.
 *
 * @return one string per collection, in stored order
 */
public String[] getCollections() {
    final int count = textReps[colsTextRepID];
    final String[] collections = new String[count];
    int cursor = colsTextRepID + 1;
    for (int n = 0; n < count; n++, cursor++) {
        collections[n] = atomString(textReps[cursor]);
    }
    return collections;
}
/**
 * Collects the metadata entries into a map. Keys come from the atoms in
 * {@code metaKeys}, values from the text representations in
 * {@code metaVals}.
 *
 * @return a new map of key to value, or {@code null} when
 *         {@code numMetadata} is zero
 */
public Map getMetadata() {
    if (numMetadata == 0) {
        return null;
    }
    final Map<String, String> entries =
        new HashMap<String, String>(numMetadata);
    for (int n = 0; n < numMetadata; n++) {
        final String key = atomString(metaKeys[n]);
        final String val = getText(metaVals[n]);
        entries.put(key, val);
    }
    return entries;
}
/**
 * Returns the kind of the root content node.
 *
 * When node 0 is a document node, the kind of its first child is returned;
 * in every other case the kind of node 0 itself is returned.
 *
 * @return a {@code NodeKind} constant
 */
public byte rootNodeKind() {
    final Node root = node(0);
    // node(0) can also yield element, text, etc. wrappers; only a document
    // wrapper has a first child to look through, so check the type instead
    // of casting blindly.
    if (root instanceof DocumentImpl) {
        return nodeKind[((DocumentImpl) root).getFirstChildIndex()];
    }
    return nodeKind[0];
}
/**
 * Returns the DOM wrapper for node {@code i}, creating and caching it in
 * {@code nodes} on first access.
 *
 * @param i node index; {@code Integer.MAX_VALUE} means "no node"
 * @return the wrapper, or {@code null} for "no node" and for kinds that
 *         have no wrapper here (link, namespace, permission, binary, and
 *         unrecognized kinds, the latter with a warning)
 */
public Node node(int i) {
    if (i == Integer.MAX_VALUE) {
        return null;
    }
    final NodeImpl cached = nodes[i];
    if (cached != null) {
        return cached;
    }
    final NodeImpl created;
    switch (nodeKind[i]) {
    case NodeKind.ELEM:
        created = new ElementImpl(this, i);
        break;
    case NodeKind.ATTR:
        created = new AttrImpl(this, i);
        break;
    case NodeKind.TEXT:
        created = new TextImpl(this, i);
        break;
    case NodeKind.DOC:
        created = new DocumentImpl(this, i);
        break;
    case NodeKind.PI:
        created = new ProcessingInstructionImpl(this, i);
        break;
    case NodeKind.COMMENT:
        created = new CommentImpl(this, i);
        break;
    case NodeKind.LINK:
    case NodeKind.NS:
    case NodeKind.PERM:
    case NodeKind.BINARY:
        // No wrapper for these kinds; the slot stays as it is.
        return nodes[i];
    default:
        LOG.warn("Unexpected node kind: " + nodeKind[i] + " @ " + i);
        return nodes[i];
    }
    nodes[i] = created;
    return nodes[i];
}
/**
 * Returns the document URI, i.e. the text stored at {@code uriTextRepID}.
 *
 * @return the URI, or {@code null} if this tree has no {@code textReps}
 */
public String getDocumentURI() {
    final String uri = getText(uriTextRepID);
    return uri;
}
/**
 * Builds the relative path derived from {@code binaryKey}: a directory
 * named after the key's top ten bits (at least three hex digits) and a
 * file named after the whole key (sixteen hex digits).
 *
 * @return the path {@code <dir>/<file>}
 */
public Path getPathToBinary() {
    final String parent = String.format("%03x", binaryKey >>> 54);
    final String child = String.format("%016x", binaryKey);
    return new Path(parent, child);
}
/**
 * Tells whether this tree has any link nodes.
 *
 * @return {@code true} if {@code numLinkNodeReps} is positive
 */
public boolean containLinks() {
    final boolean hasLinks = numLinkNodeReps > 0;
    return hasLinks;
}
/**
 * @return the fragment ordinal last set on this tree
 */
public long getFragmentOrdinal() {
    return this.fragmentOrdinal;
}
/**
 * Stores the fragment ordinal for this tree.
 *
 * @param fragmentOrdinal value to store
 */
public void setFragmentOrdinal(final long fragmentOrdinal) {
    this.fragmentOrdinal = fragmentOrdinal;
}
/**
 * @return the quality value last set on this tree
 */
public int getQuality() {
    return this.quality;
}
/**
 * Stores the quality value for this tree.
 *
 * @param quality value to store
 */
public void setQuality(final int quality) {
    this.quality = quality;
}
/**
 * Restores this tree from the stream layout produced by
 * {@link #write(DataOutput)}.
 *
 * Each section is a count followed by that many records. A section whose
 * count is not positive clears the corresponding arrays, and the decoded
 * atom cache is dropped, so an instance that is reused for several records
 * never exposes data from an earlier record. Permission nodes are not part
 * of the stream.
 *
 * @param in source to read from
 * @throws IOException if the stream ends early or cannot be read
 */
@Override
public void readFields(DataInput in) throws IOException {
    // Cached strings were decoded from the previous atomData.
    atomString = null;

    uriKey = in.readLong();
    uniqKey = in.readLong();
    linkKey = in.readLong();

    numKeys = in.readInt();
    if (numKeys > 0) {
        keys = new long[numKeys];
        for (int i = 0; i < numKeys; i++) {
            keys[i] = in.readLong();
        }
    } else {
        keys = null;
    }

    int atomDataLen = in.readInt();
    if (atomDataLen > 0) {
        atomData = new byte[atomDataLen];
        // Bulk read; fails with EOFException on a short stream just like
        // reading the bytes one at a time.
        in.readFully(atomData);
    } else {
        atomData = null;
    }

    atomLimit = in.readInt();
    if (atomLimit > 0) {
        // One extra entry marks the end of the last atom.
        atomIndex = new int[atomLimit + 1];
        for (int i = 0; i < atomLimit + 1; i++) {
            atomIndex[i] = in.readInt();
        }
    } else {
        atomIndex = null;
    }

    int nodeNameNameAtomLen = in.readInt();
    if (nodeNameNameAtomLen > 0) {
        nodeNameNameAtom = new int[nodeNameNameAtomLen];
        nodeNameNamespaceAtom = new int[nodeNameNameAtomLen];
        for (int i = 0; i < nodeNameNameAtomLen; i++) {
            nodeNameNameAtom[i] = in.readInt();
            nodeNameNamespaceAtom[i] = in.readInt();
        }
    } else {
        nodeNameNameAtom = null;
        nodeNameNamespaceAtom = null;
    }

    numNodeReps = in.readInt();
    if (numNodeReps > 0) {
        nodes = new NodeImpl[numNodeReps];
        nodeOrdinal = new long[numNodeReps];
        nodeKind = new byte[numNodeReps];
        nodeRepID = new int[numNodeReps];
        nodeParentNodeRepID = new int[numNodeReps];
        for (int i = 0; i < numNodeReps; i++) {
            nodeOrdinal[i] = in.readLong();
            nodeKind[i] = in.readByte();
            nodeRepID[i] = in.readInt();
            nodeParentNodeRepID[i] = in.readInt();
        }
    } else {
        nodes = null;
        nodeOrdinal = null;
        nodeKind = null;
        nodeRepID = null;
        nodeParentNodeRepID = null;
    }

    int numElemNodeReps = in.readInt();
    if (numElemNodeReps > 0) {
        elemNodeNodeNameRepID = new int[numElemNodeReps];
        elemNodeAttrNodeRepID = new int[numElemNodeReps];
        elemNodeChildNodeRepID = new int[numElemNodeReps];
        elemNodeElemDeclRepID = new int[numElemNodeReps];
        elemNodeNumAttributes = new int[numElemNodeReps];
        elemNodeNumDefaultAttrs = new int[numElemNodeReps];
        elemNodeNumChildren = new int[numElemNodeReps];
        elemNodeFlags = new int[numElemNodeReps];
        for (int i = 0; i < numElemNodeReps; i++) {
            elemNodeNodeNameRepID[i] = in.readInt();
            elemNodeAttrNodeRepID[i] = in.readInt();
            elemNodeChildNodeRepID[i] = in.readInt();
            elemNodeElemDeclRepID[i] = in.readInt();
            elemNodeNumAttributes[i] = in.readInt();
            elemNodeNumDefaultAttrs[i] = in.readInt();
            elemNodeNumChildren[i] = in.readInt();
            elemNodeFlags[i] = in.readInt();
        }
    } else {
        elemNodeNodeNameRepID = null;
        elemNodeAttrNodeRepID = null;
        elemNodeChildNodeRepID = null;
        elemNodeElemDeclRepID = null;
        elemNodeNumAttributes = null;
        elemNodeNumDefaultAttrs = null;
        elemNodeNumChildren = null;
        elemNodeFlags = null;
    }

    int numAttrNodeReps = in.readInt();
    if (numAttrNodeReps > 0) {
        attrNodeNodeNameRepID = new int[numAttrNodeReps];
        attrNodeTextRepID = new int[numAttrNodeReps];
        attrNodeAttrDeclRepID = new int[numAttrNodeReps];
        for (int i = 0; i < numAttrNodeReps; i++) {
            attrNodeNodeNameRepID[i] = in.readInt();
            attrNodeTextRepID[i] = in.readInt();
            attrNodeAttrDeclRepID[i] = in.readInt();
        }
    } else {
        attrNodeNodeNameRepID = null;
        attrNodeTextRepID = null;
        attrNodeAttrDeclRepID = null;
    }

    numLinkNodeReps = in.readInt();
    if (numLinkNodeReps > 0) {
        linkNodeKey = new long[numLinkNodeReps];
        linkNodeNodeCount = new long[numLinkNodeReps];
        linkNodeNodeNameRepID = new int[numLinkNodeReps];
        linkNodeNodeRepID = new int[numLinkNodeReps];
        for (int i = 0; i < numLinkNodeReps; i++) {
            linkNodeKey[i] = in.readLong();
            linkNodeNodeCount[i] = in.readLong();
            linkNodeNodeNameRepID[i] = in.readInt();
            linkNodeNodeRepID[i] = in.readInt();
        }
    } else {
        linkNodeKey = null;
        linkNodeNodeCount = null;
        linkNodeNodeNameRepID = null;
        linkNodeNodeRepID = null;
    }

    int numDocNodeReps = in.readInt();
    if (numDocNodeReps > 0) {
        docNodeTextRepID = new int[numDocNodeReps];
        docNodeChildNodeRepID = new int[numDocNodeReps];
        docNodeNumChildren = new int[numDocNodeReps];
        for (int i = 0; i < numDocNodeReps; i++) {
            docNodeTextRepID[i] = in.readInt();
            docNodeChildNodeRepID[i] = in.readInt();
            docNodeNumChildren[i] = in.readInt();
        }
    } else {
        docNodeTextRepID = null;
        docNodeChildNodeRepID = null;
        docNodeNumChildren = null;
    }

    int numPINodeReps = in.readInt();
    if (numPINodeReps > 0) {
        piNodeTargetAtom = new int[numPINodeReps];
        piNodeTextRepID = new int[numPINodeReps];
        for (int i = 0; i < numPINodeReps; i++) {
            piNodeTargetAtom[i] = in.readInt();
            piNodeTextRepID[i] = in.readInt();
        }
    } else {
        piNodeTargetAtom = null;
        piNodeTextRepID = null;
    }

    numNSNodeReps = in.readInt();
    if (numNSNodeReps > 0) {
        nsNodeOrdinal = new long[numNSNodeReps];
        nsNodePrevNSNodeRepID = new int[numNSNodeReps];
        nsNodePrefixAtom = new int[numNSNodeReps];
        nsNodeUriAtom = new int[numNSNodeReps];
        for (int i = 0; i < numNSNodeReps; i++) {
            nsNodeOrdinal[i] = in.readLong();
            nsNodePrevNSNodeRepID[i] = in.readInt();
            nsNodePrefixAtom[i] = in.readInt();
            nsNodeUriAtom[i] = in.readInt();
        }
    } else {
        nsNodeOrdinal = null;
        nsNodePrevNSNodeRepID = null;
        nsNodePrefixAtom = null;
        nsNodeUriAtom = null;
    }

    // skip permission node since it's not exposed to the API
    uriTextRepID = in.readInt();
    colsTextRepID = in.readInt();

    numTextReps = in.readInt();
    if (numTextReps > 0) {
        textReps = new int[numTextReps];
        for (int i = 0; i < numTextReps; i++) {
            textReps[i] = in.readInt();
        }
    } else {
        textReps = null;
    }

    int numArrayNodeReps = in.readInt();
    if (numArrayNodeReps > 0) {
        arrayNodeTextRepID = new int[numArrayNodeReps];
        arrayNodeChildNodeRepID = new int[numArrayNodeReps];
        arrayNodeNumChildren = new int[numArrayNodeReps];
        for (int i = 0; i < numArrayNodeReps; i++) {
            arrayNodeTextRepID[i] = in.readInt();
            arrayNodeChildNodeRepID[i] = in.readInt();
            arrayNodeNumChildren[i] = in.readInt();
        }
    } else {
        arrayNodeTextRepID = null;
        arrayNodeChildNodeRepID = null;
        arrayNodeNumChildren = null;
    }

    int numDoubles = in.readInt();
    if (numDoubles > 0) {
        doubles = new double[numDoubles];
        for (int i = 0; i < numDoubles; i++) {
            doubles[i] = in.readDouble();
        }
    } else {
        doubles = null;
    }
}
/**
 * Serializes this tree in the layout that {@link #readFields(DataInput)}
 * reads back: the three keys, then one count-prefixed section each for
 * keys, atom data, atom index, node names, nodes, element, attribute,
 * link, document, processing-instruction and namespace nodes, the URI and
 * collections text rep ids, text reps, array nodes and doubles.
 *
 * Permission nodes, metadata, binary node fields, {@code ordinal}, the
 * fragment ordinal and the quality are not written.
 *
 * @param out sink to write to
 * @throws IOException if writing fails, or if {@code atomIndex} has fewer
 *         than {@code atomLimit + 1} entries
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeLong(uriKey);
    out.writeLong(uniqKey);
    out.writeLong(linkKey);

    out.writeInt(numKeys);
    // Write exactly numKeys entries, the number the reader will consume,
    // rather than however long the array happens to be.
    for (int i = 0; i < numKeys; i++) {
        out.writeLong(keys[i]);
    }

    if (atomData != null && atomData.length > 0) {
        out.writeInt(atomData.length);
        out.write(atomData);
    } else {
        out.writeInt(0);
    }

    out.writeInt(atomLimit);
    // The reader consumes atomLimit + 1 entries, and only when atomLimit is
    // positive; emit exactly that so the stream stays aligned.
    if (atomLimit > 0) {
        if (atomIndex == null || atomIndex.length <= atomLimit) {
            throw new IOException("atomIndex has " +
                (atomIndex == null ? 0 : atomIndex.length) +
                " entries but atomLimit " + atomLimit + " requires " +
                (atomLimit + 1L));
        }
        for (int i = 0; i <= atomLimit; i++) {
            out.writeInt(atomIndex[i]);
        }
    }

    if (nodeNameNameAtom != null && nodeNameNameAtom.length > 0) {
        out.writeInt(nodeNameNameAtom.length);
        for (int i = 0; i < nodeNameNameAtom.length; i++) {
            out.writeInt(nodeNameNameAtom[i]);
            out.writeInt(nodeNameNamespaceAtom[i]);
        }
    } else {
        out.writeInt(0);
    }

    out.writeInt(numNodeReps);
    for (int i = 0; i < numNodeReps; i++) {
        out.writeLong(nodeOrdinal[i]);
        out.writeByte(nodeKind[i]);
        out.writeInt(nodeRepID[i]);
        out.writeInt(nodeParentNodeRepID[i]);
    }

    if (elemNodeNodeNameRepID != null &&
            elemNodeNodeNameRepID.length > 0) {
        out.writeInt(elemNodeNodeNameRepID.length);
        for (int i = 0; i < elemNodeNodeNameRepID.length; i++) {
            out.writeInt(elemNodeNodeNameRepID[i]);
            out.writeInt(elemNodeAttrNodeRepID[i]);
            out.writeInt(elemNodeChildNodeRepID[i]);
            out.writeInt(elemNodeElemDeclRepID[i]);
            out.writeInt(elemNodeNumAttributes[i]);
            out.writeInt(elemNodeNumDefaultAttrs[i]);
            out.writeInt(elemNodeNumChildren[i]);
            out.writeInt(elemNodeFlags[i]);
        }
    } else {
        out.writeInt(0);
    }

    if (attrNodeNodeNameRepID != null &&
            attrNodeNodeNameRepID.length > 0) {
        out.writeInt(attrNodeNodeNameRepID.length);
        for (int i = 0; i < attrNodeNodeNameRepID.length; i++) {
            out.writeInt(attrNodeNodeNameRepID[i]);
            out.writeInt(attrNodeTextRepID[i]);
            out.writeInt(attrNodeAttrDeclRepID[i]);
        }
    } else {
        out.writeInt(0);
    }

    out.writeInt(numLinkNodeReps);
    for (int i = 0; i < numLinkNodeReps; i++) {
        out.writeLong(linkNodeKey[i]);
        out.writeLong(linkNodeNodeCount[i]);
        out.writeInt(linkNodeNodeNameRepID[i]);
        out.writeInt(linkNodeNodeRepID[i]);
    }

    if (docNodeTextRepID != null && docNodeTextRepID.length > 0) {
        out.writeInt(docNodeTextRepID.length);
        for (int i = 0; i < docNodeTextRepID.length; i++) {
            out.writeInt(docNodeTextRepID[i]);
            out.writeInt(docNodeChildNodeRepID[i]);
            out.writeInt(docNodeNumChildren[i]);
        }
    } else {
        out.writeInt(0);
    }

    if (piNodeTargetAtom != null && piNodeTargetAtom.length > 0) {
        out.writeInt(piNodeTargetAtom.length);
        for (int i = 0; i < piNodeTargetAtom.length; i++) {
            out.writeInt(piNodeTargetAtom[i]);
            out.writeInt(piNodeTextRepID[i]);
        }
    } else {
        out.writeInt(0);
    }

    out.writeInt(numNSNodeReps);
    for (int i = 0; i < numNSNodeReps; i++) {
        out.writeLong(nsNodeOrdinal[i]);
        out.writeInt(nsNodePrevNSNodeRepID[i]);
        out.writeInt(nsNodePrefixAtom[i]);
        out.writeInt(nsNodeUriAtom[i]);
    }

    // skip permission node since it's not exposed to the API
    out.writeInt(uriTextRepID);
    out.writeInt(colsTextRepID);

    out.writeInt(numTextReps);
    for (int i = 0; i < numTextReps; i++) {
        out.writeInt(textReps[i]);
    }

    if (arrayNodeTextRepID != null &&
            arrayNodeTextRepID.length > 0) {
        out.writeInt(arrayNodeTextRepID.length);
        for (int i = 0; i < arrayNodeTextRepID.length; i++) {
            out.writeInt(arrayNodeTextRepID[i]);
            out.writeInt(arrayNodeChildNodeRepID[i]);
            out.writeInt(arrayNodeNumChildren[i]);
        }
    } else {
        out.writeInt(0);
    }

    if (doubles != null && doubles.length > 0) {
        out.writeInt(doubles.length);
        for (int i = 0; i < doubles.length; i++) {
            out.writeDouble(doubles[i]);
        }
    } else {
        out.writeInt(0);
    }
}
/**
 * Renders the tree starting at node 0 using
 * {@link #putNode(int, StringBuilder)}.
 *
 * @return the rendered text, or an empty string when the tree holds no
 *         nodes, so that logging an unpopulated tree does not throw
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    if (nodeKind != null && nodeKind.length > 0) {
        putNode(0, sb);
    }
    return sb.toString();
}
/**
 * Appends a textual rendering of node {@code index} and its descendants.
 *
 * Text is quoted, document nodes render their children back to back,
 * arrays render as {@code [a, b]}, objects as {@code { "key" : value }},
 * and null/boolean/number nodes as their literal. Any other kind appends
 * {@code node:UNKNOWN }.
 *
 * @param index node index into {@code nodeKind} / {@code nodeRepID}
 * @param sb    buffer to append to
 */
public void putNode(int index, StringBuilder sb) {
    final byte kind = nodeKind[index];
    if (LOG.isTraceEnabled()) {
        LOG.trace(String.format("putNode index %d nodeKind %d",
            index, kind));
    }
    switch (kind) {
    case NodeKind.TEXT: {
        sb.append("\"").append(getText(nodeRepID[index])).append("\"");
        break;
    }
    case NodeKind.DOC: {
        final int rep = nodeRepID[index];
        final int end = docNodeChildNodeRepID[rep] + docNodeNumChildren[rep];
        int child = docNodeChildNodeRepID[rep];
        while (child < end) {
            putNode(child, sb);
            child++;
        }
        break;
    }
    case NodeKind.NULL: {
        sb.append("null");
        break;
    }
    case NodeKind.BOOLEAN: {
        sb.append(nodeRepID[index] == 1 ? "true" : "false");
        break;
    }
    case NodeKind.NUMBER: {
        final double number = doubles[nodeRepID[index]];
        sb.append(String.valueOf(number));
        break;
    }
    case NodeKind.ARRAY: {
        sb.append("[");
        final int rep = nodeRepID[index];
        final int end = arrayNodeChildNodeRepID[rep] +
            arrayNodeNumChildren[rep];
        final int first = arrayNodeChildNodeRepID[rep];
        for (int child = first; child < end; child++) {
            if (child != first) {
                sb.append(", ");
            }
            putNode(child, sb);
        }
        sb.append("]");
        break;
    }
    case NodeKind.OBJECT: {
        sb.append("{ ");
        final int rep = nodeRepID[index];
        final int end = arrayNodeChildNodeRepID[rep] +
            arrayNodeNumChildren[rep];
        final int first = arrayNodeChildNodeRepID[rep];
        for (int child = first; child < end; child++) {
            // Position of this member within the object; its key atom sits
            // at the same offset from the object's text rep id.
            final int offset = child - first;
            if (offset != 0) {
                sb.append(", ");
            }
            sb.append("\"")
              .append(atomString(textReps[arrayNodeTextRepID[rep] + offset]))
              .append("\" : ");
            putNode(child, sb);
        }
        sb.append(" }");
        break;
    }
    default: {
        sb.append("node:UNKNOWN ");
        break;
    }
    }
}
}