All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jena.tdb.lib.NodeLib Maven / Gradle / Ivy

Go to download

TDB is a storage subsystem for Jena and ARQ; it is a native triple store providing persistent storage of triples/quads.

There is a newer version: 4.10.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdb.lib;

import static org.apache.jena.tdb.sys.SystemTDB.LenNodeHash ;

import java.nio.ByteBuffer ;
import java.security.DigestException ;
import java.security.MessageDigest ;
import java.security.NoSuchAlgorithmException ;
import java.util.Iterator ;

import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.atlas.lib.Bytes ;
import org.apache.jena.atlas.lib.Pool ;
import org.apache.jena.atlas.lib.PoolBase ;
import org.apache.jena.atlas.lib.PoolSync ;
import org.apache.jena.atlas.logging.Log ;
import org.apache.jena.graph.Node ;
import org.apache.jena.riot.out.NodeFmtLib ;
import org.apache.jena.sparql.util.NodeUtils ;
import org.apache.jena.tdb.TDBException ;
import org.apache.jena.tdb.base.block.Block ;
import org.apache.jena.tdb.base.objectfile.ObjectFile ;
import org.apache.jena.tdb.base.record.Record ;
import org.apache.jena.tdb.store.Hash ;
import org.apache.jena.tdb.store.NodeId ;
import org.apache.jena.tdb.store.NodeType ;
import org.apache.jena.tdb.store.nodetable.NodeTable ;
import org.apache.jena.tdb.store.nodetable.Nodec ;
import org.apache.jena.tdb.store.nodetable.NodecSSE ;

public class NodeLib
{
    private static Nodec nodec = new NodecSSE() ;
    
    // Characters in IRIs that are illegal and cause SSE problems, but we wish to keep.
    final private static char MarkerChar = '_' ;
    final private static char[] invalidIRIChars = { MarkerChar , ' ' } ; 
    
    public static long encodeStore(Node node, ObjectFile file)
    {
        // Buffer pool?
        
        // Nodes can be writtern during reads.
        // Make sure this operation is sync'ed. 
        int maxSize = nodec.maxSize(node) ;
        Block block = file.allocWrite(maxSize) ;
        try {
            int len = nodec.encode(node, block.getByteBuffer(), null) ;
            file.completeWrite(block) ;
            return block.getId() ;
        } catch (TDBException ex)
        {
            file.abortWrite(block) ;
            throw ex ;
        }
    }
    
    public static Node fetchDecode(long id, ObjectFile file)
    {
        ByteBuffer bb = file.read(id) ;
        if ( bb == null )
            return null ;
        return decode(bb) ;
    }
    
    /**
     * Encode a node - it is better to use encodeStore which may avoid
     * anadditional copy in getting the node into the ObjectFile
     */
    public static ByteBuffer encode(Node node)
    {
        int maxSize = nodec.maxSize(node) ;
        ByteBuffer bb = ByteBuffer.allocate(maxSize) ;
        int len = nodec.encode(node, bb, null) ;
        bb.limit(len) ;
        bb.position(0) ;
        return bb ;
    }
    
    /**
     * Decode a node - it is better to use fetchDecode which may avoid an
     * additional copy in getting the node from the ObjectFile.
     */
    public static Node decode(ByteBuffer bb)
    {
        bb.position(0) ;
        Node n = nodec.decode(bb, null) ;
        return n ;
    }

    public static Hash hash(Node n)
    { 
        Hash h = new Hash(LenNodeHash) ;
        setHash(h, n) ;
        return h ;
    }
    
    public static void setHash(Hash h, Node n) 
    {
        NodeType nt = NodeType.lookup(n) ;
        switch(nt) 
        {
            case URI:
                hash(h, n.getURI(), null, null, nt) ;
                return ;
            case BNODE:
                hash(h, n.getBlankNodeLabel(), null, null, nt) ;
                return ;
            case LITERAL:
                String dt = n.getLiteralDatatypeURI() ;
                if ( NodeUtils.isSimpleString(n) || NodeUtils.isLangString(n) ) {
                    // RDF 1.1 : No datatype for:
                    //   xsd:String as simple literals
                    //   rdf:langString and @ 
                    dt = null ;
                }
                hash(h, n.getLiteralLexicalForm(), n.getLiteralLanguage(), dt, nt) ;
                return  ;
            case OTHER:
                throw new TDBException("Attempt to hash something strange: "+n) ; 
        }
        throw new TDBException("NodeType broken: "+n) ; 
    }
    
    /** This pattern is common - abstract */ 
    private static int InitialPoolSize = 5 ;
    private static Pool digesters = PoolSync.create(new PoolBase()) ;
    static {
        try {
            for ( int i = 0 ; i < InitialPoolSize ; i++ )
                digesters.put(MessageDigest.getInstance("MD5"));
        }
        catch (NoSuchAlgorithmException e)
        { e.printStackTrace(); }
    }
    
    private static MessageDigest allocDigest()
    {
        try {
            MessageDigest disgest = digesters.get() ;
            if ( disgest == null ) 
                disgest = MessageDigest.getInstance("MD5");
            return disgest ;
        }
        catch (NoSuchAlgorithmException e)
        { e.printStackTrace(); return null ; }
    }

    private static void deallocDigest(MessageDigest digest) { digest.reset() ; digesters.put(digest) ; }
    
    
    private static void hash(Hash h, String lex, String lang, String datatype, NodeType nodeType)
    {
        if ( datatype == null )
            datatype = "" ;
        if ( lang == null )
            lang = "" ;
        String toHash = lex + "|" + lang + "|" + datatype+"|"+nodeType.getName() ;
        MessageDigest digest;
        try
        {
            digest = allocDigest() ; //MessageDigest.getInstance("MD5");
            digest.update(Bytes.string2bytes(toHash)); //digest.update(toHash.getBytes("UTF8"));
            if ( h.getLen() == 16 )
                // MD5 is 16 bytes.
                digest.digest(h.getBytes(), 0, 16) ;
            else
            {
                byte b[] = digest.digest(); // 16 bytes.
                // Avoid the copy? If length is 16.  digest.digest(bytes, 0, length) needs 16 bytes
                System.arraycopy(b, 0, h.getBytes(), 0, h.getLen()) ;
            }
            deallocDigest(digest) ;
            return ;
        }
        catch (DigestException ex) { Log.error(NodeLib.class, "DigestException", ex); } 
    }
    
    public static NodeId getNodeId(Record r, int idx)
    {
        return NodeId.create(Bytes.getLong(r.getKey(), idx)) ;
    }
    
    public static Node termOrAny(Node node)
    {
        if ( node == null || node.isVariable() )
            return Node.ANY ;
        return node ;
    }
    
    public static String format(String sep, Node[] nodes)
    {
        // Sigh ...
        StringBuilder b = new StringBuilder() ;
        for ( int i = 0 ; i < nodes.length ; i++ )
        {
            if ( i != 0 ) 
                b.append(sep) ;
            b.append(NodeFmtLib.str(nodes[i])) ;
        }
        return b.toString() ;
    }
    
    public static Iterator nodes(final NodeTable nodeTable, Iterator iter)
    {
        return Iter.map(iter, nodeTable::getNodeForNodeId) ;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy