All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.jena.tdb.setup.DatasetBuilderStd Maven / Gradle / Ivy

Go to download

TDB is a storage subsystem for Jena and ARQ; it is a native triple store providing persistent storage of triples/quads.

There is a newer version: 4.10.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdb.setup ;

import java.io.File ;
import java.util.Collections ;
import java.util.HashMap ;
import java.util.Map ;

import org.apache.jena.query.ARQ ;
import org.apache.jena.sparql.engine.main.QC ;
import org.apache.jena.sparql.engine.optimizer.reorder.ReorderLib ;
import org.apache.jena.sparql.engine.optimizer.reorder.ReorderTransformation ;
import org.apache.jena.sparql.sse.SSEParseException ;
import org.apache.jena.tdb.TDB ;
import org.apache.jena.tdb.TDBException ;
import org.apache.jena.tdb.base.block.BlockMgr ;
import org.apache.jena.tdb.base.file.BufferChannel ;
import org.apache.jena.tdb.base.file.FileSet ;
import org.apache.jena.tdb.base.file.Location ;
import org.apache.jena.tdb.base.objectfile.ObjectFile;
import org.apache.jena.tdb.base.record.RecordFactory ;
import org.apache.jena.tdb.index.* ;
import org.apache.jena.tdb.index.bplustree.BPlusTree ;
import org.apache.jena.tdb.index.bplustree.BPlusTreeParams ;
import org.apache.jena.tdb.lib.ColumnMap ;
import org.apache.jena.tdb.solver.OpExecutorTDB1 ;
import org.apache.jena.tdb.store.* ;
import org.apache.jena.tdb.store.nodetable.NodeTable ;
import org.apache.jena.tdb.store.nodetable.NodeTableCache ;
import org.apache.jena.tdb.store.nodetable.NodeTableInline ;
import org.apache.jena.tdb.store.nodetable.NodeTableNative ;
import org.apache.jena.tdb.store.nodetupletable.NodeTupleTable ;
import org.apache.jena.tdb.store.nodetupletable.NodeTupleTableConcrete ;
import org.apache.jena.tdb.store.tupletable.TupleIndex ;
import org.apache.jena.tdb.store.tupletable.TupleIndexRecord ;
import org.apache.jena.tdb.sys.* ;
import org.slf4j.Logger ;

/**
 * This class implements the process of building a dataset. It records the
 * BlockMgr/BufferChannel/NodeTable objects created, for use by the
 * transaction builder.
 */

public class DatasetBuilderStd {
    // Logger used for all messages from this class (shared TDB.logInfo logger).
    private static final Logger log = TDB.logInfo ;

    // Builders for block managers and object files. setupRecord() replaces
    // them with recording wrappers around the original builders.
    private BlockMgrBuilder blockMgrBuilder = new BuilderStdIndex.BlockMgrBuilderStd();
    private ObjectFileBuilder objectFileBuilder = new BuilderStdDB.ObjectFileBuilderStd(); 

    /**
     * Wrap the current builders in recording wrappers so that everything they
     * build is reported to the recorder of this builder.
     *
     * @throws TDBException if either builder is already a recording wrapper
     */
    private void setupRecord() {
        final BlockMgrBuilder plainBlockMgrBuilder = this.blockMgrBuilder ;
        if ( plainBlockMgrBuilder instanceof BlockMgrBuilderRecorder ) {
            throw new TDBException("Already recording (BlockMgrBuilder)") ;
        }

        final ObjectFileBuilder plainObjectFileBuilder = this.objectFileBuilder ;
        if ( plainObjectFileBuilder instanceof ObjectFileBuilderRecorder ) {
            throw new TDBException("Already recording (ObjectFileBuilder)") ;
        }

        // Both checks passed: install the wrappers.
        this.blockMgrBuilder = new BlockMgrBuilderRecorder(plainBlockMgrBuilder, recorder) ;
        this.objectFileBuilder = new ObjectFileBuilderRecorder(plainObjectFileBuilder, recorder) ;
    }
    
    /**
     * Build a range index (a B+Tree) over a file set. The tree order is
     * calculated from the block size in {@code indexParams} and the record
     * length of {@code recordFactory}; the same block manager builder is used
     * for both the tree nodes and the records.
     */
    private RangeIndex buildRangeIndex(FileSet fileSet, RecordFactory recordFactory, IndexParams indexParams) {
        final int treeOrder = BPlusTreeParams.calcOrder(indexParams.getBlockSize(), recordFactory.recordLength()) ;
        return createBPTree(fileSet, treeOrder, blockMgrBuilder, blockMgrBuilder, recordFactory, indexParams) ;
    }
    
    /** Build an index over a file set by delegating to {@code buildRangeIndex}. */
    private Index buildIndex(FileSet fileSet, RecordFactory recordFactory, IndexParams indexParams) {
        // Cheap: reuse the range index builder and hand the result back as a plain Index.
        final Index index = buildRangeIndex(fileSet, recordFactory, indexParams) ;
        return index ;
    }
    
    /**
     * Knowing all the parameters, create a B+Tree.
     *
     * @param fileset                 file set passed to both block manager builders
     * @param order                   B+Tree order; a negative value means "calculate it from the block size"
     * @param blockMgrBuilderNodes    builds the block manager for the tree nodes
     * @param blockMgrBuilderRecords  builds the block manager for the records
     * @param factory                 record factory; its record length is used when calculating the order
     * @param indexParams             supplies the block size and is passed to the block manager builders
     * @return the B+Tree created over the two block managers
     * @throws IllegalArgumentException if the block size is negative, or if a non-negative
     *         {@code order} differs from the order calculated from the block size
     */
    private RangeIndex createBPTree(FileSet fileset, int order, 
                                    BlockMgrBuilder blockMgrBuilderNodes,
                                    BlockMgrBuilder blockMgrBuilderRecords,
                                    RecordFactory factory, IndexParams indexParams)
    {
        // ---- Checking
        int blockSize = indexParams.getBlockSize() ;
        if ( blockSize < 0 )
            throw new IllegalArgumentException("Negative blocksize: "+blockSize) ;

        // The block size is valid from here on, so the order can always be calculated:
        // either to fill in an unspecified order or to cross-check a supplied one.
        int calcOrder = BPlusTreeParams.calcOrder(blockSize, factory.recordLength()) ;
        if ( order < 0 )
            order = calcOrder ;
        else if ( order != calcOrder )
            throw new IllegalArgumentException("Wrong order (" + order + "), calculated = " + calcOrder) ;

        BPlusTreeParams params = new BPlusTreeParams(order, factory) ;
        
        BlockMgr blkMgrNodes = blockMgrBuilderNodes.buildBlockMgr(fileset, Names.bptExtTree, indexParams) ;
        BlockMgr blkMgrRecords = blockMgrBuilderRecords.buildBlockMgr(fileset, Names.bptExtRecords, indexParams) ;
        return BPlusTree.create(params, blkMgrNodes, blkMgrRecords) ;
    }
    
    // Receives the block managers and object files created by the recording
    // builder wrappers; started and finished around each build in buildWorker.
    private Recorder            recorder = new Recorder() ;  
    
    /**
     * Create a {@link DatasetGraphTDB} at a location without any
     * application-supplied {@link StoreParams}.
     *
     * @param location  Where to create the database.
     * @return DatasetGraphTDB
     */
    public static DatasetGraphTDB create(Location location) {
        final StoreParams noAppParams = null ;
        return create(location, noAppParams) ;
    }
    
    /**
     * Create a {@link DatasetGraphTDB} with a set of {@link StoreParams}.
     * The parameters actually used are decided by {@code Build.decideStoreParams}
     * from three inputs: the parameters provided by the application, any
     * parameters read from the location, and the default store parameters.
     * 
     * @param location    Where to create the database.
     * @param appParams   Store parameters to use (null means use default). See {@link StoreParams}.
     * @return DatasetGraphTDB
     */
    public static DatasetGraphTDB create(Location location, StoreParams appParams) {
        final StoreParams storedParams = StoreParamsCodec.read(location) ;
        final StoreParams defaultParams = StoreParams.getDftStoreParams() ;
        final boolean isNewArea = TDBInternal.isNewDatabaseArea(location) ;
        // This can write the chosen parameters if necessary
        // (new database, appParams != null, no parameters stored at the location)
        final StoreParams chosenParams = Build.decideStoreParams(location, isNewArea, appParams, storedParams, defaultParams) ;
        return new DatasetBuilderStd().build(location, chosenParams) ;
    }
    
    /**
     * Create a {@link DatasetGraphTDB} at a fresh {@code Location.mem()} location.
     *
     * @param params  Store parameters, passed on as the application parameters.
     * @return DatasetGraphTDB
     */
    public static DatasetGraphTDB create(StoreParams params) {
        // Memory version?
        final Location memLocation = Location.mem() ;
        return create(memLocation, params) ;
    }

    /** Create a new builder using the no-argument constructor. */
    public static DatasetBuilderStd stdBuilder() {
        final DatasetBuilderStd builder = new DatasetBuilderStd() ;
        return builder ;
    }

    /** Create a builder that uses the standard block manager builder and the standard object file builder. */
    protected DatasetBuilderStd() { 
        this(new BuilderStdIndex.BlockMgrBuilderStd(),
             new BuilderStdDB.ObjectFileBuilderStd());
    }

    /**
     * Create a builder on top of the given block manager and object file
     * builders. Both builders are wrapped so that what they build is recorded.
     *
     * @param blockMgrBuilder    builder for block managers
     * @param objectFileBuilder  builder for object files
     * @throws TDBException if either builder is already a recording wrapper
     */
    public DatasetBuilderStd(BlockMgrBuilder blockMgrBuilder, ObjectFileBuilder objectFileBuilder) {
        // The recorder has to exist before setupRecord() wraps the builders around it.
        this.recorder = new Recorder() ;
        this.objectFileBuilder = objectFileBuilder ;
        this.blockMgrBuilder = blockMgrBuilder ;
        // XXX YUK
        setupRecord() ;
    }
    
//    private void standardSetup() {
//        ObjectFileBuilder objectFileBuilder = new BuilderStdDB.ObjectFileBuilderStd() ;
//        BlockMgrBuilder blockMgrBuilder = new BuilderStdIndex.BlockMgrBuilderStd() ;
//        IndexBuilder indexBuilderNT = new BuilderStdIndex.IndexBuilderStd(blockMgrBuilder, blockMgrBuilder) ;
//        NodeTableBuilder nodeTableBuilder = new BuilderStdDB.NodeTableBuilderStd(indexBuilderNT, objectFileBuilder) ;
//        setupRecord(blockMgrBuilder, nodeTableBuilder) ;
//    }

    /**
     * Check that a location can be used for a database. In-memory locations
     * are accepted without any checks; for a file location the directory must
     * exist, be a directory, and be both readable and writeable.
     * A failed check is reported through {@code error}, which throws a {@link TDBException}.
     */
    private static void checkLocation(Location location) {
        if ( ! location.isMem() ) {
            final String dirname = location.getDirectoryPath() ;
            final File dir = new File(dirname) ;
            // File location: the first failing check decides the message;
            // later checks are not evaluated.
            final String problem =
                  ! dir.exists()      ? "Does not exist: "
                : ! dir.isDirectory() ? "Not a directory: "
                : ! dir.canRead()     ? "Directory not readable: "
                : ! dir.canWrite()    ? "Directory not writeable: "
                : null ;
            if ( problem != null )
                error(log, problem + dirname) ;
        }
    }

    /**
     * Build a writeable dataset at a location after checking the location.
     * The reorder transformation is chosen from the location.
     *
     * @param location  where the database lives
     * @param params    store parameters to build with
     * @return the dataset
     */
    public DatasetGraphTDB build(Location location, StoreParams params) {
        // Ensure that there is global synchronization: the lock is the class
        // object, so builds are serialized across all builder instances.
        synchronized (DatasetBuilderStd.class) {
            final String dirPath = location.getDirectoryPath() ;
            log.debug("Build database: "+dirPath) ;
            checkLocation(location) ;
            final boolean writeable = true ;
            final ReorderTransformation chooseFromLocation = null ;
            return _build(location, params, writeable, chooseFromLocation) ;
        }
    }

    // Name of a database configuration file. Not referenced anywhere else in this class.
    private static String DB_CONFIG_FILE = "tdb.cfg" ; 
    
    /**
     * Main engine for building.
     * Called by DatasetBuilderTxn.
     * XXX Rework - provide a cloning constructor (copies maps). Or "reset".
     *
     * @param location    where the database lives
     * @param params      store parameters to build with
     * @param writeable   passed through to the storage configuration
     * @param _transform  reorder transformation to use; null means choose one from the location
     * @return the dataset
     */
    public DatasetGraphTDB _build(Location location, StoreParams params, boolean writeable, ReorderTransformation _transform) {
        final DatasetGraphTDB dsg = buildWorker(location, writeable, _transform, params) ;
        return dsg ;
    }
    
    /**
     * Build the node table, triple table, quad table and prefix table for a
     * location and assemble them into a {@link DatasetGraphTDB}.
     * <p>
     * The recorder is active while the tables are built, so the block managers
     * and object files created along the way end up in the {@link StorageConfig}.
     * The recorder is always finished again, even when building fails, so that
     * a failed build does not leave this builder stuck in the recording state.
     *
     * @param location    where the database lives
     * @param writeable   passed through to the storage configuration
     * @param _transform  reorder transformation to use; null means choose one from the location
     * @param params      store parameters to build with
     * @return the dataset
     */
    private synchronized DatasetGraphTDB buildWorker(Location location, boolean writeable, ReorderTransformation _transform, StoreParams params) {
        TripleTable tripleTable ;
        QuadTable quadTable ;
        DatasetPrefixesTDB prefixes ;
        ReorderTransformation transform ;
        StorageConfig storageConfig ;

        recorder.start() ;
        try {
            DatasetControl policy = createConcurrencyPolicy() ;
            NodeTable nodeTable = makeNodeTable(location, params) ;
            tripleTable = makeTripleTable(location, nodeTable, policy, params) ;
            quadTable = makeQuadTable(location, nodeTable, policy, params) ;
            prefixes = makePrefixTable(location, policy, params) ;

            transform = (_transform == null) ? chooseReorderTransformation(location) : _transform ;

            // Capture the recorded maps before finish() drops the recorder's references to them.
            storageConfig = new StorageConfig(location, params, writeable,
                                              recorder.blockMgrs, recorder.objectFiles, recorder.bufferChannels) ;
        } finally {
            // Without this, an exception above would make every later build
            // on this builder fail with "Recorder already recording".
            recorder.finish() ;
        }

        DatasetGraphTDB dsg = new DatasetGraphTDB(tripleTable, quadTable, prefixes, transform, storageConfig) ;
        // TDB does filter placement on BGPs itself.
        dsg.getContext().set(ARQ.optFilterPlacementBGP, false) ;
        QC.setFactory(dsg.getContext(), OpExecutorTDB1.OpExecFactoryTDB) ;
        return dsg ;
    }
    
    /**
     * Take an unmodifiable snapshot of a map. The entries are copied, so later
     * changes to {@code map} are not visible through the result.
     *
     * @param map  the map to copy; must not be null
     * @return an unmodifiable copy of {@code map}
     */
    private static <K, V> Map<K, V> freeze(Map<K, V> map) {
        return Collections.unmodifiableMap(new HashMap<>(map)) ;  
    }

    /** Create the concurrency policy shared by the tables of one dataset; this implementation returns a new {@link DatasetControlMRSW}. */
    protected DatasetControl createConcurrencyPolicy() {
        final DatasetControl policy = new DatasetControlMRSW() ;
        return policy ;
    }

    /**
     * Build the triple table: one tuple index per configured triple index
     * name, all sharing the given node table and concurrency policy.
     */
    protected TripleTable makeTripleTable(Location location, NodeTable nodeTable, DatasetControl policy, StoreParams params) {
        final String primaryOrder = params.getPrimaryIndexTriples() ;
        final String[] indexNames = params.getTripleIndexes() ;

        // Allow experimentation of other index layouts:
        // the number of indexes is deliberately not required to be 3.
        log.debug("Triple table: " + primaryOrder + " :: " + String.join(",", indexNames)) ;

        final TupleIndex[] builtIndexes = makeTupleIndexes(location, primaryOrder, indexNames, params) ;
        if ( builtIndexes.length != indexNames.length ) {
            error(log, "Wrong number of triple table tuples indexes: " + builtIndexes.length) ;
        }
        return new TripleTable(builtIndexes, nodeTable, policy) ;
    }

    /**
     * Build the quad table: one tuple index per configured quad index name,
     * all sharing the given node table and concurrency policy.
     */
    protected QuadTable makeQuadTable(Location location, NodeTable nodeTable, DatasetControl policy, StoreParams params) {
        final String primaryOrder = params.getPrimaryIndexQuads() ;
        final String[] indexNames = params.getQuadIndexes() ;

        // Allow experimentation of other index layouts:
        // the number of indexes is deliberately not required to be 6.
        log.debug("Quad table: " + primaryOrder + " :: " + String.join(",", indexNames)) ;

        final TupleIndex[] builtIndexes = makeTupleIndexes(location, primaryOrder, indexNames, params) ;
        if ( builtIndexes.length != indexNames.length ) {
            error(log, "Wrong number of quad table tuples indexes: " + builtIndexes.length) ;
        }
        return new QuadTable(builtIndexes, nodeTable, policy) ;
    }

    /**
     * Build the prefix table. It has exactly one tuple index, stored under the
     * file name from {@code params.getIndexPrefix()}, and its own node table
     * built without caches.
     */
    protected DatasetPrefixesTDB makePrefixTable(Location location, DatasetControl policy, StoreParams params) {
        final String primaryOrder = params.getPrimaryIndexPrefix() ;
        final String[] indexNames = params.getPrefixIndexes() ;
        final String[] indexFiles = new String[]{ params.getIndexPrefix() } ;

        final TupleIndex[] builtIndexes = makeTupleIndexes(location, primaryOrder, indexNames, indexFiles, params) ;
        if ( builtIndexes.length != 1 ) {
            error(log, "Wrong number of prefix table tuples indexes: " + builtIndexes.length) ;
        }

        final String node2IdName = params.getPrefixNode2Id() ;
        final String id2NodeName = params.getPrefixId2Node() ;

        // No cache - the prefix mapping is a cache
        final NodeTable prefixNodeTable = makeNodeTableNoCache(location, node2IdName, id2NodeName, params) ;
        final NodeTupleTable tupleTable =
            new NodeTupleTableConcrete(primaryOrder.length(), builtIndexes, prefixNodeTable, policy) ;
        final DatasetPrefixesTDB result = new DatasetPrefixesTDB(tupleTable) ;

        log.debug("Prefixes: " + primaryOrder + " :: " + String.join(",", indexNames)) ;
        return result ;
    }

    /** Choose the reorder transformation for a location; this implementation delegates to {@link #chooseOptimizer(Location)}. */
    protected ReorderTransformation chooseReorderTransformation(Location location) {
        final ReorderTransformation chosen = chooseOptimizer(location) ;
        return chosen ;
    }

    /** Build one tuple index per index name, using each index name as its file name as well. */
    private TupleIndex[] makeTupleIndexes(Location location, String primary, String[] indexNames, StoreParams params) {
        final String[] filenames = indexNames ;
        return makeTupleIndexes(location, primary, indexNames, filenames, params) ;
    }
    
    /**
     * Build one tuple index per index name.
     *
     * @param location    where the index files live
     * @param primary     primary column order; its length must be 3 or 4
     * @param indexNames  column order of each index to build
     * @param filenames   file name for each index, matched to {@code indexNames} by position
     * @param params      store parameters used when building each index
     * @return the indexes, in the order of {@code indexNames}
     * @throws TDBException (via {@code error}) if the primary length is not 3 or 4,
     *         or if there are fewer file names than index names
     */
    private TupleIndex[] makeTupleIndexes(Location location, String primary, String[] indexNames, String[] filenames, StoreParams params) {
        if ( primary.length() != 3 && primary.length() != 4 )
            error(log, "Bad primary key length: " + primary.length()) ;
        // Fail before building anything rather than with an array index error part-way through.
        if ( filenames.length < indexNames.length )
            error(log, "Too few index file names: " + filenames.length + " for " + indexNames.length + " indexes") ;

        TupleIndex indexes[] = new TupleIndex[indexNames.length] ;
        for ( int i = 0 ; i < indexes.length ; i++ )
            indexes[i] = makeTupleIndex(location, filenames[i], primary, indexNames[i], params) ;
        return indexes ;
    }

    /**
     * Build a single tuple index.
     *
     * @param location    where the index files live
     * @param name        file set name for the index; commonly the same as {@code indexOrder}
     * @param primary     primary column order
     * @param indexOrder  column order of this index, also used as the index name
     * @param params      store parameters used when building the index
     */
    protected TupleIndex makeTupleIndex(Location location, String name, String primary, String indexOrder, StoreParams params) {
        final FileSet indexFiles = new FileSet(location, name) ;
        final ColumnMap primaryToIndex = new ColumnMap(primary, indexOrder) ;
        final TupleIndex tupleIndex = buildTupleIndex(indexFiles, primaryToIndex, indexOrder, params) ;
        return tupleIndex ;
    }
    
    //-------------
    /**
     * Build a record-based tuple index over a range index. Records are
     * key-only: the key is one node id per column and the value length is 0.
     */
    private TupleIndex buildTupleIndex(FileSet fileSet, ColumnMap colMap, String name, StoreParams params) {
        final int keyLength = SystemTDB.SizeOfNodeId * colMap.length() ;
        final RecordFactory keyOnlyFactory = new RecordFactory(keyLength, 0) ;
        final RangeIndex rangeIndex = buildRangeIndex(fileSet, keyOnlyFactory, params) ;
        return new TupleIndexRecord(colMap.length(), colMap, name, keyOnlyFactory, rangeIndex) ;
    }

    /** Make the node table for a location, using the node-to-id and id-to-node names from the store parameters. */
    public NodeTable makeNodeTable(Location location, StoreParams params) {
        final String node2IdName = params.getIndexNode2Id() ;
        final String id2NodeName = params.getIndexId2Node() ;
        return makeNodeTable$(location, node2IdName, id2NodeName, params) ;
    }
    
    /**
     * Make a node table with explicitly given names for the node-to-id and
     * id-to-node files, overriding the names in {@code params}.
     */
    private NodeTable makeNodeTable$(Location location, String indexNode2Id, String indexId2Node, StoreParams params) {
        final FileSet node2IdFiles = new FileSet(location, indexNode2Id) ;
        final FileSet id2NodeFiles = new FileSet(location, indexId2Node) ;
        return buildNodeTable(node2IdFiles, id2NodeFiles, params) ;
    }
    
    //-------------
    
    /**
     * Build a node table as three layers: a native table over an index and an
     * object file, wrapped by {@code NodeTableCache.create} with the cache
     * sizes from {@code params}, wrapped in turn by {@code NodeTableInline.create}.
     */
    private NodeTable buildNodeTable(FileSet fsIndex, FileSet fsObjectFile, StoreParams params) {
        final RecordFactory hashToIdFactory = new RecordFactory(SystemTDB.LenNodeHash, SystemTDB.SizeOfNodeId) ;
        final Index nodeToIdIndex = buildIndex(fsIndex, hashToIdFactory, params) ;
        final ObjectFile nodeData = objectFileBuilder.buildObjectFile(fsObjectFile, Names.extNodeData) ;

        final NodeTable baseTable = new NodeTableNative(nodeToIdIndex, nodeData) ;
        final NodeTable cachedTable = NodeTableCache.create(baseTable,
                                                            params.getNode2NodeIdCacheSize(),
                                                            params.getNodeId2NodeCacheSize(),
                                                            params.getNodeMissCacheSize()) ;
        return NodeTableInline.create(cachedTable) ;
    }
    
    /**
     * Make a node table from a copy of {@code params} in which the three node
     * cache sizes are set to -1.
     */
    protected NodeTable makeNodeTableNoCache(Location location, String indexNode2Id, String indexId2Node, StoreParams params) {
        StoreParamsBuilder spb = StoreParams.builder(params) ;
        // Keep the value returned by each setter, exactly as the fluent chain would.
        spb = spb.node2NodeIdCacheSize(-1) ;
        spb = spb.nodeId2NodeCacheSize(-1) ;
        spb = spb.nodeMissCacheSize(-1) ;
        final StoreParams noCacheParams = spb.build() ;
        return makeNodeTable$(location, indexNode2Id, indexId2Node, noCacheParams) ;
    }
    
    /**
     * Report an error: log the message when a logger is given, then always
     * throw a {@link TDBException} carrying the same message.
     */
    private static void error(Logger log, String msg) {
        if ( log == null )
            throw new TDBException(msg) ;
        log.error(msg) ;
        throw new TDBException(msg) ;
    }

    /**
     * Parse a decimal integer, reporting a malformed string through
     * {@code error} with the message {@code messageBase + ": " + str}.
     */
    private static int parseInt(String str, String messageBase) {
        // -1 is only a placeholder for the compiler: error() always throws.
        int value = -1 ;
        try {
            value = Integer.parseInt(str) ;
        }
        catch (NumberFormatException ex) {
            error(log, messageBase + ": " + str) ;
        }
        return value ;
    }

    /**
     * Set the global flag that controls the "No BGP optimizer" warning.
     *
     * @param b  false to silence the warning, true to allow it
     */
    public static void setOptimizerWarningFlag(boolean b) {
        DatasetBuilderStd.warnAboutOptimizer = b ;
    }
    // Whether chooseOptimizer warns when it ends up with no reorder transformation at all.
    private static boolean warnAboutOptimizer = true ;

    /**
     * Choose the BGP reorder transformation for a location, based on marker
     * files found there:
     * <ol>
     * <li>a statistics file gives the weighted optimizer (a parse error is logged and ignored);</li>
     * <li>otherwise a "fixed" marker gives the fixed-pattern optimizer;</li>
     * <li>a "none" marker overrides both and gives the identity transformation;</li>
     * <li>if nothing was chosen, {@code SystemTDB.defaultReorderTransform} is used.</li>
     * </ol>
     *
     * @param location  the database location; null gives the identity transformation
     * @return the chosen transformation, or null when even the system default is null
     */
    public static ReorderTransformation chooseOptimizer(Location location) {
        if ( location == null )
            return ReorderLib.identity() ;

        ReorderTransformation chosen = null ;

        if ( location.exists(Names.optStats) ) {
            try {
                chosen = ReorderLib.weighted(location.getPath(Names.optStats)) ;
                log.debug("Statistics-based BGP optimizer") ;
            }
            catch (SSEParseException ex) {
                log.warn("Error in stats file: " + ex.getMessage()) ;
                chosen = null ;
            }
        }

        if ( chosen == null ) {
            if ( location.exists(Names.optFixed) ) {
                // Not as good but better than nothing.
                chosen = ReorderLib.fixed() ;
                log.debug("Fixed pattern BGP optimizer") ;
            }
        }

        // An explicit "none" wins over whatever was picked above.
        if ( location.exists(Names.optNone) ) {
            chosen = ReorderLib.identity() ;
            log.debug("Optimizer explicitly turned off") ;
        }

        if ( chosen == null ) {
            chosen = SystemTDB.defaultReorderTransform ;
            if ( chosen == null && warnAboutOptimizer )
                ARQ.getExecLogger().warn("No BGP optimizer") ;
        }

        return chosen ;
    }

    /** Callback that is told about a {@link BlockMgr} and the {@link FileRef} it was built for. */
    interface RecordBlockMgr {
        void record(FileRef fileRef, BlockMgr blockMgr) ;
    }

    /** Callback that is told about an {@link ObjectFile} and the {@link FileRef} it was built for. */
    interface RecordObjectFile {
        void record(FileRef fileRef, ObjectFile objFile);
    }

    /**
     * Callback that is told about a {@link NodeTable} and a {@link FileRef}.
     * NOTE(review): no caller of this callback is visible in this file.
     */
    interface RecordNodeTable {
        void record(FileRef fileRef, NodeTable nodeTable) ;
    }

    /**
     * An {@link ObjectFileBuilder} that delegates to another builder and
     * reports every object file it builds to a {@link RecordObjectFile}.
     */
    static class ObjectFileBuilderRecorder implements ObjectFileBuilder {
        private final ObjectFileBuilder delegate ;
        private final RecordObjectFile  sink ;
        
        ObjectFileBuilderRecorder(ObjectFileBuilder objFileBuilder, RecordObjectFile recorder) {
            this.delegate = objFileBuilder ;
            this.sink = recorder ;
        }
        
        /** Build with the wrapped builder, then record the result under the file reference for (file set, extension). */
        @Override
        public ObjectFile buildObjectFile(FileSet fsObjectFile, String ext) {
            final ObjectFile built = delegate.buildObjectFile(fsObjectFile, ext) ;
            sink.record(FileRef.create(fsObjectFile, ext), built) ;
            return built ;
        }
    }
    
    /**
     * A {@link BlockMgrBuilder} that delegates to another builder and reports
     * every block manager it builds to a {@link RecordBlockMgr}.
     */
    static class BlockMgrBuilderRecorder implements BlockMgrBuilder {
        private final BlockMgrBuilder delegate ;
        private final RecordBlockMgr  sink ;

        BlockMgrBuilderRecorder(BlockMgrBuilder blkMgrBuilder, RecordBlockMgr recorder) {
            this.delegate = blkMgrBuilder ;
            this.sink = recorder ;
        }

        /** Build with the wrapped builder, then record the result under the file reference for (file set, extension). */
        @Override
        public BlockMgr buildBlockMgr(FileSet fileSet, String ext, IndexParams params) {
            final BlockMgr built = delegate.buildBlockMgr(fileSet, ext, params) ;
            sink.record(FileRef.create(fileSet, ext), built) ;
            return built ;
        }
    }

    /**
     * Collects the objects reported while a dataset is being built, keyed by
     * {@link FileRef}. Recording happens only between {@link #start()} and
     * {@link #finish()}; reports made outside that window are ignored.
     */
    static class Recorder implements RecordBlockMgr, RecordObjectFile, RecordNodeTable {

        Map<FileRef, BlockMgr>      blockMgrs      = null ;
        Map<FileRef, ObjectFile>    objectFiles    = null ;
        // Not used currently.
        Map<FileRef, BufferChannel> bufferChannels = null ;
        Map<FileRef, NodeTable>     nodeTables     = null ;
        boolean recording = false ;

        Recorder() { }
        
        /**
         * Begin recording into fresh, empty maps.
         *
         * @throws TDBException if recording is already in progress
         */
        void start() {
            if ( recording )
                throw new TDBException("Recorder already recording") ;
            recording      = true ;
            blockMgrs      = new HashMap<>() ;
            objectFiles    = new HashMap<>() ;
            bufferChannels = new HashMap<>() ;
            nodeTables     = new HashMap<>() ;
        } 

        /**
         * Stop recording and drop the references to all maps created by {@link #start()}.
         *
         * @throws TDBException if recording is not in progress
         */
        void finish() {
            if ( ! recording )
                throw new TDBException("Recorder not recording") ;
            // null out, not .clear: map objects already handed to someone else
            // during the build must keep their contents.
            blockMgrs      = null ;
            objectFiles    = null ;
            bufferChannels = null ;
            nodeTables     = null ;
            recording      = false ;
        }
        
        @Override
        public void record(FileRef fileRef, BlockMgr blockMgr) {
            if ( recording ) {
                // log.info("BlockMgr: "+fileRef) ;
                blockMgrs.put(fileRef, blockMgr) ;
            }
        }

        @Override
        public void record(FileRef fileRef, ObjectFile objFile) {
            if ( recording ) {
                // log.info("ObjectTable: "+fileRef) ;
                objectFiles.put(fileRef, objFile) ;
            }
        }
        
        @Override
        public void record(FileRef fileRef, NodeTable nodeTable) {
            if ( recording ) {
                // log.info("NodeTable: "+fileRef) ;
                nodeTables.put(fileRef, nodeTable) ;
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy