All downloads are free. Search and download functionality uses the official Maven repository.

com.bigdata.rdf.lexicon.BatchResolveBlobIVsTask Maven / Gradle / Ivy

package com.bigdata.rdf.lexicon;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.openrdf.model.Value;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.impl.BlobIV;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;

/**
 * Batch resolve {@link BlobIV}s to RDF {@link Value}s.
 * 
 * @author Bryan
 *         Thompson
 */
class BatchResolveBlobIVsTask implements Callable {

//    static private final transient Logger log = Logger
//            .getLogger(BatchResolveBlobIVsTask.class);

    private final ExecutorService service;
    private final IIndex ndx;
    private final Collection> ivs;
    private final ConcurrentHashMap/* iv */, BigdataValue/* term */> ret;
    private final ITermCache, BigdataValue> termCache;
    private final BigdataValueFactory valueFactory;
    private final int MAX_CHUNK;

    public BatchResolveBlobIVsTask(
            final ExecutorService service,
            final IIndex ndx,
            final Collection> ivs,
            final ConcurrentHashMap/* iv */, BigdataValue/* term */> ret,
            final ITermCache, BigdataValue> termCache,
            final BigdataValueFactory valueFactory,
            final int chunkSize) {

        this.service = service;
        
        this.ndx = ndx;
        
        this.ivs = ivs;
        
        this.ret = ret;
        
        this.termCache = termCache;
        
        this.valueFactory = valueFactory;
        
        this.MAX_CHUNK = chunkSize;
        
    }

    public Void call() throws Exception {

        final int numNotFound = ivs.size();
        
        // An array of IVs that to be resolved against the index.
        final BlobIV[] notFound = ivs.toArray(new BlobIV[numNotFound]);
        
        // Sort IVs into index order.
        Arrays.sort(notFound, 0, numNotFound);

        // Encode IVs as keys for the index.
        final byte[][] keys = new byte[numNotFound][];
        {

            final IKeyBuilder keyBuilder = KeyBuilder.newInstance();

            for (int i = 0; i < numNotFound; i++) {

                keys[i] = notFound[i].encode(keyBuilder.reset()).getKey();

            }
            
        }

        if (numNotFound < MAX_CHUNK) {

            /*
             * Resolve everything in one go.
             */

            new ResolveBlobsTask(ndx, 0/* fromIndex */,
                    numNotFound/* toIndex */, keys, notFound, ret,
                    termCache, valueFactory).call();

        } else {

            /*
             * Break it down into multiple chunks and resolve those chunks
             * in parallel.
             */

            // #of elements.
            final int N = numNotFound;
            // target maximum #of elements per chunk.
            final int M = MAX_CHUNK;
            // #of chunks
            final int nchunks = (int) Math.ceil((double) N / M);
            // #of elements per chunk, with any remainder in the last chunk.
            final int perChunk = N / nchunks;

            // System.err.println("N="+N+", M="+M+", nchunks="+nchunks+", perChunk="+perChunk);

            final List> tasks = new ArrayList>(
                    nchunks);

            int fromIndex = 0;
            int remaining = numNotFound;

            for (int i = 0; i < nchunks; i++) {

                final boolean lastChunk = i + 1 == nchunks;

                final int chunkSize = lastChunk ? remaining : perChunk;

                final int toIndex = fromIndex + chunkSize;

                remaining -= chunkSize;

                // System.err.println("chunkSize=" + chunkSize
                // + ", fromIndex=" + fromIndex + ", toIndex="
                // + toIndex + ", remaining=" + remaining);

                tasks.add(new ResolveBlobsTask(ndx, fromIndex, toIndex,
                        keys, notFound, ret, termCache, valueFactory));

                fromIndex = toIndex;

            }

            try {

                // Run tasks.
                final List> futures = service.invokeAll(tasks);

                // Check futures.
                for (Future f : futures)
                    f.get();

            } catch (Exception e) {

                throw new RuntimeException(e);
                
            }

        }

//          final long elapsed = System.currentTimeMillis() - begin;
//          
//          if (log.isInfoEnabled())
//              log.info("resolved " + numNotFound + " terms in "
//                      + tasks.size() + " chunks and " + elapsed + "ms");
        
        // Done.
        return null;
        
    }
    
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy