All downloads are free. Search and download functionality uses the official Maven repository.

com.bigdata.search.TextIndexWriteProc Maven / Gradle / Ivy

Go to download

Blazegraph(TM) DB Core Platform. It contains all Blazegraph DB dependencies other than Blueprints.

There is a newer version: 2.1.4
Show newest version
/*

 Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

 Contact:
 SYSTAP, LLC DBA Blazegraph
 2501 Calvert ST NW #106
 Washington, DC 20008
 licenses@blazegraph.com

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; version 2 of the License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */
package com.bigdata.search;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;

import org.apache.log4j.Logger;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedure;
import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedureConstructor;
import com.bigdata.btree.proc.IParallelizableIndexProcedure;
import com.bigdata.btree.proc.IResultHandler;
import com.bigdata.btree.proc.LongAggregator;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.btree.raba.codec.IRabaCoder;
import com.bigdata.relation.IMutableRelationIndexWriteProcedure;

/**
 * Index procedure which writes a batch of key/value tuples on the text index.
 * <p>
 * The procedure runs in one of two modes, selected by the {@link #overwrite}
 * flag: when <code>true</code> each tuple is unconditionally inserted; when
 * <code>false</code> a tuple is only written if the index does not already
 * have an entry for its key. Instances are normally obtained through
 * {@link IndexWriteProcConstructor#OVERWRITE} or
 * {@link IndexWriteProcConstructor#NO_OVERWRITE}.
 * 
 * @author Bryan Thompson
 */
public class TextIndexWriteProc extends AbstractKeyArrayIndexProcedure
        implements IParallelizableIndexProcedure,
        IMutableRelationIndexWriteProcedure {

    /**
     * Serialization version identifier.
     */
    private static final long serialVersionUID = 9013449121306914750L;

    private static final Logger log = Logger.getLogger(TextIndexWriteProc.class);

    /**
     * When <code>true</code> existing entries are overwritten; when
     * <code>false</code> an entry is only written if the key is absent. The
     * flag is carried across serialization by {@link #writeMetadata} and
     * {@link #readMetadata}.
     */
    private boolean overwrite;

    /**
     * Factory for {@link TextIndexWriteProc} instances. Each factory is bound
     * to a fixed value of the overwrite flag, which it passes to every
     * procedure that it creates.
     */
    public static class IndexWriteProcConstructor extends
            AbstractKeyArrayIndexProcedureConstructor {

        /**
         * Variant which always overwrites any existing entry. Note that you
         * must still delete all entries for a document before re-indexing that
         * document.
         */
        public static TextIndexWriteProc.IndexWriteProcConstructor OVERWRITE = new IndexWriteProcConstructor(
                true);

        /**
         * Variant that will not overwrite an existing entry for a
         * {term,doc,field}. This is useful when you have a corpus which (a)
         * only grows in size; and (b) the content of each document is
         * unchanging.
         */
        public static TextIndexWriteProc.IndexWriteProcConstructor NO_OVERWRITE = new IndexWriteProcConstructor(
                false);

        /**
         * The overwrite mode handed to each procedure created by this factory.
         */
        private final boolean overwrite;

        /**
         * Values are required.
         * 
         * @return Always <code>true</code>.
         */
        @Override
        public final boolean sendValues() {

            return true;

        }

        /**
         * Private: use {@link #OVERWRITE} or {@link #NO_OVERWRITE}.
         * 
         * @param overwrite
         *            <code>true</code> iff the procedures created by this
         *            factory should overwrite existing entries.
         */
        private IndexWriteProcConstructor(final boolean overwrite) {

            this.overwrite = overwrite;

        }

        /**
         * Creates a procedure for the given slice of keys and values using
         * the overwrite mode of this factory.
         */
        @Override
        public TextIndexWriteProc newInstance(final IRabaCoder keySer,
                final IRabaCoder valSer, final int fromIndex,
                final int toIndex, final byte[][] keys, final byte[][] vals) {

            return new TextIndexWriteProc(keySer, valSer, fromIndex, toIndex,
                    keys, vals, overwrite);

        }

    }

    /**
     * De-serialization constructor.
     */
    public TextIndexWriteProc() {

    }

    /**
     * Creates a procedure for the given keys and values.
     * 
     * @param keySer
     *            The coder for the keys.
     * @param valSer
     *            The coder for the values.
     * @param fromIndex
     *            Passed through to the super class constructor.
     * @param toIndex
     *            Passed through to the super class constructor.
     * @param keys
     *            The keys.
     * @param vals
     *            The values (the array itself is required; see
     *            {@link IndexWriteProcConstructor#sendValues()}).
     * @param overwrite
     *            <code>true</code> iff existing entries should be
     *            overwritten.
     */
    protected TextIndexWriteProc(final IRabaCoder keySer,
            final IRabaCoder valSer, final int fromIndex, final int toIndex,
            final byte[][] keys, final byte[][] vals, final boolean overwrite) {

        super(keySer, valSer, fromIndex, toIndex, keys, vals);

        assert vals != null;

        /*
         * Retain the requested mode. Without this assignment the field keeps
         * its default value (false) and the OVERWRITE variant would behave
         * (and be serialized) exactly like NO_OVERWRITE.
         */
        this.overwrite = overwrite;

    }

    /**
     * This procedure writes on the index.
     * 
     * @return Always <code>false</code>.
     */
    @Override
    public final boolean isReadOnly() {

        return false;

    }

    /**
     * Writes each (key, value) pair on the index, either unconditionally or
     * only if the key is absent, depending on the {@link #overwrite} flag.
     * 
     * @param ndx
     *            The index on which to write.
     * @param keys
     *            The keys.
     * @param vals
     *            The values, read at the same position as the corresponding
     *            key. An individual value MAY be <code>null</code>.
     * 
     * @return The #of tuples for which the write operation reported a
     *         pre-existing (non-null) value, as a {@link Long}.
     */
    @Override
    public Long applyOnce(final IIndex ndx, final IRaba keys, final IRaba vals) {

        long updateCount = 0;

        final int n = keys.size();

        for (int i = 0; i < n; i++) {

            final byte[] key = keys.get(i);
            assert key != null;
            assert key.length > 0;

            /*
             * Note: The value MAY be used to encoded information. While it is
             * no longer used to encode information in the com.bigdata.search
             * package, the RDF specific full text indices still use the value.
             * Therefore it now MAY be null and is not asserted to be non-null.
             */
            final byte[] val = vals.get(i);

            /*
             * Write on the index if (a) overwrite was specified; or (b) the
             * index does not contain an entry for the key.
             * 
             * Note: This is an optimization which avoids mutation of the btree
             * when there would be no change in the data.
             */
            if (overwrite) {

                // overwrite.
                if (ndx.insert(key, val) != null) {

                    updateCount++;

                }

            } else {

                /*
                 * conditional mutation.
                 * 
                 * NOTE(review): a non-null return here presumably means that
                 * an entry already existed and was left unchanged, so in this
                 * mode the counter tracks pre-existing rather than modified
                 * tuples -- confirm against the putIfAbsent() contract.
                 */
                if (ndx.putIfAbsent(key, val) != null) {

                    updateCount++;

                }

            }

        }

        if (log.isInfoEnabled())
            log.info("wrote " + n + " tuples of which " + updateCount
                    + " were updated rows");

        return updateCount;

    }

    /**
     * Extended to read the {@link #overwrite} flag, which follows the
     * metadata read by the super class.
     */
    @Override
    protected void readMetadata(final ObjectInput in) throws IOException, ClassNotFoundException {

        super.readMetadata(in);

        overwrite = in.readBoolean();

    }

    /**
     * Extended to write the {@link #overwrite} flag, which follows the
     * metadata written by the super class.
     */
    @Override
    protected void writeMetadata(final ObjectOutput out) throws IOException {

        super.writeMetadata(out);

        out.writeBoolean(overwrite);

    }

    /**
     * Uses {@link LongAggregator} to combine the mutation counts.
     */
    @Override
    protected IResultHandler newAggregator() {

        return new LongAggregator();

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy