All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.byteseek.searcher.multisequence.wu_manber.WuManberMultiByteSearcher Maven / Gradle / Ivy

There is a newer version: 2.0.3
Show newest version
/*
 * Copyright Matt Palmer 2012, All rights reserved.
 * 
 * This code is licensed under a standard 3-clause BSD license:
 * 
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 * 
 *  * Redistributions of source code must retain the above copyright notice, 
 *    this list of conditions and the following disclaimer.
 * 
 *  * Redistributions in binary form must reproduce the above copyright notice, 
 *    this list of conditions and the following disclaimer in the documentation 
 *    and/or other materials provided with the distribution.
 * 
 *  * The names of its contributors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 * POSSIBILITY OF SUCH DAMAGE.
 */
package net.byteseek.searcher.multisequence.wu_manber;

import java.io.IOException;
import java.util.Collection;
import java.util.List;

import net.byteseek.io.reader.WindowReader;
import net.byteseek.matcher.multisequence.MultiSequenceMatcher;
import net.byteseek.matcher.sequence.SequenceMatcher;
import net.byteseek.searcher.SearchResult;
import net.byteseek.searcher.SearchUtils;

/**
 * THIS CLASS IS NOT YET FULLY IMPLEMENTED.
 * 

* An implementation of the Wu-Manber search algorithm that works with any block * size. It extends the {@link AbstractWuManberSearcher} * - see that class for more details of the Wu-Manber algorithm itself. *

* Note that there are optimised implementations of this algorithm for a block size of * one (@link WuManberOneByteSearcher} and with a block size of two {@link WuManberTwoByteSearcher}. * * @author Matt Palmer */ public class WuManberMultiByteSearcher extends AbstractWuManberSearcher { /** * Constructs a WuManberMultiByteSearcher. * * @param matcher The MultiSequenceMatcher containing the sequences to search for. * @param blockSize The block size to use when searching. */ public WuManberMultiByteSearcher(final MultiSequenceMatcher matcher, final int blockSize) { super(matcher, blockSize); if (matcher.getMinimumLength() < blockSize) { final String message = String.format( "Minimum sequence length (%d) cannot be smaller than the block size: %d", matcher.getMinimumLength(), blockSize); throw new IllegalArgumentException(message); } } @Override protected List> doSearchForwards(WindowReader reader, long searchPosition, long lastSearchPosition) throws IOException { throw new UnsupportedOperationException("Not supported yet."); } @Override protected List> doSearchBackwards(WindowReader reader, long searchPosition, long lastSearchPosition) throws IOException { throw new UnsupportedOperationException("Not supported yet."); } @Override public List> searchForwards(byte[] bytes, int fromPosition, int toPosition) { // Get info needed to search with: final SearchInfo info = forwardInfo.get(); final int[] safeShifts = info.shifts; final int hashBitMask = safeShifts.length - 1; // safe shifts is a power of two size. final MultiSequenceMatcher backMatcher = info.matcher; // Calculate safe bounds for the search: final int lastPossiblePosition = bytes.length - 1; final int lastPosition = toPosition < lastPossiblePosition ? toPosition : lastPossiblePosition; final int lastMinimumPosition = sequences.getMinimumLength() - 1; int searchPosition = fromPosition > 0 ? fromPosition + lastMinimumPosition : lastMinimumPosition; // Search forwards: while (searchPosition <= lastPosition) { // Calculate the hash of the current block: int blockHash = 0; for (int blockPosition = searchPosition - blockSize + 1; blockPosition <= searchPosition; blockPosition++) { final int value = bytes[blockPosition] & 0xFF; blockHash = ((blockHash << 5) - blockHash) * value; } // Get the safe shift for this block: final int safeShift = safeShifts[blockHash & hashBitMask]; // Can we shift safely? if (safeShift == 0) { // No safe shift - see if we have any matches: final Collection matches = backMatcher.allMatchesBackwards(bytes, searchPosition); if (!matches.isEmpty()) { // See if any of the matches are within the bounds of the search: final List> results = SearchUtils.resultsBackFromPosition(searchPosition, matches, fromPosition, toPosition); if (!results.isEmpty()) { return results; } } searchPosition++; // no safe shift other than to advance one on. } else { // we have a safe shift, move on: searchPosition += safeShift; } } return SearchUtils.noResults(); } @Override public List> searchBackwards(byte[] bytes, int fromPosition, int toPosition) { throw new UnsupportedOperationException("Not supported yet."); } @Override public String toString() { return getClass().getSimpleName() + "[block size: " + blockSize + " sequences:" + sequences + ']'; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy