All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.marklogic.http.BoyerMoore Maven / Gradle / Ivy

/*
 * Copyright 2003-2019 MarkLogic Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.marklogic.http;

/**
 * Applies the bad-character rule of the Boyer-Moore search algorithm to find
 * a pattern within a byte array.
 *
 * <p>The searcher is designed for chunked input: when the bytes at the end of
 * the searched range match a proper prefix of the pattern, {@link #search}
 * reports "not found" but records the length of that prefix, which can then
 * be read through {@link #partialMatch()}.
 *
 * <p>The pattern is copied at construction time, so later changes to the
 * caller's array do not affect this searcher. Each call to {@link #search}
 * overwrites the stored partial-match length, so a single instance must not
 * be used by several threads at once without external synchronization.
 */
public class BoyerMoore {
    public static final int ALPHABET_SIZE = 256;

    /** Private copy of the pattern; never exposed or modified. */
    private final byte[] pattern;
    /**
     * For every unsigned byte value, the index of its rightmost occurrence in
     * the pattern, or -1 when the value does not occur in the pattern.
     */
    private final int[] jump;
    /** Length of the pattern prefix matched at the end of the last search. */
    private int partialMatch;

    /**
     * Creates a searcher for the given pattern.
     *
     * @param pattern bytes to look for; the array is copied, so the caller
     *                may reuse or modify it afterwards
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public BoyerMoore(byte[] pattern) {
        if (pattern == null) {
            throw new NullPointerException("pattern");
        }
        // Defensive copy: the jump table is derived from the pattern, so the
        // two must never get out of sync through an external mutation.
        this.pattern = pattern.clone();
        this.jump = buildJumpTable(this.pattern);
    }

    /**
     * Search for pattern in the byte array text.  Returns the position of the
     * first occurrence of pattern if found and -1 otherwise.
     *
     * <p>Only the bytes in {@code [start, end)} are examined. If no complete
     * occurrence lies inside that range but the range ends with a prefix of
     * the pattern, -1 is returned and {@link #partialMatch()} yields the
     * length of that prefix; in every other case {@link #partialMatch()}
     * yields 0 after this call.
     *
     * @param text byte array to search
     * @param start start position (inclusive)
     * @param end end position (exclusive)
     *
     * @return starting position of the first occurrence of pattern in text or
     * -1 if not found.
     */
    public int search(byte[] text, int start, int end) {
        partialMatch = 0;
        final int m = pattern.length;
        int i = start;
        while (i < end) {
            // Compare right to left. Window positions at or beyond 'end' are
            // treated as matching so that a pattern prefix at the very end of
            // the range can be detected as a partial match.
            int j = m - 1;
            while (j >= 0 && (i + j >= end || text[i + j] == pattern[j])) {
                j--;
            }

            if (j < 0) { // every inspected position matched
                if (i + m - 1 >= end) {
                    // The window sticks out past 'end': only a prefix matched.
                    partialMatch = end - i;
                    return -1;
                }
                return i; // full match; partialMatch is already 0
            }

            if (i + m - 1 < end) {
                // Bad-character rule: align the rightmost occurrence of the
                // mismatching text byte with its position in the text. Always
                // advance by at least one so the search never moves backward.
                int shift = j - jump[text[i + j] & 0xff];
                i += Math.max(shift, 1);
            } else {
                // Window overlaps the end of the range; step one byte at a
                // time so no partial match is skipped.
                i++;
            }
        }
        return -1;
    }

    /**
     * Return the number of matching characters before the end of a chunk.
     *
     * @return the number of matching characters recorded by the most recent
     * call to {@link #search}; 0 if that call found a full match or no
     * trailing partial match.
     */
    public int partialMatch() {
        return partialMatch;
    }

    /**
     * Compute the jump table based on the pattern.
     *
     * @param pattern pattern bytes to index
     * @return table mapping each unsigned byte value to the index of its
     * rightmost occurrence in {@code pattern}, or -1 if absent
     */
    private static int[] buildJumpTable(byte[] pattern) {
        int[] table = new int[ALPHABET_SIZE];
        for (int k = 0; k < table.length; k++) {
            table[k] = -1;
        }
        // Scanning left to right lets later (more rightward) occurrences
        // overwrite earlier ones, leaving the rightmost index for each byte.
        for (int j = 0; j < pattern.length; j++) {
            table[pattern[j] & 0xff] = j;
        }
        return table;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy