
edu.princeton.cs.algs4.BoyerMoore Maven / Gradle / Ivy
/******************************************************************************
* Compilation: javac BoyerMoore.java
* Execution: java BoyerMoore pattern text
* Dependencies: StdOut.java
*
* Reads in two strings, the pattern and the input text, and
* searches for the pattern in the input text using the
* bad-character rule part of the Boyer-Moore algorithm.
* (does not implement the strong good suffix rule)
*
* % java BoyerMoore abracadabra abacadabrabracabracadabrabrabracad
* text: abacadabrabracabracadabrabrabracad
* pattern: abracadabra
*
* % java BoyerMoore rab abacadabrabracabracadabrabrabracad
* text: abacadabrabracabracadabrabrabracad
* pattern: rab
*
* % java BoyerMoore bcara abacadabrabracabracadabrabrabracad
* text: abacadabrabracabracadabrabrabracad
* pattern: bcara
*
* % java BoyerMoore rabrabracad abacadabrabracabracadabrabrabracad
* text: abacadabrabracabracadabrabrabracad
* pattern: rabrabracad
*
* % java BoyerMoore abacad abacadabrabracabracadabrabrabracad
* text: abacadabrabracabracadabrabrabracad
* pattern: abacad
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
* The {@code BoyerMoore} class finds the first occurrence of a pattern string
* in a text string.
*
* This implementation uses the Boyer-Moore algorithm (with the bad-character
* rule, but not the strong good suffix rule).
*
* For additional documentation,
* see Section 5.3 of
* Algorithms, 4th Edition by Robert Sedgewick and Kevin Wayne.
*/
public class BoyerMoore {
private final int R; // the radix
private int[] right; // the bad-character skip array
private char[] pattern; // store the pattern as a character array
private String pat; // or as a string
/**
* Preprocesses the pattern string.
*
* @param pat the pattern string
*/
public BoyerMoore(String pat) {
this.R = 256;
this.pat = pat;
// position of rightmost occurrence of c in the pattern
right = new int[R];
for (int c = 0; c < R; c++)
right[c] = -1;
for (int j = 0; j < pat.length(); j++)
right[pat.charAt(j)] = j;
}
/**
* Preprocesses the pattern string.
*
* @param pattern the pattern string
* @param R the alphabet size
*/
public BoyerMoore(char[] pattern, int R) {
this.R = R;
this.pattern = new char[pattern.length];
for (int j = 0; j < pattern.length; j++)
this.pattern[j] = pattern[j];
// position of rightmost occurrence of c in the pattern
right = new int[R];
for (int c = 0; c < R; c++)
right[c] = -1;
for (int j = 0; j < pattern.length; j++)
right[pattern[j]] = j;
}
/**
* Returns the index of the first occurrrence of the pattern string
* in the text string.
*
* @param txt the text string
* @return the index of the first occurrence of the pattern string
* in the text string; n if no such match
*/
public int search(String txt) {
int m = pat.length();
int n = txt.length();
int skip;
for (int i = 0; i <= n - m; i += skip) {
skip = 0;
for (int j = m-1; j >= 0; j--) {
if (pat.charAt(j) != txt.charAt(i+j)) {
skip = Math.max(1, j - right[txt.charAt(i+j)]);
break;
}
}
if (skip == 0) return i; // found
}
return n; // not found
}
/**
* Returns the index of the first occurrrence of the pattern string
* in the text string.
*
* @param text the text string
* @return the index of the first occurrence of the pattern string
* in the text string; n if no such match
*/
public int search(char[] text) {
int m = pattern.length;
int n = text.length;
int skip;
for (int i = 0; i <= n - m; i += skip) {
skip = 0;
for (int j = m-1; j >= 0; j--) {
if (pattern[j] != text[i+j]) {
skip = Math.max(1, j - right[text[i+j]]);
break;
}
}
if (skip == 0) return i; // found
}
return n; // not found
}
/**
* Takes a pattern string and an input string as command-line arguments;
* searches for the pattern string in the text string; and prints
* the first occurrence of the pattern string in the text string.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
String pat = args[0];
String txt = args[1];
char[] pattern = pat.toCharArray();
char[] text = txt.toCharArray();
BoyerMoore boyermoore1 = new BoyerMoore(pat);
BoyerMoore boyermoore2 = new BoyerMoore(pattern, 256);
int offset1 = boyermoore1.search(txt);
int offset2 = boyermoore2.search(text);
// print results
StdOut.println("text: " + txt);
StdOut.print("pattern: ");
for (int i = 0; i < offset1; i++)
StdOut.print(" ");
StdOut.println(pat);
StdOut.print("pattern: ");
for (int i = 0; i < offset2; i++)
StdOut.print(" ");
StdOut.println(pat);
}
}
/******************************************************************************
* Copyright 2002-2018, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/