org.apache.xmlbeans.impl.regex.BMPattern Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-xmlbeans Show documentation
Show all versions of commons-xmlbeans Show documentation
The Apache Commons Codec package contains simple encoder and decoders for
various formats such as Base64 and Hexadecimal. In addition to these
widely used encoders and decoders, the codec package also maintains a
collection of phonetic encoding utilities.
The newest version!
/* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xmlbeans.impl.regex;
import java.text.CharacterIterator;
/**
 * Boyer-Moore style substring searcher driven by a "bad character" shift table.
 *
 * <p>The shift table is indexed by {@code ch % shiftTable.length}; when several
 * characters share a slot the smallest shift is kept, which can only make the
 * search more conservative, never skip a match.</p>
 *
 * <p>In ignore-case mode two characters are considered equal when they are
 * identical, when their {@link Character#toUpperCase(char)} forms are equal, or
 * when the lower-case forms of those upper-case forms are equal.</p>
 */
public class BMPattern {
    /** Characters of the pattern being searched for. */
    char[] pattern;
    /** Bad-character shift table, indexed by {@code ch % shiftTable.length}. */
    int[] shiftTable;
    /** Whether matching folds case as described in the class comment. */
    boolean ignoreCase;

    /**
     * Creates a searcher with a 256-entry shift table.
     *
     * @param pat        the pattern to search for
     * @param ignoreCase {@code true} to match case-insensitively
     */
    public BMPattern(String pat, boolean ignoreCase) {
        this(pat, 256, ignoreCase);
    }

    /**
     * Creates a searcher with a shift table of the given size.
     *
     * @param pat        the pattern to search for
     * @param tableSize  number of slots in the shift table
     * @param ignoreCase {@code true} to match case-insensitively
     */
    public BMPattern(String pat, int tableSize, boolean ignoreCase) {
        this.pattern = pat.toCharArray();
        this.shiftTable = new int[tableSize];
        this.ignoreCase = ignoreCase;

        int length = this.pattern.length;
        // A character that never occurs in the pattern allows a full-length shift.
        for (int i = 0; i < this.shiftTable.length; i++) {
            this.shiftTable[i] = length;
        }
        for (int i = 0; i < length; i++) {
            char ch = this.pattern[i];
            // Distance from this occurrence to the last pattern position.
            int diff = length - i - 1;
            this.recordShift(ch, diff);
            if (this.ignoreCase) {
                // Register the case variants of the pattern character as well.
                ch = Character.toUpperCase(ch);
                this.recordShift(ch, diff);
                ch = Character.toLowerCase(ch);
                this.recordShift(ch, diff);
            }
        }
    }

    /** Lowers the table slot for {@code ch} to {@code diff} if {@code diff} is smaller. */
    private void recordShift(char ch, int diff) {
        int slot = ch % this.shiftTable.length;
        if (diff < this.shiftTable[slot]) {
            this.shiftTable[slot] = diff;
        }
    }

    /** Returns the shift registered for exactly the character {@code ch}. */
    private int shiftFor(char ch) {
        return this.shiftTable[ch % this.shiftTable.length];
    }

    /**
     * Returns the shift to use for a text character in ignore-case mode.
     *
     * <p>The table only holds the pattern character, its upper-case form and the
     * lower-case form of that. A text character may match a pattern character
     * solely through case mapping (for example U+0131, whose upper-case form is
     * 'I'), so all three forms of the text character are consulted and the
     * smallest shift wins. Looking up only the raw character could shift past a
     * real match.</p>
     */
    private int shiftForIgnoreCase(char ch) {
        int shift = this.shiftFor(ch);
        char upper = Character.toUpperCase(ch);
        int candidate = this.shiftFor(upper);
        if (candidate < shift) {
            shift = candidate;
        }
        candidate = this.shiftFor(Character.toLowerCase(upper));
        if (candidate < shift) {
            shift = candidate;
        }
        return shift;
    }

    /** Tests two characters for equality under this class's case-folding rule. */
    private static boolean equalsIgnoreCase(char a, char b) {
        if (a == b) {
            return true;
        }
        char upperA = Character.toUpperCase(a);
        char upperB = Character.toUpperCase(b);
        return upperA == upperB
            || Character.toLowerCase(upperA) == Character.toLowerCase(upperB);
    }

    /**
     * Searches the characters of {@code iterator} in the range
     * {@code [start, limit)} for the pattern. The iterator's position is moved
     * by the search.
     *
     * @param iterator source of the text
     * @param start    index at which the search begins
     * @param limit    index just past the last character that may belong to a match
     * @return the index of the first character of the first match,
     *         {@code start} if the pattern is empty,
     *         or -1 if iterator does not contain this pattern.
     */
    public int matches(CharacterIterator iterator, int start, int limit) {
        if (this.ignoreCase) {
            return this.matchesIgnoreCase(iterator, start, limit);
        }
        int plength = this.pattern.length;
        if (plength == 0) {
            return start;
        }
        // index is the exclusive end of the current candidate window.
        int index = start + plength;
        while (index <= limit) {
            int pindex = plength;
            int nindex = index + 1;
            char ch;
            // Compare right to left; ch ends up as the mismatching text character.
            do {
                if ((ch = iterator.setIndex(--index)) != this.pattern[--pindex]) {
                    break;
                }
                if (pindex == 0) {
                    return index;
                }
            } while (pindex > 0);
            index += this.shiftFor(ch) + 1;
            // Always advance by at least one position.
            if (index < nindex) {
                index = nindex;
            }
        }
        return -1;
    }

    /**
     * Searches {@code str} in the range {@code [start, limit)} for the pattern.
     *
     * @param str   the text to search
     * @param start index at which the search begins
     * @param limit index just past the last character that may belong to a match
     * @return the index of the first character of the first match,
     *         {@code start} if the pattern is empty,
     *         or -1 if str does not contain this pattern.
     */
    public int matches(String str, int start, int limit) {
        if (this.ignoreCase) {
            return this.matchesIgnoreCase(str, start, limit);
        }
        int plength = this.pattern.length;
        if (plength == 0) {
            return start;
        }
        // index is the exclusive end of the current candidate window.
        int index = start + plength;
        while (index <= limit) {
            int pindex = plength;
            int nindex = index + 1;
            char ch;
            // Compare right to left; ch ends up as the mismatching text character.
            do {
                if ((ch = str.charAt(--index)) != this.pattern[--pindex]) {
                    break;
                }
                if (pindex == 0) {
                    return index;
                }
            } while (pindex > 0);
            index += this.shiftFor(ch) + 1;
            // Always advance by at least one position.
            if (index < nindex) {
                index = nindex;
            }
        }
        return -1;
    }

    /**
     * Searches {@code chars} in the range {@code [start, limit)} for the pattern.
     *
     * @param chars the text to search
     * @param start index at which the search begins
     * @param limit index just past the last character that may belong to a match
     * @return the index of the first character of the first match,
     *         {@code start} if the pattern is empty,
     *         or -1 if chars does not contain this pattern.
     */
    public int matches(char[] chars, int start, int limit) {
        if (this.ignoreCase) {
            return this.matchesIgnoreCase(chars, start, limit);
        }
        int plength = this.pattern.length;
        if (plength == 0) {
            return start;
        }
        // index is the exclusive end of the current candidate window.
        int index = start + plength;
        while (index <= limit) {
            int pindex = plength;
            int nindex = index + 1;
            char ch;
            // Compare right to left; ch ends up as the mismatching text character.
            do {
                if ((ch = chars[--index]) != this.pattern[--pindex]) {
                    break;
                }
                if (pindex == 0) {
                    return index;
                }
            } while (pindex > 0);
            index += this.shiftFor(ch) + 1;
            // Always advance by at least one position.
            if (index < nindex) {
                index = nindex;
            }
        }
        return -1;
    }

    /**
     * Case-insensitive counterpart of {@link #matches(CharacterIterator, int, int)};
     * same parameters and return value.
     */
    int matchesIgnoreCase(CharacterIterator iterator, int start, int limit) {
        int plength = this.pattern.length;
        if (plength == 0) {
            return start;
        }
        int index = start + plength;
        while (index <= limit) {
            int pindex = plength;
            int nindex = index + 1;
            char ch;
            do {
                ch = iterator.setIndex(--index);
                if (!equalsIgnoreCase(ch, this.pattern[--pindex])) {
                    break;
                }
                if (pindex == 0) {
                    return index;
                }
            } while (pindex > 0);
            // Case-aware lookup so a case variant of a pattern character is not skipped.
            index += this.shiftForIgnoreCase(ch) + 1;
            if (index < nindex) {
                index = nindex;
            }
        }
        return -1;
    }

    /**
     * Case-insensitive counterpart of {@link #matches(String, int, int)};
     * same parameters and return value.
     */
    int matchesIgnoreCase(String text, int start, int limit) {
        int plength = this.pattern.length;
        if (plength == 0) {
            return start;
        }
        int index = start + plength;
        while (index <= limit) {
            int pindex = plength;
            int nindex = index + 1;
            char ch;
            do {
                ch = text.charAt(--index);
                if (!equalsIgnoreCase(ch, this.pattern[--pindex])) {
                    break;
                }
                if (pindex == 0) {
                    return index;
                }
            } while (pindex > 0);
            // Case-aware lookup so a case variant of a pattern character is not skipped.
            index += this.shiftForIgnoreCase(ch) + 1;
            if (index < nindex) {
                index = nindex;
            }
        }
        return -1;
    }

    /**
     * Case-insensitive counterpart of {@link #matches(char[], int, int)};
     * same parameters and return value.
     */
    int matchesIgnoreCase(char[] chars, int start, int limit) {
        int plength = this.pattern.length;
        if (plength == 0) {
            return start;
        }
        int index = start + plength;
        while (index <= limit) {
            int pindex = plength;
            int nindex = index + 1;
            char ch;
            do {
                ch = chars[--index];
                if (!equalsIgnoreCase(ch, this.pattern[--pindex])) {
                    break;
                }
                if (pindex == 0) {
                    return index;
                }
            } while (pindex > 0);
            // Case-aware lookup so a case variant of a pattern character is not skipped.
            index += this.shiftForIgnoreCase(ch) + 1;
            if (index < nindex) {
                index = nindex;
            }
        }
        return -1;
    }
}