// com.hfg.util.BYPStringMatcher
// Part of com.hfg: an XML, HTML, SVG, and bioinformatics utility library.
package com.hfg.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
//------------------------------------------------------------------------------
/**
Baeza-Yates, Perleberg string matcher.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
/**
 Approximate string matcher that reports every alignment of a pattern against a
 target string having at most <code>mPattern.getMaxMismatches()</code> mismatching
 characters (Baeza-Yates / Perleberg counting approach).
 <p>
 For every pattern character the matcher remembers the distances from each of its
 occurrences to the end of the pattern. While scanning the target, each character
 decrements the mismatch counter of every alignment in which it agrees with the
 pattern. A counter starts at the pattern length, so once the last character of an
 alignment has been scanned the counter holds the number of mismatches.
 </p>
 <p>
 Every search overwrites the match list and the counter buffer held by this
 instance, so results of a previous search are lost when a new one starts.
 </p>
 <p>
 When the pattern is case-insensitive, <code>mTargetString</code> is replaced by its
 upper-cased form; <code>group()</code> therefore returns upper-cased text.
 </p>
 */
public class BYPStringMatcher
{
   // Size of the per-character lookup table; characters >= this value cannot
   // appear in a pattern and never agree with one.
   private static final int ALPHABET_SIZE = 256;

   private static final boolean STOP_AFTER_FIRST_MATCH = true;
   private static final boolean FIND_ALL_MATCHES = false;

   public BYPStringPattern mPattern;
   public String mTargetString;

   // null when the most recent search found nothing (or no search has run yet).
   private List<BYPStringMatch> mMatches;

   // Indexed by character code; each entry is the head of a chain of
   // end-of-pattern distances for that character (null if not in the pattern).
   private final IndexNode[] mOffsetTable = new IndexNode[ALPHABET_SIZE];

   // Circular buffer of mismatch counters, one slot per alignment end position.
   // Its length is a power of two that is >= the pattern length.
   private int[] mMismatchCount;

   // mMismatchCount.length - 1; used to wrap positions into the buffer.
   private int mCountMask;

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a matcher for the given pattern and target.

    @param inPattern the pattern to search for
    @param inTargetString the text to search in
    @throws IllegalArgumentException if the target is null or empty, if it is shorter
            than the pattern, if the pattern is empty, or if the pattern contains a
            character outside of the supported alphabet
    @throws NullPointerException if the pattern is null
    */
   BYPStringMatcher(BYPStringPattern inPattern, String inTargetString)
   {
      mPattern = inPattern;
      mTargetString = inTargetString;

      if (null == mTargetString
          || 0 == mTargetString.length())
      {
         throw new IllegalArgumentException("No string was specified for searching!");
      }

      if (null == mPattern)
      {
         throw new NullPointerException("No pattern was specified for searching!");
      }

      if (mTargetString.length() < mPattern.length())
      {
         throw new IllegalArgumentException("The target string cannot be shorter than the pattern!");
      }

      setup();
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Tests whether the whole target is a match for the pattern.

    @return true only if a match starts at index 0 and ends at the end of the target
    */
   public boolean matches()
   {
      // Only the first pattern-length characters can form a match starting at 0.
      innerFind(0, STOP_AFTER_FIRST_MATCH, mPattern.length());

      return mMatches != null && start() == 0 && end() == mTargetString.length();
   }

   //---------------------------------------------------------------------------
   /**
    Searches for the first match, starting at the beginning of the target.

    @return whether a match was found
    */
   public boolean find()
   {
      return find(0);
   }

   //---------------------------------------------------------------------------
   /**
    Searches for the first match that starts at or after the specified index.

    @param inStartIndex index in the target at which scanning begins
    @return whether a match was found
    */
   public boolean find(int inStartIndex)
   {
      innerFind(inStartIndex, STOP_AFTER_FIRST_MATCH);

      return mMatches != null;
   }

   //---------------------------------------------------------------------------
   /**
    Searches the whole target and collects every match, overlapping ones included.

    @return the matches in order of position, or null if there are none
    */
   public List<BYPStringMatch> findAll()
   {
      innerFind(0, FIND_ALL_MATCHES);

      return mMatches;
   }

   //---------------------------------------------------------------------------
   /**
    @return the matched text of the first match of the most recent search
    @throws IllegalStateException if the most recent search found no match
    */
   public String group()
   {
      return firstMatch().getString();
   }

   //---------------------------------------------------------------------------
   /**
    @return the start index (inclusive) of the first match of the most recent search
    @throws IllegalStateException if the most recent search found no match
    */
   public int start()
   {
      return firstMatch().start();
   }

   //---------------------------------------------------------------------------
   /**
    @return the end index (exclusive) of the first match of the most recent search
    @throws IllegalStateException if the most recent search found no match
    */
   public int end()
   {
      return firstMatch().end();
   }

   //---------------------------------------------------------------------------
   /**
    @return the number of mismatches in the first match of the most recent search
    @throws IllegalStateException if the most recent search found no match
    */
   public int mismatches()
   {
      return firstMatch().getNumMismatches();
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   // Returns the first match of the most recent search or fails with a
   // descriptive exception instead of dereferencing a null list.
   private BYPStringMatch firstMatch()
   {
      if (null == mMatches)
      {
         throw new IllegalStateException("No match found");
      }

      // The list is only ever created when a match is about to be added.
      return mMatches.get(0);
   }

   //---------------------------------------------------------------------------
   // Builds the per-character offset chains and allocates the counter buffer.
   private void setup()
   {
      int patternLength = mPattern.length();
      String patternString = mPattern.getPatternString();

      if (mPattern.isCaseInsensitive())
      {
         // Locale.ROOT keeps the folding independent of the default locale.
         mTargetString = mTargetString.toUpperCase(Locale.ROOT);
         patternString = patternString.toUpperCase(Locale.ROOT);
      }

      if (patternLength < 1)
      {
         throw new IllegalArgumentException("The pattern cannot be empty!");
      }

      // A slot is reused every bufferSize positions. Counters are touched at most
      // patternLength - 1 positions ahead of the current one, so any size
      // >= patternLength is safe; a power of two allows wrapping with a bit mask.
      int bufferSize = ALPHABET_SIZE;
      while (bufferSize < patternLength)
      {
         bufferSize <<= 1;
      }
      mMismatchCount = new int[bufferSize];
      mCountMask = bufferSize - 1;

      for (int i = 0; i < patternLength; i++)
      {
         char patternChar = patternString.charAt(i);
         if (patternChar >= ALPHABET_SIZE)
         {
            throw new IllegalArgumentException("The pattern character '" + patternChar + "' at index " + i
                                               + " is outside of the supported " + ALPHABET_SIZE + "-character alphabet!");
         }

         // Distance from this occurrence to the last pattern position. Prepending
         // is fine because the order of the decrements does not matter.
         mOffsetTable[patternChar] = new IndexNode(patternLength - i - 1, mOffsetTable[patternChar]);
      }
   }

   //---------------------------------------------------------------------------
   // Searches from the specified index to the end of the target.
   private void innerFind(int inStartIndex, boolean inStopAfterFirstMatch)
   {
      innerFind(inStartIndex, inStopAfterFirstMatch, mTargetString.length());
   }

   //---------------------------------------------------------------------------
   // Scans target positions [inStartIndex, inSearchLength) and stores the matches
   // found in mMatches (left null if there are none). Only alignments that start
   // at or after inStartIndex are reported.
   private void innerFind(int inStartIndex, boolean inStopAfterFirstMatch, int inSearchLength)
   {
      mMatches = null;

      int patternLength = mPattern.length();
      int maxMismatches = mPattern.getMaxMismatches();

      // Start every search from a clean slate; a previous search (especially one
      // that stopped early) leaves partially decremented counters behind.
      Arrays.fill(mMismatchCount, patternLength);

      for (int i = inStartIndex; i < inSearchLength; i++)
      {
         char targetChar = mTargetString.charAt(i);

         // Characters outside of the table cannot occur in the pattern.
         if (targetChar < ALPHABET_SIZE)
         {
            for (IndexNode node = mOffsetTable[targetChar]; node != null; node = node.mNext)
            {
               // This character agrees with the pattern for the alignment ending
               // node.mOffset positions further on.
               mMismatchCount[(i + node.mOffset) & mCountMask]--;
            }
         }

         int slot = i & mCountMask;
         int start = i - patternLength + 1;

         // Alignments starting before inStartIndex have not seen all of their
         // characters, so their counters are not meaningful.
         if (start >= inStartIndex
             && mMismatchCount[slot] <= maxMismatches)
         {
            if (null == mMatches)
            {
               mMatches = new ArrayList<>(1);
            }

            mMatches.add(new BYPStringMatch(mTargetString.substring(start, i + 1), start, i + 1).setNumMismatches(mMismatchCount[slot]));

            if (inStopAfterFirstMatch)
            {
               break;
            }
         }

         // Recycle the slot for the alignment ending one buffer length later.
         mMismatchCount[slot] = patternLength;
      }
   }

   //###########################################################################
   // INNER CLASS
   //###########################################################################

   // One occurrence of a character in the pattern: its distance to the end of the
   // pattern plus a link to the next occurrence of the same character.
   private static final class IndexNode
   {
      final int mOffset;
      final IndexNode mNext;

      //------------------------------------------------------------------------
      IndexNode(int inOffset, IndexNode inNext)
      {
         mOffset = inOffset;
         mNext = inNext;
      }

      //------------------------------------------------------------------------
      @Override
      public String toString()
      {
         return "Offset: " + mOffset + "; Next: " + (null == mNext ? "none" : String.valueOf(mNext.mOffset));
      }
   }
}