All downloads are free. The search and download features use the official Maven repository.

com.hfg.util.BYPStringMatcher Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

//------------------------------------------------------------------------------
/**
 Baeza-Yates, Perleberg string matcher.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class BYPStringMatcher { public BYPStringPattern mPattern; public String mTargetString; private List mMatches; private int[] mMismatchCount = new int[ALPHABET_SIZE]; private List mOffsetList = new ArrayList<>(ALPHABET_SIZE); private static final int ALPHABET_SIZE = 256; private static final int MOD256 = 0xff; private static final boolean STOP_AFTER_FIRST_MATCH = true; private static final boolean FIND_ALL_MATCHES = false; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- private BYPStringMatcher() { } //--------------------------------------------------------------------------- BYPStringMatcher(BYPStringPattern inPattern, String inTargetString) { mPattern = inPattern; mTargetString = inTargetString; if (null == mTargetString || 0 == 
mTargetString.length()) { throw new RuntimeException("No string was specified for searching!"); } else if (mTargetString.length() < mPattern.length()) { throw new RuntimeException("No target string cannot be shorter than the pattern!"); } setup(); } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public boolean matches() { innerFind(0, STOP_AFTER_FIRST_MATCH, mPattern.length()); return mMatches != null && start() == 0 && end() == mTargetString.length(); } //--------------------------------------------------------------------------- public boolean find() { return find(0); } //--------------------------------------------------------------------------- public boolean find(int inStartIndex) { innerFind(inStartIndex, STOP_AFTER_FIRST_MATCH); return mMatches != null; } //--------------------------------------------------------------------------- public List findAll() { innerFind(0, FIND_ALL_MATCHES); return mMatches; } //--------------------------------------------------------------------------- public String group() { return mMatches.get(0).getString(); } //--------------------------------------------------------------------------- public int start() { return mMatches.get(0).start(); } //--------------------------------------------------------------------------- public int end() { return mMatches.get(0).end(); } //--------------------------------------------------------------------------- public int mismatches() { return mMatches.get(0).getNumMismatches(); } //########################################################################### // PRIVATE METHODS //########################################################################### //--------------------------------------------------------------------------- private void setup() { if (mPattern.isCaseInsensitive()) { 
mTargetString = mTargetString.toUpperCase(); } int patternLength = mPattern.length(); String patternString = mPattern.getPatternString(); if (mPattern.isCaseInsensitive()) { patternString = patternString.toUpperCase(); } for (int i = 0; i < ALPHABET_SIZE; i++) { mOffsetList.add(new BYPStringMatcher(). new IndexNode()); mMismatchCount[i] = patternLength; } for (int i = 0, j = 128; i < patternLength; i++) { mMismatchCount[i] = ALPHABET_SIZE; char patternChar = patternString.charAt(i); IndexNode indexNode = mOffsetList.get((int)patternChar); if (indexNode.getOffset() == -1) { indexNode.setOffset(patternLength - i - 1); } else { short nextIndex = indexNode.nextIndex(); indexNode.setNextIndex((short)j++); indexNode = mOffsetList.get(indexNode.nextIndex()); indexNode.setOffset(patternLength - i - 1); indexNode.setNextIndex(nextIndex); } } mMismatchCount[patternLength - 1] = patternLength; } //--------------------------------------------------------------------------- private void innerFind(int inStartIndex, boolean inStopAfterFirstMatch) { innerFind(inStartIndex, inStopAfterFirstMatch, mTargetString.length()); } //--------------------------------------------------------------------------- private void innerFind(int inStartIndex, boolean inStopAfterFirstMatch, int inSearchLength) { mMatches = null; int patternLength = mPattern.length(); int maxMismatches = mPattern.getMaxMismatches(); for (int i = inStartIndex; i < inSearchLength; i++) { IndexNode indexNode = mOffsetList.get((int)mTargetString.charAt(i)); int offset; if ((offset = indexNode.getOffset()) >= 0) { mMismatchCount[(i + offset)&MOD256]--; if (indexNode.nextIndex() >= 0) { for (indexNode = mOffsetList.get(indexNode.nextIndex()); indexNode != null; indexNode = mOffsetList.get(indexNode.nextIndex())) { mMismatchCount[(i + indexNode.getOffset()) & MOD256]--; if (indexNode.nextIndex() < 0) { break; } } } } if (mMismatchCount[i&MOD256] <= maxMismatches) { int start = i - patternLength + 1; if (start >= inStartIndex) 
{ // System.out.println(String.format("Match in position %d with %d mismatches", i-patternLength+1, mMismatchCount[i&MOD256])); if (null == mMatches) { mMatches = new ArrayList<>(1); } mMatches.add(new BYPStringMatch(mTargetString.substring(start, i + 1), start, i + 1).setNumMismatches(mMismatchCount[i & MOD256])); if (inStopAfterFirstMatch) { break; } } } mMismatchCount[i&MOD256] = patternLength; } } //########################################################################### // INNER CLASS //########################################################################### private class IndexNode { int mOffset = -1; short mNext = -1; public int getOffset() { return mOffset; } public void setOffset(int inValue) { mOffset = inValue; } public short nextIndex() { return mNext; } public void setNextIndex(short inValue) { mNext = inValue; } public String toString() { return "Offset: " + mOffset + "; Next: " + mNext; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy