com.hfg.bio.seq.pattern.BruteForceMatcher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq.pattern;
import java.util.List;
import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.SeqLocation;
//------------------------------------------------------------------------------
/**
Brute force string matching algorithm. Use when mismatches are allowed
and there is ambiguity within positions or range specifications.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
class BruteForceMatcher implements SeqPatternMatcher
{
   // NOTE(review): S and T are used throughout this class but are not declared in this
   // listing, and mPatternPositions is a raw List whose elements are used as
   // PrositePatternPosition. The generic type parameters appear to have been lost when the
   // source was extracted - confirm against the upstream file before compiling.

   private static final String sAlgorithmName = "Brute Force";

   // All state is assigned once in the constructor and never reassigned.
   private final SeqPattern  mPattern;
   private final S           mTarget;
   private final String      mTargetString;
   private final SeqLocation mSeqLocation;
   private final List        mPatternPositions;

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a matcher for the specified pattern and target with no default search location.

    @param inPattern the pattern to search for
    @param inTarget  the sequence to be searched
    */
   protected BruteForceMatcher(SeqPattern inPattern, S inTarget)
   {
      this(inPattern, inTarget, null);
   }

   //---------------------------------------------------------------------------
   /**
    Creates a matcher for the specified pattern and target.

    @param inPattern     the pattern to search for
    @param inTarget      the sequence to be searched
    @param inSeqLocation the location used by find() when it is called without one; may be null
    */
   protected BruteForceMatcher(SeqPattern inPattern, S inTarget, SeqLocation inSeqLocation)
   {
      mPattern          = inPattern;
      mTarget           = inTarget;
      mTargetString     = inTarget.getSubSequence(inSeqLocation);
      mSeqLocation      = inSeqLocation;
      mPatternPositions = mPattern.getPrositePatternPositions();
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Returns the first match of the pattern within the specified location of the target.
    Candidate start positions are tried from left to right and the search stops at the
    first one that yields a match.

    @param inSeqLocation the (1-based, inclusive) region to search. If null, the location
                         given to the constructor is used; if that is also null, the whole
                         target (1 to target length) is searched.
    @return the first match found or null if the pattern does not match
    */
   public T find(SeqLocation inSeqLocation)
   {
      T match = null;

      SeqLocation seqLocation = (inSeqLocation != null ? inSeqLocation : mSeqLocation);
      if (null == seqLocation)
      {
         seqLocation = new SeqLocation(1, getTarget().length());
      }

      // NOTE(review): mTargetString was obtained via getSubSequence() with the constructor's
      // location, yet it is indexed here with coordinates of seqLocation. If getSubSequence()
      // already trims to a non-null location this offsets twice - confirm against BioSequence.
      String targetString = mTargetString.substring(seqLocation.getStart() - 1, seqLocation.getEnd());
      if (! getPattern().isCaseSensitive())
      {
         // Locale.ROOT keeps the conversion independent of the JVM's default locale
         // (e.g. under a Turkish locale 'i' would otherwise become a dotted capital I).
         targetString = targetString.toUpperCase(java.util.Locale.ROOT);
      }

      if (getPattern().isRestrictedToSeqStart())
      {
         // A start-anchored pattern can only match if the search window begins at position 1.
         if (seqLocation.getStart().equals(1))
         {
            match = eval(targetString, 0, 0, 0, 0, 0, getPattern().getMaxMismatches(), getPattern().getIgnoreGaps());
         }
      }
      else
      {
         final int     maxMismatches = getPattern().getMaxMismatches();
         final boolean ignoreGaps    = getPattern().getIgnoreGaps();
         final int     targetOffset  = seqLocation.getStart() - 1;

         for (int i = 0; i < targetString.length(); i++)
         {
            // When gaps are ignored a match should not begin on a gap character; the
            // attempt starting at the next residue covers the same alignment.
            if (ignoreGaps
                && '-' == targetString.charAt(i))
            {
               continue;
            }

            match = eval(targetString, i, targetOffset, 0, 0, 0, maxMismatches, ignoreGaps);
            if (match != null)
            {
               break;
            }
         }
      }

      return match;
   }

   //---------------------------------------------------------------------------
   /**
    @return the sequence being searched
    */
   public S getTarget()
   {
      return mTarget;
   }

   //###########################################################################
   // PROTECTED METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    @return the pattern being searched for
    */
   protected SeqPattern getPattern()
   {
      return mPattern;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Attempts to match the pattern positions from inPatternIndex onward against the target
    string starting at inTargetIndex. Positions without a count range consume one residue
    each. A position with a count range first consumes its minimum number of residues and
    then recurses once for each permitted repeat count until a recursive call matches.

    NOTE(review): gaps are only skipped between pattern positions, not while repeating a
    count-range position, and the start coordinate computed in a recursive call does not
    account for skipped gaps - confirm whether gapped targets with ranged patterns matter.

    @param inTargetString  the (possibly upper-cased) window of the target being searched
    @param inTargetIndex   0-based index in inTargetString at which this call starts
    @param inTargetOffset  value added to window indices when building the match location
    @param inPatternIndex  index of the first pattern position to evaluate in this call
    @param inRangeSize     residues beyond one per position consumed by count-range positions
                           in enclosing calls; used to recover the match start
    @param inMismatchCount mismatches accumulated before this call
    @param inMaxMismatches maximum number of mismatches tolerated
    @param inIgnoreGaps    whether '-' characters in the target should be skipped
    @return the match or null if the pattern does not match from this position
    */
   private T eval(String inTargetString, int inTargetIndex, int inTargetOffset, int inPatternIndex, int inRangeSize,
                  int inMismatchCount, int inMaxMismatches, boolean inIgnoreGaps)
   {
      T match = null;

      int mismatchCount        = inMismatchCount;
      int targetIndex          = inTargetIndex;
      int patternPositionIndex = inPatternIndex;
      PrositePatternPosition position;

      while (patternPositionIndex < mPatternPositions.size()
             && targetIndex < inTargetString.length())
      {
         position = mPatternPositions.get(patternPositionIndex);
         char targetResidue = inTargetString.charAt(targetIndex);

         if ('-' == targetResidue
             && inIgnoreGaps)
         {
            // Step over the gap and re-enter the loop so that the bounds are re-checked and
            // the next residue (not the gap character) is compared with the same position.
            targetIndex++;
            continue;
         }

         if (position.hasCountRange())
         {
            // Repeat the position until we get to the min count for the range
            for (int i = 0; i < position.getCountRange().getStart() - 1; i++)
            {
               if (! position.matchesResidue(targetResidue))
               {
                  mismatchCount++;
                  if (mismatchCount > inMaxMismatches)
                  {
                     break;
                  }
               }

               if (targetIndex == inTargetString.length() - 1)
               {
                  // The target ended before the minimum repeat count could be reached, so
                  // the pattern cannot match from here.
                  return null;
               }

               targetResidue = inTargetString.charAt(++targetIndex);
            }

            if (mismatchCount > inMaxMismatches)
            {
               break;
            }

            // targetResidue is now the residue for the minimum repeat count. Each iteration
            // tries the rest of the pattern with one more repeat of this position.
            for (int i = 0; i < position.getCountRange().length(); i++)
            {
               if (! position.matchesResidue(targetResidue))
               {
                  mismatchCount++;
               }

               if (mismatchCount > inMaxMismatches)
               {
                  break;
               }

               match = eval(inTargetString, targetIndex + 1, inTargetOffset,
                            patternPositionIndex + 1, inRangeSize + i + position.getCountRange().getStart() - 1, mismatchCount, inMaxMismatches, inIgnoreGaps);
               if (match != null
                   || targetIndex == inTargetString.length() - 1)
               {
                  break;
               }

               targetResidue = inTargetString.charAt(++targetIndex);
            }

            // We just finished recursively searching. No need to continue the loop.
            break;
         }
         else if (! position.matchesResidue(targetResidue))
         {
            mismatchCount++;
            if (mismatchCount > inMaxMismatches)
            {
               break;
            }
         }

         targetIndex++;
         patternPositionIndex++;
      }

      // A match is built here only if no recursive call already produced one, the mismatch
      // budget was not exceeded, and either every pattern position was consumed or the only
      // remaining position is a ONE_OF position that accepts '>'.
      if (null == match
          && mismatchCount <= inMaxMismatches
          && (patternPositionIndex == mPatternPositions.size()
              || (patternPositionIndex == mPatternPositions.size() - 1
                  && mPatternPositions.get(mPatternPositions.size() - 1).getType().equals(PrositePatternPositionType.ONE_OF)
                  && mPatternPositions.get(mPatternPositions.size() - 1).matchesResidue('>'))))
      {
         // Recover the 1-based start by backing out the residues consumed by enclosing calls.
         SeqLocation matchLoc = new SeqLocation(inTargetOffset + inTargetIndex - inPatternIndex - inRangeSize + 1, inTargetOffset + targetIndex);

         // Was the pattern restricted to the end of the sequence?
         if (! getPattern().isRestrictedToSeqEnd()
             || matchLoc.getEnd().equals(getTarget().length()))
         {
            match = getPattern().createMatch(getTarget().getSubSequence(matchLoc), matchLoc);
            match.setNumMismatches(mismatchCount);
         }
      }

      return match;
   }

}