All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.seq.pattern.BruteForceMatcher Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.pattern;

import java.util.List;
import java.util.Locale;

import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.SeqLocation;

//------------------------------------------------------------------------------
/**
 Brute force string matching algorithm. Use when mismatches are allowed
 and there is ambiguity within positions or range specifications.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ class BruteForceMatcher implements SeqPatternMatcher { private static String sAlgorithmName = "Brute Force"; private SeqPattern mPattern; private S mTarget; private String mTargetString; private SeqLocation mSeqLocation; private List mPatternPositions; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- protected BruteForceMatcher(SeqPattern inPattern, S inTarget) { this(inPattern, inTarget, null); } //--------------------------------------------------------------------------- protected BruteForceMatcher(SeqPattern inPattern, S inTarget, SeqLocation inSeqLocation) { mPattern = inPattern; mTarget = inTarget; mTargetString = inTarget.getSubSequence(inSeqLocation); mSeqLocation = inSeqLocation; mPatternPositions = mPattern.getPrositePatternPositions(); } 
//########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public T find(SeqLocation inSeqLocation) { T match = null; SeqLocation seqLocation = (inSeqLocation != null ? inSeqLocation : mSeqLocation); if (null == seqLocation) { seqLocation = new SeqLocation(1, getTarget().length()); } String targetString = mTargetString.substring(seqLocation.getStart() - 1, seqLocation.getEnd()); if (! getPattern().isCaseSensitive()) { targetString = targetString.toUpperCase(); } if (getPattern().isRestrictedToSeqStart()) { if (seqLocation.getStart().equals(1)) { match = eval(targetString, 0, 0, 0, 0, 0, getPattern().getMaxMismatches(), getPattern().getIgnoreGaps()); } } else { for (int i = 0; i < targetString.length(); i++) { match = eval(targetString, i, seqLocation.getStart() - 1, 0, 0, 0, getPattern().getMaxMismatches(), getPattern().getIgnoreGaps()); if (match != null) { break; } } } return match; } //--------------------------------------------------------------------------- public S getTarget() { return mTarget; } //########################################################################### // PROTECTED METHODS //########################################################################### //--------------------------------------------------------------------------- protected SeqPattern getPattern() { return mPattern; } //########################################################################### // PRIVATE METHODS //########################################################################### //--------------------------------------------------------------------------- private T eval(String inTargetString, int inTargetIndex, int inTargetOffset, int inPatternIndex, int inRangeSize, int inMismatchCount, int inMaxMismatches, boolean inIgnoreGaps) { T match = null; int mismatchCount = 
inMismatchCount; int targetIndex = inTargetIndex; int patternPositionIndex = inPatternIndex; PrositePatternPosition position; while (patternPositionIndex < mPatternPositions.size() && targetIndex < inTargetString.length()) { position = mPatternPositions.get(patternPositionIndex); char targetResidue = inTargetString.charAt(targetIndex); if ('-' == targetResidue && inIgnoreGaps) { targetIndex++; } if (position.hasCountRange()) { // Repeat the position until we get to the min count for the range for (int i = 0; i < position.getCountRange().getStart() - 1; i++) { if (! position.matchesResidue(targetResidue)) { mismatchCount++; if (mismatchCount > inMaxMismatches) { break; } } if (targetIndex == inTargetString.length() - 1) { break; } targetResidue = inTargetString.charAt(++targetIndex); } if (mismatchCount > inMaxMismatches) { break; } for (int i = 0; i < position.getCountRange().length(); i++) { if (! position.matchesResidue(targetResidue)) { mismatchCount++; if (mismatchCount > inMaxMismatches) { break; } } if (mismatchCount > inMaxMismatches) { break; } match = eval(inTargetString, targetIndex + 1, inTargetOffset, patternPositionIndex + 1, inRangeSize + i + position.getCountRange().getStart() - 1, mismatchCount, inMaxMismatches, inIgnoreGaps); if (match != null || targetIndex == inTargetString.length() - 1) { break; } targetResidue = inTargetString.charAt(++targetIndex); } // We just finished recursively searching. No need to continue the loop. break; } else if (! 
position.matchesResidue(targetResidue)) { mismatchCount++; if (mismatchCount > inMaxMismatches) { break; } } targetIndex++; patternPositionIndex++; } if (null == match && mismatchCount <= inMaxMismatches && (patternPositionIndex == mPatternPositions.size() || (patternPositionIndex == mPatternPositions.size() - 1 && mPatternPositions.get(mPatternPositions.size() - 1).getType().equals(PrositePatternPositionType.ONE_OF) && mPatternPositions.get(mPatternPositions.size() - 1).matchesResidue('>')))) { SeqLocation matchLoc = new SeqLocation(inTargetOffset + inTargetIndex - inPatternIndex - inRangeSize + 1, inTargetOffset + targetIndex); // Was the pattern restricted to the end of the sequence? if (! getPattern().isRestrictedToSeqEnd() || matchLoc.getEnd().equals(getTarget().length())) { match = getPattern().createMatch(getTarget().getSubSequence(matchLoc), matchLoc); match.setNumMismatches(mismatchCount); } } return match; } /* //--------------------------------------------------------------------------- private T eval(String inTargetString, int inTargetIndex, int inTargetOffset, int inMaxMismatches, boolean inIgnoreGaps) { T match = null; int mismatchCount = 0; int targetIndex = inTargetIndex; int patternPositionIndex = 0; while (patternPositionIndex < mPatternPositions.size() && targetIndex < inTargetString.length()) { PrositePatternPosition position = mPatternPositions.get(patternPositionIndex); char targetResidue = inTargetString.charAt(targetIndex); if ('-' == targetResidue && inIgnoreGaps) { targetIndex++; } if (position.hasCountRange()) { // Repeat the position until we get to the min count for the range for (int i = 0; i < position.getCountRange().getStart(); i++) { if (! position.matchesResidue(targetResidue)) { mismatchCount++; if (mismatchCount > inMaxMismatches) { break; } } } if (mismatchCount > inMaxMismatches) { break; } } else if (! 
position.matchesResidue(targetResidue)) { mismatchCount++; if (mismatchCount > inMaxMismatches) { break; } } targetIndex++; patternPositionIndex++; } if (mismatchCount <= inMaxMismatches && patternPositionIndex == mPatternPositions.size()) { SeqLocation matchLoc = new SeqLocation(inTargetOffset + inTargetIndex + 1, inTargetOffset + targetIndex); match = getPattern().createMatch(getTarget().getSubSequence(matchLoc), matchLoc); match.setNumMismatches(mismatchCount); } return match; } */ }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy