All downloads are free. The search and download features use the official Maven repository.

com.hfg.bio.seq.alignment.matrix.SubstitutionMatrix Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.alignment.matrix;

import java.io.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import com.hfg.exception.ProgrammingException;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;


//------------------------------------------------------------------------------
/**
 Substitution matrix for use with pairwise sequence alignments.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class SubstitutionMatrix { private String mName; private float[][] mMatrix; private List mOrderedResidues; private int[] mResidueIndexLookup; private String mToStringCache; private static Pattern sNumPattern = Pattern.compile("\\d+"); private static Map sInstanceMap = new HashMap<>(); public static final SubstitutionMatrix IDENTITY = new SubstitutionMatrix("IDENTITY"); public static final SubstitutionMatrix IDENTITY_GAP = new SubstitutionMatrix("IDENTITY_GAP"); public static final SubstitutionMatrix NUCLEOTIDE = new SubstitutionMatrix("NUCLEOTIDE"); public static final SubstitutionMatrix PAM10 = new SubstitutionMatrix("PAM10"); public static final SubstitutionMatrix PAM100 = new SubstitutionMatrix("PAM100"); public static final SubstitutionMatrix PAM120 = new SubstitutionMatrix("PAM120"); public static final SubstitutionMatrix PAM250 = new SubstitutionMatrix("PAM250"); public static final SubstitutionMatrix BLOSUM62 = new 
SubstitutionMatrix("BLOSUM62"); public static final SubstitutionMatrix BLOSUM90 = new SubstitutionMatrix("BLOSUM90"); public static final SubstitutionMatrix BLOSUM90_GAP = new SubstitutionMatrix("BLOSUM90_GAP"); //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- /** Standard matrices will be retrievable from resource files. */ private SubstitutionMatrix(String inName) { mName = inName; sInstanceMap.put(mName, this); } //--------------------------------------------------------------------------- /** Constructor using a BufferedReader. @param inName the name to use for the substitution matrix @param inReader reader containing the substitution matrix data @throws IOException if a problem is encountered reading from the specified Reader */ public SubstitutionMatrix(String inName, BufferedReader inReader) throws IOException { this(inName); mMatrix = readMatrixData(inReader); } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public String name() { return mName; } //--------------------------------------------------------------------------- @Override public String toString() { if (null == mToStringCache) { StringBuilderPlus buffer = new StringBuilderPlus(); buffer.appendln("# " + name()); for (char residue : mOrderedResidues) { buffer.append(String.format("%5s ", residue + "")); } buffer.appendln(); for (char residue1 : mOrderedResidues) { buffer.append(String.format("%5s ", residue1 + "")); for (char residue2 : mOrderedResidues) { buffer.append(String.format("%5.1f ", score(residue1, residue2))); } buffer.appendln(); } mToStringCache = buffer.toString(); } return 
mToStringCache; } //--------------------------------------------------------------------------- /** Returns whether or not this substitution matrix supports the specified character. * @param inResidue the character to be checked for inclusion in this matrix * @return whether or not this substitution matrix supports the specified character */ public boolean contains(char inResidue) { return mOrderedResidues.contains(inResidue); } //--------------------------------------------------------------------------- public int getResidueIndex(char inResidue) { int[] residueIndexLookup = getResidueIndexLookup(); if (((int) inResidue) >= residueIndexLookup.length) { throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(inResidue) + "!"); } int residueIndex = residueIndexLookup[(int) inResidue]; if (-1 == residueIndex) { throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(inResidue) + "!"); } return residueIndex; } //--------------------------------------------------------------------------- // For performance reasons some method contents are included here directly. 
public int[] getResidueIndicesForSequence(String inSequence) { if (null == mResidueIndexLookup) { init(); } int length = inSequence.length(); int indices[] = new int[length]; for (int i = 0; i < length; i++) { char theChar = inSequence.charAt(i); if (((int) theChar) >= mResidueIndexLookup.length) { throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(theChar) + "!"); } int residueIndex = mResidueIndexLookup[(int) theChar]; if (-1 == residueIndex) { throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(theChar) + "!"); } indices[i] = residueIndex; } return indices; } //--------------------------------------------------------------------------- public float score(char inResidue1, char inResidue2) { int index1 = getResidueIndex(inResidue1); int index2 = getResidueIndex(inResidue2); return getRawMatrix()[index1][index2]; } //--------------------------------------------------------------------------- public float scoreCaseInsensitive(char inResidue1, char inResidue2) { return score(Character.toUpperCase(inResidue1), Character.toUpperCase(inResidue2)); } //--------------------------------------------------------------------------- public float[][] getRawMatrix() { if (null == mMatrix) { init(); } return mMatrix; } //--------------------------------------------------------------------------- public static Collection values() { return sInstanceMap.values(); } //########################################################################### // PRIVATE METHODS //########################################################################### //--------------------------------------------------------------------------- private synchronized void init() { if (null == mMatrix) { if (StringUtil.isSet(name())) { mMatrix = readMatrixDataFromRsrc(); } else { throw new ProgrammingException("The substitution matrix has not been properly initialized!"); } 
} } //--------------------------------------------------------------------------- private float[][] readMatrixDataFromRsrc() { float[][] matrix; InputStream rsrcStream = this.getClass().getResourceAsStream(name() + ".mat"); if (null == rsrcStream) { // Try looking in the pam directory rsrcStream = this.getClass().getResourceAsStream("pam/" + name() + ".mat"); if (null == rsrcStream) { // Try looking in the blosum directory rsrcStream = this.getClass().getResourceAsStream("blosum/" + name() + ".mat"); if (null == rsrcStream) { throw new ProgrammingException("The rsrc for substitution matrix " + StringUtil.singleQuote(name()) + " couldn't be located!?"); } } } try { BufferedReader reader = new BufferedReader(new InputStreamReader(rsrcStream)); matrix = readMatrixData(reader); reader.close(); } catch (IOException e) { throw new RuntimeException("Problem reading data for substitution matrix " + StringUtil.singleQuote(name()) + "!", e); } return matrix; } //--------------------------------------------------------------------------- private void readMatrixDataFromFile(File inFile) throws IOException { if (null == inFile) { throw new IOException("No substitution matrix file was specified!"); } else if (! inFile.exists()) { throw new IOException("The substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + " does not exist!"); } else if (! 
inFile.canRead()) { throw new IOException("No read permissions for the substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + "!"); } BufferedReader reader = null; try { reader = new BufferedReader(new FileReader(inFile)); readMatrixData(reader); } finally { if (reader != null) { reader.close(); } } } //--------------------------------------------------------------------------- private float[][] readMatrixData(BufferedReader inReader) throws IOException { float[][] matrix = null; // For performance, we will use an int[] instead of a Map to lookup the proper matrix position for a given residue int[] residueIndexLookup = null; String line; while ((line = inReader.readLine()) != null) { line = line.trim(); if (0 == line.length() // Skip blank lines || line.startsWith("#")) // Skip comment lines { continue; } // Read the header line of residues if (null == residueIndexLookup && ! sNumPattern.matcher(line).find()) { // mResidueIndex = new OrderedMap(); int inex = 0; int maxIndex = -1; String pieces[] = line.split("\\s+"); for (String piece : pieces) { if (piece.length() > 1) { throw new RuntimeException("The matrix header line " + StringUtil.singleQuote(line) + " is not in the proper format!"); } if (((int)piece.charAt(0)) > maxIndex) { maxIndex = (int) piece.charAt(0); } // mResidueIndex.put(piece.charAt(0), index++); } residueIndexLookup = new int[maxIndex + 1]; mOrderedResidues = new ArrayList<>(pieces.length); for (int i = 0; i <= maxIndex; i++) { residueIndexLookup[i] = -1; } int matrixColIndex = 0; for (String piece : pieces) { residueIndexLookup[(int) piece.charAt(0)] = matrixColIndex++; mOrderedResidues.add(piece.charAt(0)); } // matrix = new float[mResidueIndex.size()][mResidueIndex.size()]; matrix = new float[matrixColIndex][matrixColIndex]; } else if (matrix != null) { // Read matrix data String pieces[] = line.split("\\s+"); char residue = pieces[0].charAt(0); // int residueIndex = mResidueIndex.get(residue); int residueIndex = 
residueIndexLookup[(int)residue]; for (int i = 1; i < pieces.length; i++) { matrix[residueIndex][i - 1] = Float.parseFloat(pieces[i]); } } } mResidueIndexLookup = residueIndexLookup; return matrix; } //--------------------------------------------------------------------------- private int[] getResidueIndexLookup() { if (null == mResidueIndexLookup) { init(); } return mResidueIndexLookup; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy