com.hfg.bio.seq.alignment.matrix.SubstitutionMatrix Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq.alignment.matrix;
import java.io.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import com.hfg.exception.ProgrammingException;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
//------------------------------------------------------------------------------
/**
Substitution matrix for use with pairwise sequence alignments.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
/**
 Substitution matrix for use with pairwise sequence alignments.
 <p>
 The predefined constants (IDENTITY, PAM250, BLOSUM62, ...) only record their name when the
 class is loaded; their scores are read on first use from a classpath resource named
 "&lt;name&gt;.mat", looked up next to this class, then under "pam/", then under "blosum/".
 Matrices built with the public constructor are parsed immediately from the supplied reader.
 <p>
 Expected data format: blank lines and lines starting with '#' are skipped; the first remaining
 line that contains no digits is the header of single-character residues; each following line
 is a residue followed by its whitespace-separated scores.
 */
public class SubstitutionMatrix
{
   private final String mName;

   // These three fields are written while holding the lock in init() but are read without it
   // (double-checked lazy initialization), so they are volatile to guarantee that a reader
   // which sees a non-null reference also sees the fully populated contents.
   private volatile float[][]       mMatrix;
   private volatile List<Character> mOrderedResidues;
   private volatile int[]           mResidueIndexLookup;

   private String mToStringCache;

   // NOTE: these two statics must be declared before the matrix constants below because
   // static initializers run in textual order and the constructor uses sInstanceMap.
   private static final Pattern sNumPattern = Pattern.compile("\\d+");
   private static final Map<String, SubstitutionMatrix> sInstanceMap = new HashMap<>();

   public static final SubstitutionMatrix IDENTITY     = new SubstitutionMatrix("IDENTITY");
   public static final SubstitutionMatrix IDENTITY_GAP = new SubstitutionMatrix("IDENTITY_GAP");
   public static final SubstitutionMatrix NUCLEOTIDE   = new SubstitutionMatrix("NUCLEOTIDE");
   public static final SubstitutionMatrix PAM10        = new SubstitutionMatrix("PAM10");
   public static final SubstitutionMatrix PAM100       = new SubstitutionMatrix("PAM100");
   public static final SubstitutionMatrix PAM120       = new SubstitutionMatrix("PAM120");
   public static final SubstitutionMatrix PAM250       = new SubstitutionMatrix("PAM250");
   public static final SubstitutionMatrix BLOSUM62     = new SubstitutionMatrix("BLOSUM62");
   public static final SubstitutionMatrix BLOSUM90     = new SubstitutionMatrix("BLOSUM90");
   public static final SubstitutionMatrix BLOSUM90_GAP = new SubstitutionMatrix("BLOSUM90_GAP");

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a named matrix whose data will be read lazily from a resource file
    and registers it so that it is returned by {@link #values()}.
    @param inName the name of the matrix (also the base name of its ".mat" resource)
    */
   private SubstitutionMatrix(String inName)
   {
      mName = inName;
      sInstanceMap.put(mName, this);
   }

   //---------------------------------------------------------------------------
   /**
    Constructor using a BufferedReader.
    The matrix is registered under its name only after the data has been read
    successfully, so a failed parse never leaves an unusable entry in {@link #values()}.
    @param inName the name to use for the substitution matrix
    @param inReader reader containing the substitution matrix data
    @throws IOException if a problem is encountered reading from the specified Reader
                        or if the data contains no residue header line
    */
   public SubstitutionMatrix(String inName, BufferedReader inReader)
      throws IOException
   {
      mName   = inName;
      mMatrix = readMatrixData(inReader);
      sInstanceMap.put(mName, this);
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Returns the name of this substitution matrix.
    @return the name given at construction
    */
   public String name()
   {
      return mName;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the matrix as text: a "# name" comment line, a header line of residues,
    and one row of scores per residue. The result is cached after the first call.
    @return a textual rendering of the matrix
    */
   @Override
   public String toString()
   {
      if (null == mToStringCache)
      {
         // Go through the accessor so that lazily-loaded matrices are initialized first.
         List<Character> residues = getOrderedResidues();

         StringBuilderPlus buffer = new StringBuilderPlus();
         buffer.appendln("# " + name());

         for (char residue : residues)
         {
            buffer.append(String.format("%5s ", residue + ""));
         }
         buffer.appendln();

         for (char residue1 : residues)
         {
            buffer.append(String.format("%5s ", residue1 + ""));
            for (char residue2 : residues)
            {
               buffer.append(String.format("%5.1f ", score(residue1, residue2)));
            }
            buffer.appendln();
         }

         mToStringCache = buffer.toString();
      }

      return mToStringCache;
   }

   //---------------------------------------------------------------------------
   /**
    Returns whether or not this substitution matrix supports the specified character.
    @param inResidue the character to be checked for inclusion in this matrix
    @return whether or not this substitution matrix supports the specified character
    */
   public boolean contains(char inResidue)
   {
      return getOrderedResidues().contains(inResidue);
   }

   //---------------------------------------------------------------------------
   /**
    Returns the row/column position of the specified residue within the raw matrix.
    @param inResidue the residue to look up (case-sensitive)
    @return the zero-based matrix index of the residue
    @throws RuntimeException if the residue is not part of this matrix
    */
   public int getResidueIndex(char inResidue)
   {
      int[] lookup = getResidueIndexLookup();

      // Characters beyond the end of the lookup table are unsupported, just like -1 entries.
      int residueIndex = (inResidue < lookup.length) ? lookup[inResidue] : -1;
      if (-1 == residueIndex)
      {
         throw unsupportedResidue(inResidue);
      }

      return residueIndex;
   }

   //---------------------------------------------------------------------------
   /**
    Converts every character of a sequence into its matrix index.
    For performance reasons the lookup is done inline on a local copy of the table
    instead of calling {@link #getResidueIndex(char)} per character.
    @param inSequence the sequence to convert
    @return an array, the same length as the sequence, of zero-based matrix indices
    @throws RuntimeException if the sequence contains a character that is not part of this matrix
    */
   public int[] getResidueIndicesForSequence(String inSequence)
   {
      int[] lookup = getResidueIndexLookup();

      int length = inSequence.length();
      int[] indices = new int[length];
      for (int i = 0; i < length; i++)
      {
         char theChar = inSequence.charAt(i);

         int residueIndex = (theChar < lookup.length) ? lookup[theChar] : -1;
         if (-1 == residueIndex)
         {
            throw unsupportedResidue(theChar);
         }

         indices[i] = residueIndex;
      }

      return indices;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the substitution score for a pair of residues.
    @param inResidue1 the first residue (selects the matrix row)
    @param inResidue2 the second residue (selects the matrix column)
    @return the score stored for the pair
    @throws RuntimeException if either residue is not part of this matrix
    */
   public float score(char inResidue1, char inResidue2)
   {
      int index1 = getResidueIndex(inResidue1);
      int index2 = getResidueIndex(inResidue2);

      return getRawMatrix()[index1][index2];
   }

   //---------------------------------------------------------------------------
   /**
    Same as {@link #score(char, char)} but both residues are upper-cased first.
    @param inResidue1 the first residue
    @param inResidue2 the second residue
    @return the score stored for the upper-cased pair
    @throws RuntimeException if either upper-cased residue is not part of this matrix
    */
   public float scoreCaseInsensitive(char inResidue1, char inResidue2)
   {
      return score(Character.toUpperCase(inResidue1), Character.toUpperCase(inResidue2));
   }

   //---------------------------------------------------------------------------
   /**
    Returns the underlying score array, loading it on first use if necessary.
    The array is the internal one (not a copy), indexed by the values returned
    from {@link #getResidueIndex(char)}.
    @return the raw score matrix
    */
   public float[][] getRawMatrix()
   {
      if (null == mMatrix)
      {
         init();
      }

      return mMatrix;
   }

   //---------------------------------------------------------------------------
   /**
    Returns all matrices that have been registered by name, i.e. the predefined
    constants plus any successfully constructed from a reader.
    @return a live view of the registered matrices
    */
   public static Collection<SubstitutionMatrix> values()
   {
      return sInstanceMap.values();
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Loads the matrix data from its resource file if it has not been loaded yet.
    Synchronized so that concurrent first uses read the resource only once.
    */
   private synchronized void init()
   {
      if (null == mMatrix)
      {
         if (StringUtil.isSet(name()))
         {
            mMatrix = readMatrixDataFromRsrc();
         }
         else
         {
            throw new ProgrammingException("The substitution matrix has not been properly initialized!");
         }
      }
   }

   //---------------------------------------------------------------------------
   /**
    Locates the ".mat" resource for this matrix and parses it.
    @return the parsed score matrix
    */
   private float[][] readMatrixDataFromRsrc()
   {
      // Look next to this class first, then in the pam and blosum sub-directories.
      InputStream rsrcStream = null;
      for (String dir : new String[] {"", "pam/", "blosum/"})
      {
         rsrcStream = this.getClass().getResourceAsStream(dir + name() + ".mat");
         if (rsrcStream != null)
         {
            break;
         }
      }

      if (null == rsrcStream)
      {
         throw new ProgrammingException("The rsrc for substitution matrix " + StringUtil.singleQuote(name()) + " couldn't be located!?");
      }

      // try-with-resources closes the stream even when parsing fails.
      try (BufferedReader reader = new BufferedReader(new InputStreamReader(rsrcStream, java.nio.charset.StandardCharsets.UTF_8)))
      {
         return readMatrixData(reader);
      }
      catch (IOException e)
      {
         throw new RuntimeException("Problem reading data for substitution matrix " + StringUtil.singleQuote(name()) + "!", e);
      }
   }

   //---------------------------------------------------------------------------
   /**
    Reads the matrix data from a file and installs it as this matrix's data.
    @param inFile the file containing the substitution matrix data
    @throws IOException if the file is missing, unreadable, or cannot be parsed
    */
   private void readMatrixDataFromFile(File inFile)
      throws IOException
   {
      if (null == inFile)
      {
         throw new IOException("No substitution matrix file was specified!");
      }
      else if (! inFile.exists())
      {
         throw new IOException("The substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + " does not exist!");
      }
      else if (! inFile.canRead())
      {
         throw new IOException("No read permissions for the substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + "!");
      }

      try (BufferedReader reader = new BufferedReader(new FileReader(inFile)))
      {
         // Keep the parsed data; previously the result was discarded.
         mMatrix = readMatrixData(reader);
      }
   }

   //---------------------------------------------------------------------------
   /**
    Parses substitution matrix data. As a side effect the ordered residue list and
    the residue index lookup table of this instance are replaced, but only once the
    whole input has been parsed successfully.
    @param inReader the reader positioned at the start of the matrix data
    @return the parsed score matrix (square, one row and column per header residue)
    @throws IOException if reading fails or no residue header line is present
    @throws RuntimeException if the header or a data row is malformed
    */
   private float[][] readMatrixData(BufferedReader inReader)
      throws IOException
   {
      float[][] matrix = null;

      // For performance, an int[] indexed by character code is used instead of a Map
      // to look up the matrix position for a given residue. -1 marks unsupported characters.
      int[] residueIndexLookup = null;
      List<Character> orderedResidues = null;

      String line;
      while ((line = inReader.readLine()) != null)
      {
         line = line.trim();
         if (0 == line.length()        // Skip blank lines
             || line.startsWith("#"))  // Skip comment lines
         {
            continue;
         }

         String[] pieces = line.split("\\s+");

         if (null == residueIndexLookup)
         {
            if (sNumPattern.matcher(line).find())
            {
               // Numeric content appearing before the header line is ignored.
               continue;
            }

            // Header line: validate the residues and find the largest character code.
            int maxCharCode = -1;
            for (String piece : pieces)
            {
               if (piece.length() > 1)
               {
                  throw new RuntimeException("The matrix header line " + StringUtil.singleQuote(line) + " is not in the proper format!");
               }

               maxCharCode = Math.max(maxCharCode, piece.charAt(0));
            }

            residueIndexLookup = new int[maxCharCode + 1];
            java.util.Arrays.fill(residueIndexLookup, -1);

            orderedResidues = new ArrayList<>(pieces.length);
            for (String piece : pieces)
            {
               char residue = piece.charAt(0);
               residueIndexLookup[residue] = orderedResidues.size();
               orderedResidues.add(residue);
            }

            matrix = new float[pieces.length][pieces.length];
         }
         else
         {
            // Data row: a residue followed by its scores.
            char residue = pieces[0].charAt(0);

            int rowIndex = (residue < residueIndexLookup.length) ? residueIndexLookup[residue] : -1;
            if (-1 == rowIndex)
            {
               throw new RuntimeException("The matrix row " + StringUtil.singleQuote(line) + " starts with a residue that is not in the header line!");
            }

            if (pieces.length - 1 > matrix.length)
            {
               throw new RuntimeException("The matrix row " + StringUtil.singleQuote(line) + " has more values than the header line has residues!");
            }

            for (int i = 1; i < pieces.length; i++)
            {
               matrix[rowIndex][i - 1] = Float.parseFloat(pieces[i]);
            }
         }
      }

      if (null == matrix)
      {
         throw new IOException("No residue header line was found in the substitution matrix data!");
      }

      // Publish the residue state only after it is completely built.
      mOrderedResidues    = orderedResidues;
      mResidueIndexLookup = residueIndexLookup;

      return matrix;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the character-code-to-matrix-index table, loading the matrix first if needed.
    @return the residue index lookup table (-1 entries mark unsupported characters)
    */
   private int[] getResidueIndexLookup()
   {
      if (null == mResidueIndexLookup)
      {
         init();
      }

      return mResidueIndexLookup;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the residues in header order, loading the matrix first if needed.
    @return the ordered residue list
    */
   private List<Character> getOrderedResidues()
   {
      if (null == mOrderedResidues)
      {
         init();
      }

      return mOrderedResidues;
   }

   //---------------------------------------------------------------------------
   /**
    Builds the exception reported when a residue is not part of this matrix.
    @param inResidue the unsupported character
    @return the exception for the caller to throw
    */
   private RuntimeException unsupportedResidue(char inResidue)
   {
      return new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(inResidue) + "!");
   }
}