com.hfg.bio.seq.PositionalFrequencyMatrix Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq;
import com.hfg.bio.seq.alignment.MultipleSequenceAlignment;
import com.hfg.exception.ProgrammingException;
import com.hfg.math.Counter;
import com.hfg.util.collection.OrderedSet;
import com.hfg.util.collection.SparseMatrix;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
//------------------------------------------------------------------------------
/**
Positional frequency matrix.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class PositionalFrequencyMatrix implements Cloneable
{
public enum Flag
{
CASE_SENSITIVE
}
private Set mFlags = new HashSet<>(4);
private SparseMatrix mMatrix;
private Map mPositionTotalCountMap;
private Counter mSequenceCounter = new Counter();
private Map mPositionGapOpenCountMap;
private Map mPositionGapExtCountMap;
//##########################################################################
// CONSTRUCTORS
//##########################################################################
//--------------------------------------------------------------------------
public PositionalFrequencyMatrix(MultipleSequenceAlignment inMSA)
{
this(inMSA, (Flag)null);
}
//--------------------------------------------------------------------------
public PositionalFrequencyMatrix(MultipleSequenceAlignment inMSA, Flag... inFlags)
{
// Flags must be set first
setFlags(inFlags);
if (inMSA != null)
{
// Initialize values to zero
Set residueSet;
if (BioSequenceType.PROTEIN.equals(inMSA.getBioSequenceType()))
{
residueSet = ((Protein)inMSA.getSequences().get(0)).getAminoAcidSet().getResidueChars();
}
else
{
residueSet = new HashSet<>(30);
for (char nucleotide : "ATGCN".toCharArray())
{
residueSet.add(nucleotide);
}
}
init(residueSet, inMSA.length());
for (T seq : inMSA.getSequences())
{
addSequence(seq);
}
}
}
//##########################################################################
// PUBLIC METHODS
//##########################################################################
//---------------------------------------------------------------------------
@Override
public PositionalFrequencyMatrix clone()
{
PositionalFrequencyMatrix cloneObj;
try
{
cloneObj = (PositionalFrequencyMatrix) super.clone();
}
catch (CloneNotSupportedException e)
{
throw new ProgrammingException(e);
}
if (mMatrix != null)
{
cloneObj.mMatrix = mMatrix.clone();
}
if (mPositionTotalCountMap != null)
{
for (Integer key : mPositionTotalCountMap.keySet())
{
cloneObj.mPositionTotalCountMap.put(key, mPositionTotalCountMap.get(key).clone());
}
}
return cloneObj;
}
//--------------------------------------------------------------------------
public void addSequence(T inSequence)
{
if (inSequence.length() != mMatrix.colKeySet().size())
{
throw new RuntimeException("The added sequence (" + inSequence.getID() + ") is not the same length as the others for this " + getClass().getSimpleName() + "!");
}
String sequenceString = inSequence.getSequence();
if (! mFlags.contains(Flag.CASE_SENSITIVE))
{
sequenceString = sequenceString.toUpperCase();
}
int position = 1;
char prevResidue = ' ';
for (char residue : sequenceString.toCharArray())
{
if ('-' == residue)
{
Map gapMap;
if (prevResidue != '-')
{
gapMap = mPositionGapOpenCountMap;
}
else
{
gapMap = mPositionGapExtCountMap;
}
Counter counter = gapMap.get(position);
if (null == counter)
{
counter = new Counter();
gapMap.put(position, counter);
}
counter.increment();
}
else
{
mPositionTotalCountMap.get(position).increment();
Counter counter = mMatrix.get(residue, position);
if (null == counter)
{
counter = new Counter();
mMatrix.put(residue, position, counter);
}
counter.increment();
}
position++;
prevResidue = residue;
}
mSequenceCounter.increment();
}
//--------------------------------------------------------------------------
public Set getResidueKeys()
{
return mMatrix.rowKeySet();
}
//--------------------------------------------------------------------------
public Set getPositionKeys()
{
return mMatrix.colKeySet();
}
//--------------------------------------------------------------------------
public void insertGapAtPosition(int inPosition)
{
List positionKeys = new ArrayList<>(getPositionKeys());
if (inPosition > 0
&& inPosition <= positionKeys.get(positionKeys.size() - 1) + 1)
{
if (inPosition <= positionKeys.size())
{
for (int i = positionKeys.size(); i >= inPosition; i--)
{
mMatrix.putCol(i + 1, mMatrix.getCol(i));
mPositionTotalCountMap.put(i + 1, mPositionTotalCountMap.get(i));
}
}
// Put all zero values in the new column
for (Character residue : getResidueKeys())
{
mMatrix.put(residue, inPosition, new Counter());
}
int numGapsAtPrevPosition = (inPosition > 1 ? getGapCount(inPosition - 1) : 0);
Counter counter = mPositionGapOpenCountMap.get(inPosition);
if (null == counter)
{
counter = new Counter();
mPositionGapOpenCountMap.put(inPosition, counter);
}
counter.add(getPositionTotal(inPosition - 1));
counter = mPositionGapExtCountMap.get(inPosition);
if (null == counter)
{
counter = new Counter();
mPositionGapExtCountMap.put(inPosition, counter);
}
counter.add(numGapsAtPrevPosition);
mPositionTotalCountMap.put(inPosition, new Counter());
}
}
//--------------------------------------------------------------------------
public int getCount(Character inResidue, Integer inPosition)
{
int count = 0;
if (inResidue == '-')
{
count = getGapCount(inPosition);
}
else
{
Counter counter = mMatrix.get(inResidue, inPosition);
if (null == counter)
{
if (!getPositionKeys().contains(inPosition))
{
throw new RuntimeException("Position out of bounds: " + inPosition + "!");
}
else if (!getResidueKeys().contains(inResidue))
{
throw new RuntimeException("Invalid residue: " + inResidue + "!");
}
}
count = counter.intValue();
}
return count;
}
//--------------------------------------------------------------------------
public int getGapCount(Integer inPosition)
{
return getGapOpenCount(inPosition) + getGapExtCount(inPosition);
}
//--------------------------------------------------------------------------
public int getGapOpenCount(Integer inPosition)
{
Counter counter = mPositionGapOpenCountMap.get(inPosition);
return (counter != null ? counter.intValue() : 0);
}
//--------------------------------------------------------------------------
public int getGapExtCount(Integer inPosition)
{
Counter counter = mPositionGapExtCountMap.get(inPosition);
return (counter != null ? counter.intValue() : 0);
}
//--------------------------------------------------------------------------
/**
Returns the residue with the highest frequency at the specified position or
multiple residues if they share the highest frequency value.
@param inPosition the (1-based) position to evaluate
@return the residue with the highest frequency at the specified position or
multiple residues if they share the highest frequency value
*/
public Set getHighestFreqResidues(Integer inPosition)
{
Map positionMap = mMatrix.getCol(inPosition);
int maxCount = 0;
for (Character key : positionMap.keySet())
{
Counter counter = positionMap.get(key);
if (counter.intValue() > maxCount)
{
maxCount = counter.intValue();
}
}
Set resultSet = new OrderedSet<>(10);
for (Character key : positionMap.keySet())
{
if (positionMap.get(key).intValue() == maxCount)
{
resultSet.add(key);
}
}
return resultSet;
}
//--------------------------------------------------------------------------
public Set getResidues(Integer inPosition)
{
return getResidues(inPosition, null);
}
//--------------------------------------------------------------------------
/**
* Retrieves the residues at the specified position that are represented above
* the specified minimum fraction.
* @param inPosition the aligned sequence position
* @param inMinFraction the minimum fraction for residues to return. Null returns
* residues that are present in at least one sequence.
* @return a Set of residue characters from the specified position
*/
public Set getResidues(Integer inPosition, Float inMinFraction)
{
if (! getPositionKeys().contains(inPosition))
{
throw new RuntimeException("Position out of bounds: " + inPosition + "!");
}
Map residueCountMap = mMatrix.getCol(inPosition);
Set residues = new OrderedSet<>(30);
float total = mPositionTotalCountMap.get(inPosition).floatValue();
for (Character residue : residueCountMap.keySet())
{
if (inMinFraction != null)
{
if (residueCountMap.get(residue).intValue()/total > inMinFraction)
{
residues.add(residue);
}
}
else if (residueCountMap.get(residue).intValue() > 0)
{
residues.add(residue);
}
}
return residues;
}
//--------------------------------------------------------------------------
public float getFraction(Character inResidue, Integer inPosition)
{
int count = getCount(inResidue, inPosition);
float fraction;
if (inResidue == '-')
{
fraction = count / (float) (getPositionTotal(inPosition) + count);
}
else
{
fraction = count / (float) getPositionTotal(inPosition);
}
return fraction;
}
//--------------------------------------------------------------------------
public float getFractionIncludingGaps(Character inResidue, Integer inPosition)
{
return getCount(inResidue, inPosition) / (float) (getPositionTotal(inPosition) + getGapCount(inPosition));
}
//--------------------------------------------------------------------------
/**
Returns the number of sequences with a residue and not a gap at the specified position.
* @param inPosition the 1-based position in the matrix being queried
* @return the number of sequences with a residue and not a gap at the specified position
*/
public int getPositionTotal(Integer inPosition)
{
if (! getPositionKeys().contains(inPosition))
{
throw new RuntimeException("Position out of bounds: " + inPosition + "!");
}
return mPositionTotalCountMap.get(inPosition).intValue();
}
//--------------------------------------------------------------------------
public Set getFlags()
{
return Collections.unmodifiableSet(mFlags);
}
//##########################################################################
// PRIVATE METHODS
//##########################################################################
//--------------------------------------------------------------------------
private void setFlags(Flag[] inFlags)
{
mFlags.clear();
if (inFlags != null)
{
for (Flag flag : inFlags)
{
mFlags.add(flag);
}
}
}
//--------------------------------------------------------------------------
private void init(Set inResidueSet, int inLength)
{
// Initialize values to zero
Set residueSet = new HashSet<>(30);
for (Character residue : inResidueSet)
{
if (null == residue)
{
residue = 'X';
}
if (! mFlags.contains(Flag.CASE_SENSITIVE))
{
residue = Character.toUpperCase(residue);
}
residueSet.add(residue);
}
// Initialize the matrix and position total map with zeroed counters
mMatrix = new SparseMatrix<>(residueSet.size(), inLength);
mPositionTotalCountMap = new HashMap<>(inLength);
for (int position = 1; position <= inLength; position++)
{
mPositionTotalCountMap.put(position, new Counter());
for (char residue : residueSet)
{
mMatrix.put(residue, position, new Counter());
}
}
mPositionGapOpenCountMap = new HashMap<>(inLength);
mPositionGapExtCountMap = new HashMap<>(inLength);
}
}