All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.seq.PositionalFrequencyMatrix Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq;

import com.hfg.bio.seq.alignment.MultipleSequenceAlignment;
import com.hfg.exception.ProgrammingException;
import com.hfg.math.Counter;
import com.hfg.util.collection.OrderedSet;
import com.hfg.util.collection.SparseMatrix;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

//------------------------------------------------------------------------------
/**
 Positional frequency matrix.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class PositionalFrequencyMatrix implements Cloneable { public enum Flag { CASE_SENSITIVE } private Set mFlags = new HashSet<>(4); private SparseMatrix mMatrix; private Map mPositionTotalCountMap; private Counter mSequenceCounter = new Counter(); private Map mPositionGapOpenCountMap; private Map mPositionGapExtCountMap; //########################################################################## // CONSTRUCTORS //########################################################################## //-------------------------------------------------------------------------- public PositionalFrequencyMatrix(MultipleSequenceAlignment inMSA) { this(inMSA, (Flag)null); } //-------------------------------------------------------------------------- public PositionalFrequencyMatrix(MultipleSequenceAlignment inMSA, Flag... inFlags) { // Flags must be set first setFlags(inFlags); if (inMSA != null) { // Initialize values to zero Set residueSet; if (BioSequenceType.PROTEIN.equals(inMSA.getBioSequenceType())) { residueSet = ((Protein)inMSA.getSequences().get(0)).getAminoAcidSet().getResidueChars(); } else { residueSet = new HashSet<>(30); for (char nucleotide : "ATGCN".toCharArray()) { residueSet.add(nucleotide); } } init(residueSet, inMSA.length()); for (T seq : inMSA.getSequences()) { addSequence(seq); } } } //########################################################################## // PUBLIC METHODS //########################################################################## //--------------------------------------------------------------------------- @Override public PositionalFrequencyMatrix clone() { PositionalFrequencyMatrix cloneObj; try { cloneObj = (PositionalFrequencyMatrix) super.clone(); } catch (CloneNotSupportedException e) { throw new ProgrammingException(e); } if (mMatrix != null) { cloneObj.mMatrix = mMatrix.clone(); } if (mPositionTotalCountMap != null) { for (Integer key : mPositionTotalCountMap.keySet()) { cloneObj.mPositionTotalCountMap.put(key, mPositionTotalCountMap.get(key).clone()); } } return cloneObj; } //-------------------------------------------------------------------------- public void addSequence(T inSequence) { if (inSequence.length() != mMatrix.colKeySet().size()) { throw new RuntimeException("The added sequence (" + inSequence.getID() + ") is not the same length as the others for this " + getClass().getSimpleName() + "!"); } String sequenceString = inSequence.getSequence(); if (! mFlags.contains(Flag.CASE_SENSITIVE)) { sequenceString = sequenceString.toUpperCase(); } int position = 1; char prevResidue = ' '; for (char residue : sequenceString.toCharArray()) { if ('-' == residue) { Map gapMap; if (prevResidue != '-') { gapMap = mPositionGapOpenCountMap; } else { gapMap = mPositionGapExtCountMap; } Counter counter = gapMap.get(position); if (null == counter) { counter = new Counter(); gapMap.put(position, counter); } counter.increment(); } else { mPositionTotalCountMap.get(position).increment(); Counter counter = mMatrix.get(residue, position); if (null == counter) { counter = new Counter(); mMatrix.put(residue, position, counter); } counter.increment(); } position++; prevResidue = residue; } mSequenceCounter.increment(); } //-------------------------------------------------------------------------- public Set getResidueKeys() { return mMatrix.rowKeySet(); } //-------------------------------------------------------------------------- public Set getPositionKeys() { return mMatrix.colKeySet(); } //-------------------------------------------------------------------------- public void insertGapAtPosition(int inPosition) { List positionKeys = new ArrayList<>(getPositionKeys()); if (inPosition > 0 && inPosition <= positionKeys.get(positionKeys.size() - 1) + 1) { if (inPosition <= positionKeys.size()) { for (int i = positionKeys.size(); i >= inPosition; i--) { mMatrix.putCol(i + 1, mMatrix.getCol(i)); mPositionTotalCountMap.put(i + 1, mPositionTotalCountMap.get(i)); } } // Put all zero values in the new column for (Character residue : getResidueKeys()) { mMatrix.put(residue, inPosition, new Counter()); } int numGapsAtPrevPosition = (inPosition > 1 ? getGapCount(inPosition - 1) : 0); Counter counter = mPositionGapOpenCountMap.get(inPosition); if (null == counter) { counter = new Counter(); mPositionGapOpenCountMap.put(inPosition, counter); } counter.add(getPositionTotal(inPosition - 1)); counter = mPositionGapExtCountMap.get(inPosition); if (null == counter) { counter = new Counter(); mPositionGapExtCountMap.put(inPosition, counter); } counter.add(numGapsAtPrevPosition); mPositionTotalCountMap.put(inPosition, new Counter()); } } //-------------------------------------------------------------------------- public int getCount(Character inResidue, Integer inPosition) { int count = 0; if (inResidue == '-') { count = getGapCount(inPosition); } else { Counter counter = mMatrix.get(inResidue, inPosition); if (null == counter) { if (!getPositionKeys().contains(inPosition)) { throw new RuntimeException("Position out of bounds: " + inPosition + "!"); } else if (!getResidueKeys().contains(inResidue)) { throw new RuntimeException("Invalid residue: " + inResidue + "!"); } } count = counter.intValue(); } return count; } //-------------------------------------------------------------------------- public int getGapCount(Integer inPosition) { return getGapOpenCount(inPosition) + getGapExtCount(inPosition); } //-------------------------------------------------------------------------- public int getGapOpenCount(Integer inPosition) { Counter counter = mPositionGapOpenCountMap.get(inPosition); return (counter != null ? counter.intValue() : 0); } //-------------------------------------------------------------------------- public int getGapExtCount(Integer inPosition) { Counter counter = mPositionGapExtCountMap.get(inPosition); return (counter != null ? counter.intValue() : 0); } //-------------------------------------------------------------------------- /** Returns the residue with the highest frequency at the specified position or multiple residues if they share the highest frequency value. @param inPosition the (1-based) position to evaluate @return the residue with the highest frequency at the specified position or multiple residues if they share the highest frequency value */ public Set getHighestFreqResidues(Integer inPosition) { Map positionMap = mMatrix.getCol(inPosition); int maxCount = 0; for (Character key : positionMap.keySet()) { Counter counter = positionMap.get(key); if (counter.intValue() > maxCount) { maxCount = counter.intValue(); } } Set resultSet = new OrderedSet<>(10); for (Character key : positionMap.keySet()) { if (positionMap.get(key).intValue() == maxCount) { resultSet.add(key); } } return resultSet; } //-------------------------------------------------------------------------- public Set getResidues(Integer inPosition) { return getResidues(inPosition, null); } //-------------------------------------------------------------------------- /** * Retrieves the residues at the specified position that are represented above * the specified minimum fraction. * @param inPosition the aligned sequence position * @param inMinFraction the minimum fraction for residues to return. Null returns * residues that are present in at least one sequence. * @return a Set of residue characters from the specified position */ public Set getResidues(Integer inPosition, Float inMinFraction) { if (! getPositionKeys().contains(inPosition)) { throw new RuntimeException("Position out of bounds: " + inPosition + "!"); } Map residueCountMap = mMatrix.getCol(inPosition); Set residues = new OrderedSet<>(30); float total = mPositionTotalCountMap.get(inPosition).floatValue(); for (Character residue : residueCountMap.keySet()) { if (inMinFraction != null) { if (residueCountMap.get(residue).intValue()/total > inMinFraction) { residues.add(residue); } } else if (residueCountMap.get(residue).intValue() > 0) { residues.add(residue); } } return residues; } //-------------------------------------------------------------------------- public float getFraction(Character inResidue, Integer inPosition) { int count = getCount(inResidue, inPosition); float fraction; if (inResidue == '-') { fraction = count / (float) (getPositionTotal(inPosition) + count); } else { fraction = count / (float) getPositionTotal(inPosition); } return fraction; } //-------------------------------------------------------------------------- public float getFractionIncludingGaps(Character inResidue, Integer inPosition) { return getCount(inResidue, inPosition) / (float) (getPositionTotal(inPosition) + getGapCount(inPosition)); } //-------------------------------------------------------------------------- /** Returns the number of sequences with a residue and not a gap at the specified position. * @param inPosition the 1-based position in the matrix being queried * @return the number of sequences with a residue and not a gap at the specified position */ public int getPositionTotal(Integer inPosition) { if (! getPositionKeys().contains(inPosition)) { throw new RuntimeException("Position out of bounds: " + inPosition + "!"); } return mPositionTotalCountMap.get(inPosition).intValue(); } //-------------------------------------------------------------------------- public Set getFlags() { return Collections.unmodifiableSet(mFlags); } //########################################################################## // PRIVATE METHODS //########################################################################## //-------------------------------------------------------------------------- private void setFlags(Flag[] inFlags) { mFlags.clear(); if (inFlags != null) { for (Flag flag : inFlags) { mFlags.add(flag); } } } //-------------------------------------------------------------------------- private void init(Set inResidueSet, int inLength) { // Initialize values to zero Set residueSet = new HashSet<>(30); for (Character residue : inResidueSet) { if (null == residue) { residue = 'X'; } if (! mFlags.contains(Flag.CASE_SENSITIVE)) { residue = Character.toUpperCase(residue); } residueSet.add(residue); } // Initialize the matrix and position total map with zeroed counters mMatrix = new SparseMatrix<>(residueSet.size(), inLength); mPositionTotalCountMap = new HashMap<>(inLength); for (int position = 1; position <= inLength; position++) { mPositionTotalCountMap.put(position, new Counter()); for (char residue : residueSet) { mMatrix.put(residue, position, new Counter()); } } mPositionGapOpenCountMap = new HashMap<>(inLength); mPositionGapExtCountMap = new HashMap<>(inLength); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy