All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.seq.alignment.MultipleSequenceAlignment Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.alignment;

import java.util.*;

import com.hfg.bio.phylogeny.DistanceMatrix;
import com.hfg.bio.phylogeny.DistanceMatrixModel;
import com.hfg.bio.phylogeny.UncorrectedModel;
import com.hfg.bio.seq.PositionalFrequencyMatrix;
import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.BioSequenceType;
import com.hfg.exception.ProgrammingException;
import com.hfg.network.Edge;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.DataTable;
import com.hfg.util.collection.SparseMatrix;

//------------------------------------------------------------------------------
/**
 Container for aligned sequences.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class MultipleSequenceAlignment implements Cloneable { //************************************************************************** // PRIVATE FIELDS //************************************************************************** private String mId; private String mTitle; private List mAlignedSeqs = new ArrayList<>(); private int mAlignmentLength; private BioSequenceType mBioSequenceType; private PositionalFrequencyMatrix mPositionFreqMatrix; private DataTable mDataTable; private Map mAttributes; //************************************************************************** // CONSTRUCTORS //************************************************************************** //--------------------------------------------------------------------------- public MultipleSequenceAlignment() { } //--------------------------------------------------------------------------- public MultipleSequenceAlignment(Collection inAlignedSeqs) { if (inAlignedSeqs != null) { for (T seq : inAlignedSeqs) { addSequence(seq); } } } //************************************************************************** // PUBLIC METHODS //************************************************************************** //--------------------------------------------------------------------------- public MultipleSequenceAlignment setId(String inValue) { mId = inValue; return this; } //--------------------------------------------------------------------------- public String getId() { return mId; } //--------------------------------------------------------------------------- public MultipleSequenceAlignment setTitle(String inValue) { mTitle = inValue; return this; } //--------------------------------------------------------------------------- public String getTitle() { return mTitle; } //--------------------------------------------------------------------------- public MultipleSequenceAlignment setDataTable(DataTable inValue) { mDataTable = inValue; return this; } //--------------------------------------------------------------------------- public DataTable getDataTable() { return mDataTable; } //--------------------------------------------------------------------------- @Override public MultipleSequenceAlignment clone() { MultipleSequenceAlignment cloneObj; try { cloneObj = (MultipleSequenceAlignment) super.clone(); } catch (CloneNotSupportedException e) { throw new ProgrammingException(e); } if (mAlignedSeqs != null) { cloneObj.mAlignedSeqs = new ArrayList<>(mAlignedSeqs.size()); for (T seq : mAlignedSeqs) { cloneObj.mAlignedSeqs.add((T) seq.clone()); } } if (mPositionFreqMatrix != null) { cloneObj.mPositionFreqMatrix = mPositionFreqMatrix.clone(); } return cloneObj; } //--------------------------------------------------------------------------- public void clearCachedData() { mPositionFreqMatrix = null; } //--------------------------------------------------------------------------- public MultipleSequenceAlignment subset(Collection inSeqIds) { MultipleSequenceAlignment subset = clone(); subset.mAlignedSeqs.clear(); for (String seqId : inSeqIds) { subset.mAlignedSeqs.add(getSequence(seqId)); } // Force recalculation of the position freq. data subset.mPositionFreqMatrix = null; return subset; } //--------------------------------------------------------------------------- public void addSequence(T inSeq) { if (inSeq != null) { if (inSeq.length() != mAlignmentLength && CollectionUtil.hasValues(mAlignedSeqs)) { throw new RuntimeException(inSeq.getID() + "'s length of " + inSeq.length() + " is different from the alignment length (" + mAlignmentLength + ")! They must be the same."); } mAlignedSeqs.add(inSeq); if (0 == mAlignmentLength) mAlignmentLength = inSeq.length(); } if (mPositionFreqMatrix != null) { mPositionFreqMatrix.addSequence(inSeq); } } //--------------------------------------------------------------------------- public List getSequences() { return mAlignedSeqs; } //--------------------------------------------------------------------------- public T getSequence(String inSeqId) { T requestedSeq = null; for (T seq : getSequences()) { if (seq.getID().equals(inSeqId)) { requestedSeq = seq; break; } } return requestedSeq; } //--------------------------------------------------------------------------- public void removeSequence(T inSeq) { getSequences().remove(inSeq); clearCachedData(); } //--------------------------------------------------------------------------- public void addInsert(int inIndex) { for (T seq : getSequences()) { StringBuilder buffer = new StringBuilder(seq.getSequence()); buffer.insert(inIndex, "-"); seq.setSequence(buffer); } mAlignmentLength++; mPositionFreqMatrix = null; } //--------------------------------------------------------------------------- public int size() { return (getSequences() != null ? getSequences().size() : 0); } //--------------------------------------------------------------------------- public BioSequenceType getBioSequenceType() { if (null == mBioSequenceType && CollectionUtil.hasValues(mAlignedSeqs)) { mBioSequenceType = mAlignedSeqs.get(0).getType(); } return mBioSequenceType; } //--------------------------------------------------------------------------- public int length() { return mAlignmentLength; } //--------------------------------------------------------------------------- public PositionalFrequencyMatrix getPositionFreqMatrix() { return getPositionFreqMatrix(null); } //--------------------------------------------------------------------------- public PositionalFrequencyMatrix getPositionFreqMatrix(PositionalFrequencyMatrix.Flag[] inFlags) { if (mPositionFreqMatrix != null) { Set currentFlags = mPositionFreqMatrix.getFlags(); boolean flagsDiffer = true; if (null == inFlags) { if (! CollectionUtil.hasValues(currentFlags)) { flagsDiffer = false; } } else if (CollectionUtil.hasValues(currentFlags) && inFlags.length == currentFlags.size()) { for (PositionalFrequencyMatrix.Flag flag : inFlags) { flagsDiffer = false; if (! currentFlags.contains(flag)) { flagsDiffer = true; break; } } } if (flagsDiffer) { mPositionFreqMatrix = new PositionalFrequencyMatrix(this, inFlags); } } else { if (CollectionUtil.hasValues(getSequences())) { mPositionFreqMatrix = new PositionalFrequencyMatrix(this, inFlags); } } return mPositionFreqMatrix; } /* //--------------------------------------------------------------------------- public SparseMatrix getPositionProbabilityMatrix() { SparseMatrix probabilityMatrix = null; int size = size(); PositionalFrequencyMatrix freqMatrix = getPositionFreqMatrix(); if (freqMatrix != null) { probabilityMatrix = new SparseMatrix(); for (Character residue : freqMatrix.getResidueKeys()) { for (Integer position : freqMatrix.getPositionKeys()) { probabilityMatrix.put(residue, position, freqMatrix.getCount(residue, position) / (float) size); } } } return probabilityMatrix; } */ //--------------------------------------------------------------------------- /** Returns a percent identity matrix adjusted for any terminal gaps. @return the generated percent identity matrix */ public SparseMatrix getPctIdentityMatrix() { int matrixWidth = mAlignedSeqs != null ? mAlignedSeqs.size() + 10 : 10; SparseMatrix matrix = new SparseMatrix<>(matrixWidth, matrixWidth); if (mAlignedSeqs != null) { // Calculate the pct. id between ea. pair of sequences. // (The A-B pct. id is the not necessarily the same as the B-A pct. id.) for (int i = 0; i < mAlignedSeqs.size(); i++) { BioSequence seq1 = mAlignedSeqs.get(i); matrix.put(seq1.getID(), seq1.getID(), 100f); // Identity diagonal for (int j = i + 1; j < mAlignedSeqs.size(); j++) { BioSequence seq2 = mAlignedSeqs.get(j); AlignedQuery query = new AlignedQuery(seq1, seq1.getSequence(), 1); AlignedSubject subject = new AlignedSubject(seq2, seq2.getSequence(), 1); PairwiseSeqAlignment pairwiseSeqAlignment = new PairwiseSeqAlignment(query, subject); matrix.put(seq1.getID(), seq2.getID(), pairwiseSeqAlignment.getAdjustedPctIdentity()); // Invert query = new AlignedQuery(seq2, seq2.getSequence(), 1); subject = new AlignedSubject(seq1, seq1.getSequence(), 1); pairwiseSeqAlignment = new PairwiseSeqAlignment(query, subject); matrix.put(seq2.getID(), seq1.getID(), pairwiseSeqAlignment.getAdjustedPctIdentity()); } } } return matrix; } //--------------------------------------------------------------------------- /** Returns a distance matrix using the specified model. For a simple distance matrix based on mismatches and without any evolutionary compensation, use the UncorrectedModel. @param inAlgorithm the distance matrix mode to use when calculating the distance matrix @return the generated DistanceMatrix */ public DistanceMatrix getDistanceMatrix(DistanceMatrixModel inAlgorithm) { if (null == inAlgorithm) { throw new RuntimeException("A DistanceMatrixAlgorithm must be specified!"); } DistanceMatrix matrix = new DistanceMatrix(mAlignedSeqs != null ? mAlignedSeqs.size() + 10 : 10); if (mAlignedSeqs != null) { // Calculate the distance between ea. pair of sequences. // (The A-B distance is the same as the B-A distance.) for (int i = 0; i < mAlignedSeqs.size() - 1; i++) { BioSequence seq1 = mAlignedSeqs.get(i); for (int j = i + 1; j < mAlignedSeqs.size(); j++) { BioSequence seq2 = mAlignedSeqs.get(j); matrix.setDistance(seq1.getID(), seq2.getID(), inAlgorithm.calculateDistance(seq1, seq2)); } } } return matrix; } //--------------------------------------------------------------------------- public void orderByDistanceTo(String inSeqID) { if (null == getSequence(inSeqID)) { throw new RuntimeException("The MSA does not contain a sequence with id " + StringUtil.singleQuote(inSeqID) + "!"); } DistanceMatrix matrix = getDistanceMatrix(new UncorrectedModel()); List> sortedEdges = matrix.getSortedEdges(inSeqID); List resortedAlignedSeqs = new ArrayList(getSequences().size()); for (Edge edge : sortedEdges) { resortedAlignedSeqs.add(getSequence(edge.getTo())); } mAlignedSeqs = resortedAlignedSeqs; } //--------------------------------------------------------------------------- public List getPositionResidues(int inPosition) { List positionResidues = new ArrayList<>(size()); for (T seq : getSequences()) { positionResidues.add(seq.residueAt(inPosition)); } return positionResidues; } //--------------------------------------------------------------------------- public Set getPositionResidueSet(int inPosition) { Set positionResidues = new HashSet<>(20); for (T seq : getSequences()) { positionResidues.add(seq.residueAt(inPosition)); } return positionResidues; } //-------------------------------------------------------------------------- public void setAttribute(String inName, Object inValue) { if (null == mAttributes) { mAttributes = new HashMap<>(); } mAttributes.put(inName, inValue); } //-------------------------------------------------------------------------- public boolean hasAttribute(String inName) { return mAttributes != null && mAttributes.containsKey(inName); } //-------------------------------------------------------------------------- public Object getAttribute(String inName) { Object attr = null; if (mAttributes != null) { attr = mAttributes.get(inName); } return attr; } //-------------------------------------------------------------------------- public Collection getAttributeNames() { Collection attrNames = null; if (mAttributes != null) { attrNames = mAttributes.keySet(); } return attrNames; } //-------------------------------------------------------------------------- public void clearAttributes() { if (mAttributes != null) { mAttributes.clear(); } } //-------------------------------------------------------------------------- public Object removeAttribute(String inName) { Object attr = null; if (mAttributes != null) { attr = mAttributes.remove(inName); } return attr; } //************************************************************************** // PROTECTED METHODS //************************************************************************** //--------------------------------------------------------------------------- protected void setLength(int inValue) { mAlignmentLength = inValue; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy