// Web-page header captured together with this listing (not part of the Java source):
// maltcms.datastructures.fragments.PairwiseAlignment Maven / Gradle / Ivy
// Show all versions of maltcms-commands Show documentation
/*
* Maltcms, modular application toolkit for chromatography-mass spectrometry.
* Copyright (C) 2008-2014, The authors of Maltcms. All rights reserved.
*
* Project website: http://maltcms.sf.net
*
* Maltcms may be used under the terms of either the
*
* GNU Lesser General Public License (LGPL)
* http://www.gnu.org/licenses/lgpl.html
*
* or the
*
* Eclipse Public License (EPL)
* http://www.eclipse.org/org/documents/epl-v10.php
*
* As a user/recipient of Maltcms, you may choose which license to receive the code
* under. Certain files or entire directories may not be covered by this
* dual license, but are subject to licenses compatible to both LGPL and EPL.
* License exceptions are explicitly declared in all relevant files or in a
* LICENSE file in the relevant directories.
*
* Maltcms is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. Please consult the relevant license documentation
* for details.
*/
package maltcms.datastructures.fragments;
import cross.Factory;
import cross.IConfigurable;
import cross.annotations.Configurable;
import cross.annotations.ProvidesVariables;
import cross.datastructures.fragments.IFileFragment;
import cross.datastructures.fragments.IVariableFragment;
import cross.datastructures.fragments.VariableFragment;
import cross.datastructures.tools.EvalTools;
import cross.datastructures.tools.FragmentTools;
import cross.datastructures.tuple.Tuple2D;
import cross.datastructures.tuple.Tuple2DI;
import cross.datastructures.workflow.DefaultWorkflow;
import cross.datastructures.workflow.IWorkflow;
import cross.datastructures.workflow.IWorkflowElement;
import cross.datastructures.workflow.WorkflowSlot;
import cross.io.IFileFragmentProvider;
import cross.tools.StringTools;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import maltcms.commands.distances.DtwRecurrence;
import maltcms.commands.distances.IRecurrence;
import maltcms.commands.distances.PairwiseFeatureSimilarity;
import maltcms.datastructures.alignment.DefaultPairSet;
import maltcms.datastructures.array.IArrayD2Double;
import maltcms.datastructures.ms.IAnchor;
import maltcms.io.csv.CSVWriter;
import maltcms.tools.PathTools;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.math3.analysis.UnivariateFunction;
import org.apache.commons.math3.analysis.interpolation.SplineInterpolator;
import org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.jdom2.Element;
import org.slf4j.LoggerFactory;
import ucar.ma2.ArrayByte;
import ucar.ma2.ArrayDouble;
import ucar.ma2.ArrayDouble.D0;
import ucar.ma2.ArrayDouble.D1;
import ucar.ma2.DataType;
import ucar.nc2.Dimension;
/**
* Implementation of IFileFragmentProvider for PairwiseAlignment.
*
* @author Nils Hoffmann
*
*/
@ProvidesVariables(names = {"var.minimizing_array_comp"})
public class PairwiseAlignment implements IFileFragmentProvider, IConfigurable,
IWorkflowElement {
/** Class-wide SLF4J logger. */
private static final org.slf4j.Logger log = LoggerFactory.getLogger(PairwiseAlignment.class);
/** Result fragment; built lazily and cached by {@link #provideFileFragment()}. */
private IFileFragment ff;
/** When set, the {@link #distance} matrix is attached to the result fragment. */
@Configurable(name = "alignment.save.pairwise.distance.matrix")
private boolean savePWDM;
/** When set, the {@link #alignment} matrix is attached to the result fragment. */
@Configurable(name = "alignment.save.cumulative.distance.matrix")
private boolean saveCDM;
/** Matrix stored under the cumulative-distance variable; its last cell yields the alignment value. */
private IArrayD2Double alignment;
/** Matrix stored under the pairwise-distance variable. */
private IArrayD2Double distance;
/** Optimisation direction flag handed to the path writer. */
private boolean isMinimize;
/** Lengths of the "reference_scan" and "query_scan" dimensions of the stored matrices. */
private int refsize, querysize;
/** Recurrence supplying the global gap penalty and the recorded recurrence class name. */
private IRecurrence cd;
/** Scalar alignment value; computed in {@link #provideFileFragment()} unless preset via {@link #setResult(double)}. */
private D0 result;
/** One-element copy of {@link #result}; assigned only inside {@link #provideFileFragment()}. */
private D1 resultVector;
/** Pairwise similarity whose similarity function supplies the expansion/compression/match weights. */
private PairwiseFeatureSimilarity pwd;
/** Reference-side input fragment. */
private IFileFragment ref;
/** Target-side (query) input fragment. */
private IFileFragment target;
/** Alignment path; traced back from {@link #predecessors} when not supplied by the caller. */
private List<Tuple2DI> path;
/** Creator class; stored and exposed, but not read by any method of this class. */
private Class<?> creator;
/**
 * Not referenced by any method of this class.
 * NOTE(review): the element type was lost when the listing was extracted
 * (presumably {@code Tuple2DI}) -- confirm against the upstream source.
 */
private List localPathOptima;
/**
 * Optional anchor pairs, written to the "_anchors.csv" table when present.
 * NOTE(review): type argument restored from the usage in
 * {@link #provideFileFragment()} -- confirm against the upstream source.
 */
private DefaultPairSet<IAnchor> anchors;
/** Name of the variable that receives the {@link #alignment} matrix. */
@Configurable(name = "var.alignment.cumulative_distance")
private String cumulativeDistanceVariableName = "cumulative_distance";
/** Name of the variable that receives the {@link #distance} matrix. */
@Configurable(name = "var.alignment.pairwise_distance")
private String pairwiseDistanceVariableName = "pairwise_distance";
/** Configured in {@link #configure(Configuration)}; not read by any other method of this class. */
@Configurable(name = "var.minimizing_array_comp")
private String arrayComparatorVariableName = "minimizing_array_comp";
/** Class name of the array distance; configured and exposed through its accessor pair. */
@Configurable(name = "alignment.algorithm.distance")
private String arrayDistanceClassName = "maltcms.commands.distances.ArrayLp";
/** Owning workflow; replaced by a {@link DefaultWorkflow} in {@link #provideFileFragment()} when still null. */
private IWorkflow iw;
/** Predecessor matrix handed to the path traceback. */
private ArrayByte.D2 predecessors;
/** When set, the alignment value is normalised by the weighted path step counts. */
@Configurable(name = "normalizeAlignmentValueByMapWeights")
private boolean normalizeAlignmentValueByMapWeights;
/** Interpolated path; only ever assigned through {@link #setInterppath(List)}. */
private List<Tuple2DI> interppath;
/**
 * Returns the interpolated alignment path.
 *
 * <p>The value is whatever was last passed to {@link #setInterppath(List)};
 * no other method of this class assigns it.</p>
 *
 * @return the interpolated path, or {@code null} if none has been set
 */
public List<Tuple2DI> getInterppath() {
    return interppath;
}
/**
 * Sets the interpolated alignment path.
 *
 * @param interppath the interpolated path to store
 */
public void setInterppath(List<Tuple2DI> interppath) {
    this.interppath = interppath;
}
/**
 * {@inheritDoc}
 *
 * <p>This implementation is empty: the given element is left unchanged.</p>
 *
 * @param e the element passed in by the caller (an {@code org.jdom2.Element}
 *          according to this file's imports)
 */
@Override
public void appendXML(final Element e) {
}
/**
 * {@inheritDoc}
 *
 * <p>Reads the matrix-saving switches, the output variable names, the array
 * distance class name and the normalisation switch from {@code cfg}. Every
 * key falls back to the default that is also used as the field initialiser.
 * The normalisation switch is looked up under a key prefixed with the
 * runtime class name.</p>
 */
@Override
public void configure(final Configuration cfg) {
    saveCDM = cfg.getBoolean("alignment.save.cumulative.distance.matrix", false);
    savePWDM = cfg.getBoolean("alignment.save.pairwise.distance.matrix", false);

    cumulativeDistanceVariableName =
            cfg.getString("var.alignment.cumulative_distance", "cumulative_distance");
    pairwiseDistanceVariableName =
            cfg.getString("var.alignment.pairwise_distance", "pairwise_distance");
    arrayComparatorVariableName =
            cfg.getString("var.minimizing_array_comp", "minimizing_array_comp");
    arrayDistanceClassName =
            cfg.getString("alignment.algorithm.distance", "maltcms.commands.distances.ArrayLp");

    // The key is qualified with the concrete class name, so a subclass reads its own key.
    final String normalizeKey = getClass().getName() + ".normalizeAlignmentValueByMapWeights";
    normalizeAlignmentValueByMapWeights = cfg.getBoolean(normalizeKey, false);
}
/**
 * Getter for the field {@code alignment}.
 *
 * <p>This is the matrix that {@link #provideFileFragment()} stores under the
 * cumulative-distance variable and whose last cell it reads as the
 * alignment value.</p>
 *
 * @return the alignment
 */
public IArrayD2Double getAlignment() {
return this.alignment;
}
/**
 * Getter for the field {@code arrayDistanceClassName}.
 *
 * @return the arrayDistanceClassName
 */
public String getArrayDistanceClassName() {
return this.arrayDistanceClassName;
}
/**
 * Getter for the field {@code cd}.
 *
 * @return the recurrence set via {@link #setCd(IRecurrence)} or
 *         {@link #setCumulativeDistance(DtwRecurrence)}
 */
public IRecurrence getCd() {
return this.cd;
}
/**
* Getter for the field creator
.
*
* @return the creator
*/
public Class> getCreator() {
return this.creator;
}
/**
 * Getter for the field {@code distance}.
 *
 * <p>This is the matrix that {@link #provideFileFragment()} stores under the
 * pairwise-distance variable.</p>
 *
 * @return the distance
 */
public IArrayD2Double getDistance() {
return this.distance;
}
/**
 * Getter for the field {@code ff}.
 *
 * <p>Unlike {@link #provideFileFragment()}, this accessor never builds the
 * fragment; it returns the current field value as is.</p>
 *
 * @return the ff, possibly {@code null}
 */
public IFileFragment getFf() {
return this.ff;
}
/**
 * Retrieve paired anchors.
 *
 * @return a {@link maltcms.datastructures.alignment.DefaultPairSet} object,
 *         or {@code null} if no anchors have been set
 */
public DefaultPairSet getAnchors() {
return anchors;
}
/**
 * Set paired anchors.
 *
 * <p>When anchors are present, {@link #provideFileFragment()} additionally
 * writes their scan indices to an "_anchors.csv" table.</p>
 *
 * @param anchors a {@link maltcms.datastructures.alignment.DefaultPairSet} object.
 */
public void setAnchors(DefaultPairSet anchors) {
this.anchors = anchors;
}
/*
 * (non-Javadoc)
 *
 * @see cross.datastructures.workflow.IWorkflowElement#getWorkflow()
 */
/**
 * {@inheritDoc}
 *
 * <p>Returns the field as is; it stays {@code null} until
 * {@link #setWorkflow(IWorkflow)} is called or
 * {@link #provideFileFragment()} installs a default workflow.</p>
 */
@Override
public IWorkflow getWorkflow() {
return this.iw;
}
/**
 * Getter for the field {@code path}.
 *
 * <p>The path is either supplied through the setter or traced back inside
 * {@link #provideFileFragment()}.</p>
 *
 * @return the path, or {@code null} if it has neither been set nor computed
 */
public List<Tuple2DI> getPath() {
    return this.path;
}
/**
 * Getter for the field {@code predecessors}.
 *
 * @return the predecessors
 */
public ArrayByte.D2 getPredecessors() {
return this.predecessors;
}
/**
 * Getter for the field {@code pwd}.
 *
 * @return the pwd
 */
public PairwiseFeatureSimilarity getPwd() {
return this.pwd;
}
/**
 * Getter for the field {@code querysize}.
 *
 * @return the querysize, used as the length of the "query_scan" dimension
 */
public int getQuerysize() {
return this.querysize;
}
/**
 * Getter for the field {@code ref}.
 *
 * @return the ref
 */
public IFileFragment getRef() {
return this.ref;
}
/**
 * Getter for the field {@code refsize}.
 *
 * @return the refsize, used as the length of the "reference_scan" dimension
 */
public int getRefsize() {
return this.refsize;
}
/**
 * Getter for the field {@code result}.
 *
 * @return the result, or {@code null} if it has neither been set via
 *         {@link #setResult(double)} nor computed by
 *         {@link #provideFileFragment()}
 */
public D0 getResult() {
return this.result;
}
/**
 * Getter for the field {@code resultVector}.
 *
 * <p>The field is assigned only inside {@link #provideFileFragment()} when
 * that method builds the fragment.</p>
 *
 * @return the resultVector, possibly {@code null}
 */
public D1 getResultVector() {
return this.resultVector;
}
/**
 * Getter for the field {@code target}.
 *
 * @return the target
 */
public IFileFragment getTarget() {
return this.target;
}
/*
 * (non-Javadoc)
 *
 * @see cross.datastructures.workflow.IWorkflowElement#getWorkflowSlot()
 */
/**
 * {@inheritDoc}
 *
 * <p>Always returns {@link WorkflowSlot#ALIGNMENT}.</p>
 */
@Override
public WorkflowSlot getWorkflowSlot() {
return WorkflowSlot.ALIGNMENT;
}
/**
 * Getter for the field {@code isMinimize}.
 *
 * @return the isMinimize
 */
public boolean isMinimize() {
return this.isMinimize;
}
/**
 * Getter for the field {@code normalizeAlignmentValueByMapWeights}.
 *
 * <p>The matching setter is named {@link #setNormalizeByMapLength(boolean)}.</p>
 *
 * @return the normalizeAlignmentValueByMapWeights
 */
public boolean isNormalizeByMapWeights() {
return this.normalizeAlignmentValueByMapWeights;
}
/**
 * Getter for the field {@code saveCDM}.
 *
 * @return the saveCDM
 */
public boolean isSaveCDM() {
return this.saveCDM;
}
/**
 * Getter for the field {@code savePWDM}.
 *
 * @return the savePWDM
 */
public boolean isSavePWDM() {
return this.savePWDM;
}
/**
 * {@inheritDoc}
 *
 * <p>Builds the result fragment on the first call and returns the cached
 * instance on every later call. Building performs these steps in order:</p>
 * <ol>
 * <li>install a {@link DefaultWorkflow} if no workflow has been set;</li>
 * <li>create the fragment from {@code ref} and {@code target} in this
 * element's output directory;</li>
 * <li>if no path was supplied, trace it back from the predecessor matrix,
 * save it as CSV and decorate the fragment; otherwise attach the supplied
 * path via {@code PathTools.getFragments};</li>
 * <li>write the "_names.txt" table and, when anchors are present, the
 * "_anchors.csv" table;</li>
 * <li>optionally attach the cumulative and pairwise matrices;</li>
 * <li>record the similarity, recurrence and alignment class names;</li>
 * <li>store the (optionally normalised) alignment value.</li>
 * </ol>
 *
 * @return the cached or newly built fragment
 */
@Override
public IFileFragment provideFileFragment() {
    if (this.ff != null) {
        return this.ff;
    }
    long start = System.currentTimeMillis();
    if (this.iw == null) {
        this.iw = new DefaultWorkflow();
    }
    this.ff = FragmentTools.createFragment(this.ref, this.target,
            getWorkflow().getOutputDirectory(this));
    final PathTools pt = Factory.getInstance().getObjectFactory()
            .instantiate(PathTools.class);
    if (this.path == null) {
        EvalTools.notNull(new Object[]{this.ff, this.alignment,
            this.distance, this.isMinimize, this.cd}, this);
        this.path = pt.traceback(this.predecessors, this.ref, this.target);
        log.info("Calculated traceback in {} milliseconds",
                System.currentTimeMillis() - start);
        // Restart the clock so the final message covers only the variable setup.
        start = System.currentTimeMillis();
        pt.savePathCSV(this.ff, this.alignment, this.distance,
                this.path, getWorkflow(), isMinimize());
        pt.decorate(this.ff, this.distance);
    } else {
        PathTools.getFragments(this.ff, this.path, this.distance);
    }

    final List<List<String>> rows = new ArrayList<>();
    final List<String> names = new ArrayList<>();
    names.add(this.ref.getName());
    names.add(this.target.getName());
    rows.add(names);
    writeTable("_names.txt", rows);
    if (this.anchors != null) {
        // rows still contains the names row, so it also leads the anchor table.
        for (final Tuple2D<IAnchor, IAnchor> t : this.anchors) {
            final List<String> anchorRow = new ArrayList<>();
            anchorRow.add(t.getFirst().getScanIndex() + "");
            anchorRow.add(t.getSecond().getScanIndex() + "");
            rows.add(anchorRow);
        }
        writeTable("_anchors.csv", rows);
    }

    // Step counters come from the PathTools instance used above, weighted by
    // the similarity function's per-step weights.
    final double expw = pt.getNexp()
            * this.pwd.getSimilarityFunction().getExpansionWeight();
    final double compw = pt.getNcomp()
            * this.pwd.getSimilarityFunction().getCompressionWeight();
    final double diagw = pt.getNdiag()
            * this.pwd.getSimilarityFunction().getMatchWeight();
    final double gapPenaltiesW = (pt.getNexp() + pt.getNcomp())
            * this.cd.getGlobalGapPenalty();

    if (this.saveCDM) {
        attachMatrixVariable(this.cumulativeDistanceVariableName, this.alignment);
    }
    if (this.savePWDM) {
        attachMatrixVariable(this.pairwiseDistanceVariableName, this.distance);
    }

    final Configuration globalCfg = Factory.getInstance().getConfiguration();
    final String pairwiseDistanceClassVariableName = globalCfg.getString(
            "var.alignment.pairwise_distance.class", "pairwise_distance_class");
    final String cumulativeDistanceClassVariableName = globalCfg.getString(
            "var.alignment.cumulative_distance.class", "cumulative_distance_class");
    final String alignmentClassVariableName = globalCfg.getString(
            "var.alignment.class", "alignment_class");
    // NOTE(review): the alignment class name is hard-coded although a creator
    // class is stored on this object -- confirm whether that is intended.
    final String alignmentClassName = "maltcms.commands.distances.dtw.MZIDynamicTimeWarp";
    FragmentTools.createString(this.ff, pairwiseDistanceClassVariableName,
            this.pwd.getSimilarityFunction().getClass().getName());
    FragmentTools.createString(this.ff, cumulativeDistanceClassVariableName,
            this.cd.getClass().getName());
    FragmentTools.createString(this.ff, alignmentClassVariableName,
            alignmentClassName);

    if (this.result == null) {
        double alignmentValue = this.alignment.get(
                this.alignment.rows() - 1, this.alignment.columns() - 1);
        if (this.normalizeAlignmentValueByMapWeights) {
            alignmentValue = (alignmentValue - gapPenaltiesW)
                    / (expw + compw + diagw);
            log.info("Alignment value normalized by path weights: {}",
                    alignmentValue);
        }
        this.result = new ArrayDouble.D0();
        this.result.set(alignmentValue);
    }
    final String distvar = globalCfg.getString("var.alignment.distance", "distance");
    final IVariableFragment dvar = new VariableFragment(this.ff, distvar);
    dvar.setArray(this.result);
    this.resultVector = new ArrayDouble.D1(1);
    this.resultVector.set(0, this.result.get());
    log.debug("Set Variables on {} in {} milliseconds", this.ff.getName(),
            System.currentTimeMillis() - start);
    return this.ff;
}

/**
 * Writes {@code rows} through a freshly instantiated CSV writer into this
 * element's output directory; the file name is the fragment name without
 * its extension followed by {@code suffix}.
 */
private void writeTable(final String suffix, final List<List<String>> rows) {
    final CSVWriter writer = Factory.getInstance().getObjectFactory()
            .instantiate(CSVWriter.class);
    writer.setWorkflow(getWorkflow());
    writer.writeTableByRows(
            getWorkflow().getOutputDirectory(this).getAbsolutePath(),
            StringTools.removeFileExt(this.ff.getName()) + suffix,
            rows, WorkflowSlot.ALIGNMENT);
}

/**
 * Attaches {@code matrix} to the result fragment as a DOUBLE variable named
 * {@code variableName} with "reference_scan" x "query_scan" dimensions.
 */
private void attachMatrixVariable(final String variableName,
        final IArrayD2Double matrix) {
    final IVariableFragment vf = new VariableFragment(this.ff, variableName, null);
    vf.setDimensions(new Dimension[]{
        new Dimension("reference_scan", this.refsize, true, false, false),
        new Dimension("query_scan", this.querysize, true, false, false)});
    vf.setDataType(DataType.DOUBLE);
    vf.setArray(matrix.getArray());
}
/**
 * Builds a coarse, spline-interpolated version of an alignment path.
 *
 * <p>The path is thinned to support points that strictly increase in both
 * coordinates, the lower-right matrix cell is forced to be the final support
 * point, and a spline through the (first, second) pairs is sampled at every
 * tenth row. A closing sample at the last row is appended when the stride
 * did not land on it.</p>
 *
 * <p>If the interpolator rejects the support points, the error is logged and
 * whatever has been sampled so far is returned.</p>
 *
 * @param path      the alignment path; may be {@code null} or empty
 * @param alignment the matrix whose row and column counts bound the path
 * @return the sampled points; empty when {@code path} is {@code null} or
 *         empty, or when interpolation fails before the first sample
 */
private List<Tuple2DI> interpolatePath(List<Tuple2DI> path,
        IArrayD2Double alignment) {
    final List<Tuple2DI> interp = new ArrayList<>();
    if (path == null || path.isEmpty()) {
        return interp;
    }
    final List<Tuple2DI> support = new ArrayList<>();
    try {
        Tuple2DI p = path.get(0); // start point
        support.add(p);
        for (int i = 0; i < path.size() - 1; i++) {
            final Tuple2DI q = path.get(i);
            if (q.getFirst() > p.getFirst()
                    && q.getSecond() > p.getSecond()) {
                if (i < 10) {
                    log.info("Adding q={}", q);
                }
                support.add(q);
                p = q;
            }
        }
        final int lastRow = alignment.rows() - 1;
        final int lastColumn = alignment.columns() - 1;
        final Tuple2DI end = new Tuple2DI(lastRow, lastColumn);
        // p is the last support point here. If the end cell does not strictly
        // dominate it, the end cell replaces it instead of following it.
        if (!(end.getFirst() > p.getFirst() && end.getSecond() > p.getSecond())) {
            support.remove(support.size() - 1);
        }
        support.add(end);
        log.info("Number of Surviving Points: {}", support.size());
        final double[] x = new double[support.size()];
        final double[] y = new double[support.size()];
        for (int i = 0; i < support.size(); i++) {
            x[i] = support.get(i).getFirst();
            y[i] = support.get(i).getSecond();
        }
        // Bound the preview: copyOfRange pads with zeros past the array end,
        // which would log coordinates that do not exist.
        final int preview = Math.min(10, x.length);
        log.info("x = {}", Arrays.toString(Arrays.copyOfRange(x, 0, preview)));
        log.info("y = {}", Arrays.toString(Arrays.copyOfRange(y, 0, preview)));
        final UnivariateInterpolator interpolator = new SplineInterpolator();
        final UnivariateFunction function = interpolator.interpolate(x, y);
        for (int i = 0; i < alignment.rows(); i += 10) {
            interp.add(new Tuple2DI(i,
                    (int) (Math.round(function.value((double) i)))));
        }
        // The spline's x-axis is the row coordinate, so the closing sample
        // must be taken at the last row (not the last column).
        if (!interp.isEmpty()
                && interp.get(interp.size() - 1).getFirst() != lastRow) {
            interp.add(new Tuple2DI(lastRow,
                    (int) (Math.round(function.value((double) lastRow)))));
        }
    } catch (MathIllegalArgumentException e) {
        // Pass the exception along so the stack trace is not lost.
        log.error("Could not interpolate alignment path: {}",
                e.getLocalizedMessage(), e);
    }
    return interp;
}
/**
* Setter for the field alignment
.
*
* @param al a {@link maltcms.datastructures.array.IArrayD2Double} object.
*/
public void setAlignment(final IArrayD2Double al) {
// ArrayDouble.D2 d2 = new ArrayDouble.D2(al.columns(),al.rows());
// for(int i=0;iSetter for the field arrayDistanceClassName
.
*
* @param arrayDistanceClassName the arrayDistanceClassName to set
*/
public void setArrayDistanceClassName(final String arrayDistanceClassName) {
this.arrayDistanceClassName = arrayDistanceClassName;
}
/**
 * Setter for the field {@code cd}.
 *
 * @param cd a {@link maltcms.commands.distances.IRecurrence} object.
 */
public void setCd(final IRecurrence cd) {
this.cd = cd;
}
/**
* Setter for the field creator
.
*
* @param creator the creator to set
*/
public void setCreator(final Class> creator) {
this.creator = creator;
}
/**
 * Sets the field {@code cd}; assigns the same field as
 * {@link #setCd(IRecurrence)}, but accepts only a {@link DtwRecurrence}.
 *
 * @param cd1 a {@link maltcms.commands.distances.DtwRecurrence} object.
 */
public void setCumulativeDistance(final DtwRecurrence cd1) {
this.cd = cd1;
}
/**
 * Setter for the field {@code distance}.
 *
 * @param distance the distance to set
 */
public void setDistance(final IArrayD2Double distance) {
this.distance = distance;
}
/**
 * Setter for the field {@code ff}.
 *
 * <p>Once a non-null fragment is set, {@link #provideFileFragment()} returns
 * it unchanged instead of building a new one.</p>
 *
 * @param ff the ff to set
 */
public void setFf(final IFileFragment ff) {
this.ff = ff;
}
/**
* setFileFragments.
*
* @param ref1 a {@link cross.datastructures.fragments.IFileFragment} object.
* @param target1 a {@link cross.datastructures.fragments.IFileFragment} object.
* @param creator1 a {@link java.lang.Class} object.
*/
public void setFileFragments(final IFileFragment ref1,
final IFileFragment target1, final Class> creator1) {
this.ref = ref1;
this.target = target1;
this.creator = creator1;
}
/**
 * Sets the field {@code isMinimize}; assigns the same field as
 * {@link #setMinimize(boolean)}.
 *
 * @param b a boolean.
 */
public void setIsMinimizing(final boolean b) {
this.isMinimize = b;
}
/*
 * (non-Javadoc)
 *
 * @see cross.datastructures.workflow.IWorkflowElement#setWorkflow(cross.
 * datastructures.workflow.IWorkflow)
 */
/**
 * {@inheritDoc}
 *
 * <p>Stores the workflow that {@link #getWorkflow()} returns.</p>
 */
@Override
public void setWorkflow(final IWorkflow iw1) {
this.iw = iw1;
}
/**
 * Setter for the field {@code isMinimize}.
 *
 * @param isMinimize the isMinimize to set
 */
public void setMinimize(final boolean isMinimize) {
this.isMinimize = isMinimize;
}
/**
 * Sets the field {@code normalizeAlignmentValueByMapWeights}.
 *
 * <p>Despite the "MapLength" in its name, this is the setter matching
 * {@link #isNormalizeByMapWeights()}.</p>
 *
 * @param normalizeByMapWeights a boolean.
 */
public void setNormalizeByMapLength(final boolean normalizeByMapWeights) {
this.normalizeAlignmentValueByMapWeights = normalizeByMapWeights;
}
/**
 * Sets the field {@code querysize}; assigns the same field as
 * {@link #setQuerysize(int)}.
 *
 * @param n a int.
 */
public void setNumberOfScansQuery(final int n) {
this.querysize = n;
}
/**
 * Sets the field {@code refsize}; assigns the same field as
 * {@link #setRefsize(int)}.
 *
 * @param n a int.
 */
public void setNumberOfScansReference(final int n) {
this.refsize = n;
}
/**
 * Sets the field {@code pwd}; assigns the same field as
 * {@link #setPwd(PairwiseFeatureSimilarity)}.
 *
 * @param pwd1 a {@link maltcms.commands.distances.PairwiseFeatureSimilarity} object.
 */
public void setPairwiseDistance(final PairwiseFeatureSimilarity pwd1) {
this.pwd = pwd1;
}
/**
* setPairwiseDistances.
*
* @param pwd1 a {@link maltcms.datastructures.array.IArrayD2Double} object.
*/
public void setPairwiseDistances(final IArrayD2Double pwd1) {
// ArrayDouble.D2 d2 = new ArrayDouble.D2(pwd1.columns(),pwd1.rows());
// for(int i=0;iSetter for the field path
.
*
* @param path1 a {@link java.util.List} object.
*/
public void setPath(final List path1) {
this.path = path1;
}
/**
 * Setter for the field {@code predecessors}.
 *
 * @param predecessors the predecessors to set
 */
public void setPredecessors(final ArrayByte.D2 predecessors) {
this.predecessors = predecessors;
}
/**
 * Setter for the field {@code pwd}.
 *
 * @param pwd the pwd to set
 */
public void setPwd(final PairwiseFeatureSimilarity pwd) {
this.pwd = pwd;
}
/**
 * Setter for the field {@code querysize}.
 *
 * @param querysize the querysize to set
 */
public void setQuerysize(final int querysize) {
this.querysize = querysize;
}
/**
 * Setter for the field {@code ref}.
 *
 * @param ref the ref to set
 */
public void setRef(final IFileFragment ref) {
this.ref = ref;
}
/**
 * Setter for the field {@code refsize}.
 *
 * @param refsize the refsize to set
 */
public void setRefsize(final int refsize) {
this.refsize = refsize;
}
/**
 * Stores {@code d} as the alignment result, wrapped in a new
 * zero-dimensional array.
 *
 * <p>A result set here is kept by {@link #provideFileFragment()}, which only
 * computes a value when the field is still {@code null}.</p>
 *
 * @param d a double.
 */
public void setResult(final double d) {
    final D0 scalar = new ArrayDouble.D0();
    scalar.set(d);
    this.result = scalar;
}
/**
 * Setter for the field {@code saveCDM}.
 *
 * @param saveCDM the saveCDM to set
 */
public void setSaveCDM(final boolean saveCDM) {
this.saveCDM = saveCDM;
}
/**
 * Setter for the field {@code savePWDM}.
 *
 * @param savePWDM the savePWDM to set
 */
public void setSavePWDM(final boolean savePWDM) {
this.savePWDM = savePWDM;
}
/**
 * Setter for the field {@code target}.
 *
 * @param target the target to set
 */
public void setTarget(final IFileFragment target) {
this.target = target;
}
/**
 * Sets the field {@code predecessors}; assigns the same field as
 * {@link #setPredecessors(ArrayByte.D2)}.
 *
 * @param predecessors1 a {@link ucar.ma2.ArrayByte.D2} object.
 */
public void setTraceMatrix(final ArrayByte.D2 predecessors1) {
this.predecessors = predecessors1;
}
}