org.biojava.nbio.ronn.Jronn Maven / Gradle / Ivy

Go to download
/*        BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.ronn;

import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.data.sequence.FastaSequence;
import org.biojava.nbio.data.sequence.SequenceUtil;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.stream.Collectors;


/**
 * This class gives public API to RONN functions.
 * It is built on top of the command line client. Due to this fact a few things
 * could be improved and extended pending the refactoring of the command line client.
 *
 * The input sequence limitations - the input sequence must not contain any ambiguous characters,
 * and have a minimum length of 19 amino acids.
 *
 * @author Peter Troshin
 * @version 1.0
 * @since 3.0.2
 *
 */
public class Jronn implements Serializable {

	/**
	 * Serialization version identifier for this class.
	 */
	private static final long serialVersionUID = 8104272449130849946L;

	/**
	 * Shared model loader; its models are loaded once, during class initialization.
	 */
	private static final ModelLoader loader = new ModelLoader();

	static {
		try {
			loader.loadModels();
		} catch (NumberFormatException | IOException e) {
			// Either failure is reported the same way and aborts class initialization.
			throw new RuntimeException("Fails to load models!" + e.getMessage(), e);
		}
	}


	/**
	 * Holder for the ranges, contain pointers to starting and ending position
	 * on the sequence which comprises a disordered region. Immutable.
	 * @author pvtroshin
	 */
	public static class Range {
		/**
		 * Range starting position counts from 1 (the first position on the sequence is 1)
		 */
		public final int from;
		/**
		 * The range ending position includes the last residue.
		 */
		public final int to;

		public final float score;
		public Range(int from, int to, float score) {
			assert from>=0;
			assert from0;
		assert probability>0 && probability<1;

		int count=0;
		int regionLen=0;
		List ranges = new ArrayList();
		for(float score: scores) {
			count++;
			// Round to 2 decimal points before comparison
			score = (float) (Math.round(score*100.0)/100.0);
			if(score>probability) {
				regionLen++;
			} else {
				if(regionLen>0) {
					ranges.add(new Range(count-regionLen, count-1,score));
				}
				regionLen=0;
			}
		}
		// In case of the range to boundary runs to the very end of the sequence
		if(regionLen>1) {
			ranges.add(new Range(count-regionLen+1, count,scores[scores.length-1]));
		}
		return ranges.toArray(new Range[ranges.size()]);

	}

	/**
	 * Calculates the probability of disorder scores for each residue in the sequence for
	 * many sequences in the input.
	 * <p>
	 * The result is produced by {@code Collectors.toMap}, so no particular iteration order
	 * is guaranteed for the returned map.
	 *
	 * @param sequences the list of the FastaSequence objects
	 * @return the Map with key-&gt;FastaSequence, value-&gt;probability of disorder for each residue
	 * @throws IllegalStateException if {@code sequences} contains two equal elements
	 *         (duplicate keys are rejected by {@code Collectors.toMap})
	 * @see #getDisorder(FastaSequence)
	 */
	public static Map<FastaSequence, float[]> getDisorderScores(List<FastaSequence> sequences) {
		// Collect straight into the result: a map allocated up front would only be
		// overwritten by the collector's own map and never used.
		return sequences.stream()
				.collect(Collectors.toMap(sequence -> sequence, sequence -> predictSerial(sequence)));
	}

	/**
	 * Calculates the disordered regions of the sequence for many sequences in the input.
	 * <p>
	 * The result is produced by {@code Collectors.toMap}, so no particular iteration order
	 * is guaranteed for the returned map.
	 *
	 * @param sequences the list of the FastaSequence objects
	 * @return the Map with key-&gt;FastaSequence, value-&gt;the disordered regions of that sequence
	 * @throws IllegalStateException if {@code sequences} contains two equal elements
	 *         (duplicate keys are rejected by {@code Collectors.toMap})
	 * @see #getDisorder(FastaSequence)
	 */
	public static Map<FastaSequence, Range[]> getDisorder(List<FastaSequence> sequences) {
		// Collect straight into the result: a map allocated up front would only be
		// overwritten by the collector's own map and never used.
		return sequences.stream()
				.collect(Collectors.toMap(sequence -> sequence, sequence -> getDisorder(sequence)));
	}

	/**
	 * Calculates the disordered regions of the protein sequences stored in a FASTA file.
	 * The input file is closed before the prediction starts, including when reading it fails.
	 *
	 * @param fastaFile input file name containing the sequence in FASTA
	 * @return the Map with key-&gt;FastaSequence, value-&gt;the list of disordered regions for each sequence
	 * @throws FileNotFoundException if the input file cannot be found
	 * @throws IOException if the system cannot access or read from the input file
	 * @see #getDisorder(FastaSequence)
	 * @see Range
	 */
	public static Map<FastaSequence, Range[]> getDisorder(String fastaFile) throws FileNotFoundException, IOException {
		final List<FastaSequence> sequences;
		// try-with-resources releases the file handle even if parsing throws
		try (FileInputStream input = new FileInputStream(fastaFile)) {
			sequences = SequenceUtil.readFasta(input);
		}
		return getDisorder(sequences);
	}

	/**
	 * TODO
	 *
	 * High performance method for calculating disorder. Use multiple threads to achieve the speedup.
	 *
	 * @param fastaFile  fully qualified path to the input FASTA file
	 * @param outputFile file name of the file for the results
	 * @param threadNumber the number of threads to use, default
	 * @param layout controls the format of the result file
	 * @throws FileNotFoundException if input file is not found
	 * @throws IOException if the input or the output files cannot be accessed
	 * @see ORonn.ResultLayout

	public static void calculateDisorder(String fastaFile, String outputFile, int threadNumber, ResultLayout layout) throws FileNotFoundException, IOException {
		final List sequences = SequenceUtil.readFasta(new FileInputStream(fastaFile));
		InputParameters in = new InputParameters();
		in.setFilePrm(fastaFile, InputParameters.inputKey);
		in.setFilePrm(outputFile, InputParameters.outputKey);
		//in.setThreadNum(Integer.toString(threadNumber));
		ORonn.predictParallel(sequences, in, loader);
	}
	*/
}