All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.genome.io.fastq.FastqTools Maven / Gradle / Ivy

The newest version!
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.genome.io.fastq;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
import org.biojava.nbio.core.sequence.features.QualityFeature;
import org.biojava.nbio.core.sequence.features.QuantityFeature;
import org.biojava.nbio.core.sequence.template.AbstractSequence;

import java.util.List;

/**
 * Utility methods for FASTQ formatted sequences.
 *
 * @since 3.0.3
 */
public final class FastqTools
{

	/**
	 * Private no-arg constructor.
	 */
	private FastqTools()
	{
		// empty
	}


	/**
	 * Create and return a new {@link DNASequence} from the specified FASTQ formatted sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return a new {@link DNASequence} from the specified FASTQ formatted sequence
	 * @throws CompoundNotFoundException if DNA sequence in fastq contains unrecognised compounds
	 */
	public static DNASequence createDNASequence(final Fastq fastq) throws CompoundNotFoundException
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		DNASequence sequence = new DNASequence(fastq.getSequence());
		sequence.setOriginalHeader(fastq.getDescription());
		return sequence;
	}

	/**
	 * Create and return a new {@link DNASequence} with quality scores from the specified
	 * FASTQ formatted sequence.  The quality scores are stored in a {@link QualityFeature}
	 * with a type "qualityScores" the same length as the sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return a new {@link DNASequence} with quality scores from the specified FASTQ formatted sequence
	 * @throws CompoundNotFoundException if DNA sequence in fastq contains unrecognised compounds
	 */
	public static DNASequence createDNASequenceWithQualityScores(final Fastq fastq) throws CompoundNotFoundException
	{
		DNASequence sequence = createDNASequence(fastq);
		sequence.addFeature(1, sequence.getLength(), createQualityScores(fastq));
		return sequence;
	}

	/**
	 * Create and return a new {@link DNASequence} with error probabilities from the specified
	 * FASTQ formatted sequence.  The error probabilities are stored in a {@link QuantityFeature}
	 * with a type "errorProbabilities" the same length as the sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return a new {@link DNASequence} with error probabilities from the specified FASTQ formatted sequence
	 * @throws CompoundNotFoundException if DNA sequence in fastq contains unrecognised compounds
	 */
	public static DNASequence createDNASequenceWithErrorProbabilities(final Fastq fastq) throws CompoundNotFoundException
	{
		DNASequence sequence = createDNASequence(fastq);
		sequence.addFeature(1, sequence.getLength(), createErrorProbabilities(fastq));
		return sequence;
	}

	/**
	 * Create and return a new {@link DNASequence} with quality scores and error probabilities from the
	 * specified FASTQ formatted sequence.  The quality scores are stored in a {@link QualityFeature}
	 * with a type "qualityScores" the same length as the sequence and the error
	 * probabilities are stored in a {@link QuantityFeature} with a type "errorProbabilities"
	 * the same length as the sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return a new {@link DNASequence} with quality scores and error probabilities from the specified
	 *    FASTQ formatted sequence
	 * @throws CompoundNotFoundException if DNA sequence in fastq contains unrecognised compounds
	 */
	public static DNASequence createDNASequenceWithQualityScoresAndErrorProbabilities(final Fastq fastq) throws CompoundNotFoundException
	{
		DNASequence sequence = createDNASequence(fastq);
		sequence.addFeature(1, sequence.getLength(), createQualityScores(fastq));
		sequence.addFeature(1, sequence.getLength(), createErrorProbabilities(fastq));
		return sequence;
	}

	/**
	 * Create and return a new {@link QualityFeature} from the quality scores of the specified
	 * FASTQ formatted sequence.  The quality scores feature has a type "qualityScores"
	 * and will be the same length as the sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return a new {@link QualityFeature} from the quality scores of the specified FASTQ
	 *    formatted sequence
	 */
	public static QualityFeature, NucleotideCompound> createQualityScores(final Fastq fastq)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		QualityFeature, NucleotideCompound> qualityScores = new QualityFeature<>("qualityScores", "sequencing");
		qualityScores.setQualities(toList(qualityScores(fastq)));
		return qualityScores;
	}

	/**
	 * Create and return a new {@link QuantityFeature} from the error probabilities of the specified
	 * FASTQ formatted sequence.  The error probabilities feature has a type "errorProbabilities"
	 * and will be the same length as the sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return a new {@link QualityFeature} from the error probabilities of the specified FASTQ
	 *    formatted sequence
	 */
	public static QuantityFeature, NucleotideCompound> createErrorProbabilities(final Fastq fastq)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		QuantityFeature, NucleotideCompound> errorProbabilities = new QuantityFeature<>("errorProbabilities", "sequencing");
		errorProbabilities.setQuantities(toList(errorProbabilities(fastq)));
		return errorProbabilities;
	}

	/**
	 * Return the quality scores from the specified FASTQ formatted sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return the quality scores from the specified FASTQ formatted sequence
	 */
	public static Iterable qualityScores(final Fastq fastq)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		int size = fastq.getQuality().length();
		List qualityScores = Lists.newArrayListWithExpectedSize(size);
		FastqVariant variant = fastq.getVariant();
		for (int i = 0; i < size; i++)
		{
			char c = fastq.getQuality().charAt(i);
			qualityScores.add(variant.qualityScore(c));
		}
		return ImmutableList.copyOf(qualityScores);
	}

	/**
	 * Copy the quality scores from the specified FASTQ formatted sequence into the specified int array.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @param qualityScores int array of quality scores, must not be null and must be the same
	 *    length as the FASTQ formatted sequence quality
	 * @return the specified int array of quality scores
	 */
	public static int[] qualityScores(final Fastq fastq, final int[] qualityScores)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		if (qualityScores == null)
		{
			throw new IllegalArgumentException("qualityScores must not be null");
		}
		int size = fastq.getQuality().length();
		if (qualityScores.length != size)
		{
			throw new IllegalArgumentException("qualityScores must be the same length as the FASTQ formatted sequence quality");
		}
		FastqVariant variant = fastq.getVariant();
		for (int i = 0; i < size; i++)
		{
			char c = fastq.getQuality().charAt(i);
			qualityScores[i] = variant.qualityScore(c);
		}
		return qualityScores;
	}

	/**
	 * Return the error probabilities from the specified FASTQ formatted sequence.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @return the error probabilities from the specified FASTQ formatted sequence
	 */
	public static Iterable errorProbabilities(final Fastq fastq)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		int size = fastq.getQuality().length();
		List errorProbabilities = Lists.newArrayListWithExpectedSize(size);
		FastqVariant variant = fastq.getVariant();
		for (int i = 0; i < size; i++)
		{
			char c = fastq.getQuality().charAt(i);
			errorProbabilities.add(variant.errorProbability(c));
		}
		return ImmutableList.copyOf(errorProbabilities);
	}

	/**
	 * Copy the error probabilities from the specified FASTQ formatted sequence into the specified double array.
	 *
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @param errorProbabilities double array of error probabilities, must not be null and must be the same
	 *    length as the FASTQ formatted sequence quality
	 * @return the specified double array of error probabilities
	 */
	public static double[] errorProbabilities(final Fastq fastq, final double[] errorProbabilities)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		if (errorProbabilities == null)
		{
			throw new IllegalArgumentException("errorProbabilities must not be null");
		}
		int size = fastq.getQuality().length();
		if (errorProbabilities.length != size)
		{
			throw new IllegalArgumentException("errorProbabilities must be the same length as the FASTQ formatted sequence quality");
		}
		FastqVariant variant = fastq.getVariant();
		for (int i = 0; i < size; i++)
		{
			char c = fastq.getQuality().charAt(i);
			errorProbabilities[i] = variant.errorProbability(c);
		}
		return errorProbabilities;
	}

	/**
	 * Convert the specified FASTQ formatted sequence to the
	 * specified FASTQ sequence format variant.
	 *
	 * @since 4.2
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @param variant FASTQ sequence format variant, must not be null
	 * @return the specified FASTQ formatted sequence converted to the
	 *    specified FASTQ sequence format variant
	 */
	public static Fastq convert(final Fastq fastq, final FastqVariant variant)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		if (variant == null)
		{
			throw new IllegalArgumentException("variant must not be null");
		}
		if (fastq.getVariant().equals(variant))
		{
			return fastq;
		}
		return new Fastq(fastq.getDescription(), fastq.getSequence(), convertQualities(fastq, variant), variant);
	}

	/**
	 * Convert the qualities in the specified FASTQ formatted sequence to the
	 * specified FASTQ sequence format variant.
	 *
	 * @since 4.2
	 * @param fastq FASTQ formatted sequence, must not be null
	 * @param variant FASTQ sequence format variant, must not be null
	 * @return the qualities in the specified FASTQ formatted sequence converted to the
	 *    specified FASTQ sequence format variant
	 */
	static String convertQualities(final Fastq fastq, final FastqVariant variant)
	{
		if (fastq == null)
		{
			throw new IllegalArgumentException("fastq must not be null");
		}
		if (variant == null)
		{
			throw new IllegalArgumentException("variant must not be null");
		}
		if (fastq.getVariant().equals(variant))
		{
			return fastq.getQuality();
		}
		int size = fastq.getQuality().length();
		double[] errorProbabilities = errorProbabilities(fastq, new double[size]);
		StringBuilder sb = new StringBuilder(size);
		for (int i = 0; i < size; i++)
		{
			sb.append(variant.quality(variant.qualityScore(errorProbabilities[i])));
		}
		return sb.toString();
	}

	/**
	 * Return the specified iterable as a list.
	 *
	 * @paam  element type
	 * @param iterable iterable
	 * @return the specified iterable as a list
	 */
	@SuppressWarnings("unchecked")
	static  List toList(final Iterable iterable)
	{
		if (iterable instanceof List)
		{
			return (List) iterable;
		}
		return ImmutableList.copyOf(iterable);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy