All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.seq.format.IlluminaFASTQ Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.format;

import com.hfg.bio.seq.BioSequenceFactory;
import com.hfg.bio.seq.NucleicAcid;
import com.hfg.util.BooleanUtil;
import com.hfg.util.StringUtil;

//------------------------------------------------------------------------------
/**
 FASTQ sequence format from Illumina. Fields are parsed from the header line into
 attributes on the sequence object.
 
See Illumina's file format description.
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class IlluminaFASTQ extends FASTQ { //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public IlluminaFASTQ() { super(null); } //--------------------------------------------------------------------------- public IlluminaFASTQ(BioSequenceFactory inSeqFactory) { super(inSeqFactory); } //########################################################################### // PROTECTED METHODS //########################################################################### //--------------------------------------------------------------------------- // See: https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm // Header line format: // @::::::: ::: // // Ex: @HWI-M01141:63:A4NDL:1:1101:16668:1377 1:N:0:TATAGCGAGACACCGT 
// instrument = HWI-M01141 // run number = 63 // flowcell ID = A4NDL // lane = 1 // tile = 1101 // x-pos = 16668 // y-pos = 1377 // UMI (optional) = // read = 1 // is filtered = N // control number = 0 // index = TATAGCGAGACACCGT protected void parseHeaderLine(String inLine, T inSeq) { // Let the super class break the header line into id and description super.parseHeaderLine(inLine, inSeq); // Extract Illumina fields from the id String[] fields = inSeq.getID().split(":"); if (fields.length < 7 || fields.length > 8) { throw new SeqFormatException("Unexpected number of fields in the header id " + StringUtil.singleQuote(inSeq.getID()) + "!"); } inSeq.setAttribute("instrument", fields[0]); inSeq.setAttribute("run number", Integer.parseInt(fields[1])); inSeq.setAttribute("flowcell ID", fields[2]); inSeq.setAttribute("lane", Integer.parseInt(fields[3])); inSeq.setAttribute("tile", Integer.parseInt(fields[4])); inSeq.setAttribute("x-pos", Integer.parseInt(fields[5])); inSeq.setAttribute("y-pos", Integer.parseInt(fields[6])); if (8 == fields.length) { inSeq.setAttribute("UMI", fields[7]); } // Extract Illumina fields from the description fields = inSeq.getDescription().split(":"); if (fields.length != 4) { throw new SeqFormatException("Unexpected number of fields in the header description " + StringUtil.singleQuote(inSeq.getDescription()) + "!"); } inSeq.setAttribute("read", Integer.parseInt(fields[0])); inSeq.setAttribute("is filtered", BooleanUtil.valueOf(fields[1])); inSeq.setAttribute("control number", Integer.parseInt(fields[2])); inSeq.setAttribute("index", fields[3]); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy