// org.biojava.nbio.genome.parsers.gff.GFF3Writer Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.genome.parsers.gff;
import org.biojava.nbio.core.sequence.*;
import org.biojava.nbio.genome.GeneFeatureHelper;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
/**
 * Writes the gene, transcript (mRNA) and CDS features attached to chromosome
 * sequences as GFF3 text.
 *
 * @author Scooter Willis
 */
public class GFF3Writer {

    /** Value written in a column that has no defined value. */
    private static final String UNDEFINED = ".";

    /**
     * Output gff3 format for a set of chromosome sequences.
     * <p>
     * A <code>##gff-version 3</code> header line is written first. Then, for
     * every entry of the map in its iteration order and for every gene of
     * that chromosome, one <code>gene</code> line is written, followed for
     * each of its transcripts by one <code>mRNA</code> line and that
     * transcript's <code>CDS</code> lines sorted with a
     * <code>SequenceComparator</code>. The map key is used as the first
     * column of every feature line. Transcript IDs are built as
     * <code>&lt;gene id&gt;.&lt;n&gt;</code>, where n is the 1-based position
     * of the transcript within its gene.
     * <p>
     * Lines are terminated with <code>\n</code> and encoded as UTF-8. The
     * stream is neither flushed nor closed by this method.
     *
     * @param outputStream stream the GFF3 text is written to
     * @param chromosomeSequenceList chromosome sequences keyed by the name
     *        to use as the sequence id column
     * @throws Exception if writing to the output stream fails
     */
    public void write(OutputStream outputStream, LinkedHashMap<String, ChromosomeSequence> chromosomeSequenceList) throws Exception {
        writeLine(outputStream, "##gff-version 3");
        for (String key : chromosomeSequenceList.keySet()) {
            ChromosomeSequence chromosomeSequence = chromosomeSequenceList.get(key);
            // No feature line is written for the chromosome itself, only for its genes.
            for (GeneSequence geneSequence : chromosomeSequence.getGeneSequences().values()) {
                String geneId = geneSequence.getAccession().getID();
                writeLine(outputStream,
                        key + "\t" + geneSequence.getSource() + "\tgene\t"
                        + geneSequence.getBioBegin() + "\t" + geneSequence.getBioEnd() + "\t"
                        + scoreColumn(geneSequence.getSequenceScore()) + "\t"
                        + geneSequence.getStrand().getStringRepresentation() + "\t"
                        + UNDEFINED + "\t" // phase is not defined for a gene
                        + "ID=" + geneId + ";Name=" + geneId
                        + getGFF3Note(geneSequence.getNotesList()));

                int transcriptIndex = 0;
                for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) {
                    transcriptIndex++;
                    // The transcript ID doubles as the Parent of the CDS lines below.
                    String transcriptId = geneId + "." + transcriptIndex;
                    writeLine(outputStream,
                            key + "\t" + transcriptSequence.getSource() + "\tmRNA\t"
                            + transcriptSequence.getBioBegin() + "\t" + transcriptSequence.getBioEnd() + "\t"
                            + scoreColumn(transcriptSequence.getSequenceScore()) + "\t"
                            + transcriptSequence.getStrand().getStringRepresentation() + "\t"
                            + UNDEFINED + "\t" // phase is not defined for an mRNA
                            + "ID=" + transcriptId + ";Parent=" + geneId + ";Name=" + transcriptId
                            + getGFF3Note(transcriptSequence.getNotesList()));

                    ArrayList<CDSSequence> cdsSequenceList = new ArrayList<>(transcriptSequence.getCDSSequences().values());
                    Collections.sort(cdsSequenceList, new SequenceComparator());
                    for (CDSSequence cdsSequence : cdsSequenceList) {
                        writeLine(outputStream,
                                key + "\t" + cdsSequence.getSource() + "\tCDS\t"
                                + cdsSequence.getBioBegin() + "\t" + cdsSequence.getBioEnd() + "\t"
                                + scoreColumn(cdsSequence.getSequenceScore()) + "\t"
                                + cdsSequence.getStrand().getStringRepresentation() + "\t"
                                + cdsSequence.getPhase() + "\t"
                                + "ID=" + cdsSequence.getAccession().getID() + ";Parent=" + transcriptId
                                + getGFF3Note(cdsSequence.getNotesList()));
                    }
                }
            }
        }
    }

    /**
     * Writes one line followed by <code>\n</code>, encoded as UTF-8 so the
     * bytes produced do not depend on the platform default charset.
     */
    private static void writeLine(OutputStream outputStream, String line) throws IOException {
        outputStream.write((line + "\n").getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Formats the score column: <code>.</code> when there is no score,
     * otherwise the score's string form.
     */
    private static String scoreColumn(Double score) {
        return score == null ? UNDEFINED : score.toString();
    }

    /**
     * Builds the <code>;Note=</code> attribute suffix for a feature line.
     *
     * @param notesList notes of the feature, may be empty
     * @return an empty string when there are no notes, otherwise
     *         <code>;Note=</code> followed by the notes separated by single
     *         spaces, with no trailing space
     */
    private String getGFF3Note(ArrayList<String> notesList) {
        if (notesList == null || notesList.isEmpty()) {
            return "";
        }
        StringBuilder notes = new StringBuilder(";Note=");
        boolean first = true;
        for (String note : notesList) {
            // The separator goes between notes only, never after the last one.
            if (!first) {
                notes.append(' ');
            }
            notes.append(note);
            first = false;
        }
        return notes.toString();
    }

    /**
     * Development entry point: loads a FASTA file, adds the gene features of
     * a GeneID GFF2 file to it and writes the result as GFF3.
     * <p>
     * With exactly three arguments they are used as
     * <code>&lt;fasta file&gt; &lt;GeneID gff file&gt; &lt;output gff3 file&gt;</code>;
     * with any other argument count the built-in default paths are used.
     *
     * @param args optional input and output paths, see above
     * @throws Exception if loading the inputs or writing the output fails
     */
    public static void main(String[] args) throws Exception {
        String fastaPath = "/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna";
        String geneIdGffPath = "/Users/Scooter/scripps/dyadic/geneid/geneid/c1_geneid.gff";
        String outputPath = "/Users/Scooter/scripps/dyadic/geneid/geneid/c1-geneid.gff3";
        if (args != null && args.length == 3) {
            fastaPath = args[0];
            geneIdGffPath = args[1];
            outputPath = args[2];
        }

        // Load before opening the output so a failed load does not truncate an existing file.
        LinkedHashMap<String, ChromosomeSequence> dnaSequenceList =
                GeneFeatureHelper.loadFastaAddGeneFeaturesFromGeneIDGFF2(new File(fastaPath), new File(geneIdGffPath));

        // try-with-resources closes the file even when write() throws.
        try (FileOutputStream fo = new FileOutputStream(outputPath)) {
            GFF3Writer gff3Writer = new GFF3Writer();
            gff3Writer.write(fo, dnaSequenceList);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy