org.biojava.nbio.genome.parsers.cytoband.CytobandParser Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* created at 20 Feb 2014
* Author: ap3
*/
package org.biojava.nbio.genome.parsers.cytoband;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
/**
 * Parses the cytoband (karyotype) file from UCSC.
 *
 * <p>Every input line is expected to consist of five tab-separated fields,
 * in this order: chromosome, start, end, locus and stain type. The parsed
 * records are collected in a {@link TreeSet} created without a comparator,
 * so they are ordered (and de-duplicated) by the natural ordering of
 * {@code Cytoband}.
 */
public class CytobandParser {

    private static final Logger logger = LoggerFactory
            .getLogger(CytobandParser.class);

    /** Location of the gzip-compressed hg19 cytoband table at UCSC. */
    public static final String DEFAULT_LOCATION = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz";

    /** Number of tab-separated fields a well-formed line contains. */
    private static final int EXPECTED_FIELDS = 5;

    /**
     * Downloads the table at {@link #DEFAULT_LOCATION}, logs every parsed
     * cytoband and finally logs the set of distinct stain types encountered.
     * Any failure is logged rather than propagated.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CytobandParser me = new CytobandParser();
        try {
            SortedSet<Cytoband> cytobands = me.getAllCytobands(new URL(
                    DEFAULT_LOCATION));
            SortedSet<StainType> types = new TreeSet<>();
            for (Cytoband c : cytobands) {
                logger.info("Cytoband: {}", c);
                StainType type = c.getType();
                // The parser stores null for an unrecognised stain; a TreeSet
                // with natural ordering throws NullPointerException on null,
                // so such entries are left out of the summary.
                if (type != null) {
                    types.add(type);
                }
            }
            logger.info("Strain Type: {}", types);
        } catch (Exception e) {
            // Top-level boundary of a command-line entry point: report and exit.
            logger.error("Exception: ", e);
        }
    }

    /**
     * Reads and parses a gzip-compressed cytoband file from the given URL.
     * The connection opened here is closed before the method returns,
     * whether parsing succeeds or fails.
     *
     * @param u location of the gzip-compressed cytoband file
     * @return the parsed cytobands, sorted by their natural ordering
     * @throws IOException if the URL cannot be opened, the content is not
     *         valid gzip data, or reading fails
     * @throws NumberFormatException if a start or end field is not an integer
     */
    public SortedSet<Cytoband> getAllCytobands(URL u) throws IOException {
        // Two resources so the raw stream is still closed when the
        // GZIPInputStream constructor itself fails (e.g. bad gzip header).
        try (InputStream raw = u.openStream();
                InputStream stream = new GZIPInputStream(raw)) {
            return getAllCytobands(stream);
        }
    }

    /**
     * Parses uncompressed, tab-separated cytoband data from a stream.
     *
     * <p>Lines with fewer than five fields (including blank lines) are
     * reported with a warning and skipped. Lines with more than five fields
     * are reported with a warning and parsed from their first five fields.
     * An unrecognised stain type is reported with a warning and stored as
     * {@code null} on the resulting record.
     *
     * <p>The stream is read to its end but is <em>not</em> closed; closing it
     * remains the responsibility of the caller that opened it.
     *
     * @param instream stream delivering the uncompressed file content
     * @return the parsed cytobands, sorted by their natural ordering
     * @throws IOException if reading from the stream fails
     * @throws NumberFormatException if a start or end field is not an integer
     */
    public SortedSet<Cytoband> getAllCytobands(InputStream instream)
            throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                instream));
        SortedSet<Cytoband> cytobands = new TreeSet<>();
        String line;
        while ((line = reader.readLine()) != null) {
            String[] spl = line.split("\t");
            if (spl.length != EXPECTED_FIELDS) {
                logger.warn(
                        "WRONG LINE LENGHT, expected 5, but got {} for: {}",
                        spl.length, line);
                if (spl.length < EXPECTED_FIELDS) {
                    // Indexing the missing fields would throw
                    // ArrayIndexOutOfBoundsException; drop the line instead.
                    continue;
                }
            }
            Cytoband b = new Cytoband();
            b.setChromosome(spl[0]);
            b.setStart(Integer.parseInt(spl[1]));
            b.setEnd(Integer.parseInt(spl[2]));
            b.setLocus(spl[3]);
            StainType type = StainType.getStainTypeFromString(spl[4]);
            if (type == null) {
                logger.warn("unknown type: {}", spl[4]);
            }
            b.setType(type);
            cytobands.add(b);
        }
        return cytobands;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy