All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.genome.parsers.cytoband.CytobandParser Maven / Gradle / Ivy

/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * created at 20 Feb 2014
 * Author: ap3
 */

package org.biojava.nbio.genome.parsers.cytoband;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;

/**
 * Parses the cytoband (karyotype) file from UCSC.
 *
 * <p>Each line of the input is split on tab characters into five columns,
 * which are stored, in order, as the chromosome, start, end, locus and
 * stain type of a {@link Cytoband}.
 *
 */
public class CytobandParser {

	private static final Logger logger = LoggerFactory
			.getLogger(CytobandParser.class);

	public static final String DEFAULT_LOCATION = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz";

	/** Number of tab-separated columns expected on every line. */
	private static final int EXPECTED_COLUMNS = 5;

	/**
	 * Downloads the file at {@link #DEFAULT_LOCATION}, logs every parsed
	 * cytoband and finally logs the set of distinct stain types seen.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {

		CytobandParser me = new CytobandParser();
		try {
			SortedSet<Cytoband> cytobands = me.getAllCytobands(new URL(
					DEFAULT_LOCATION));
			SortedSet<StainType> types = new TreeSet<StainType>();
			for (Cytoband c : cytobands) {
				logger.info("Cytoband: {}", c);
				StainType type = c.getType();
				// The parser stores null for unrecognised stain types, and a
				// naturally ordered TreeSet rejects null elements.
				if (type != null) {
					types.add(type);
				}
			}
			logger.info("Strain Type: {}", types);
		} catch (Exception e) {
			// Top-level boundary of a command-line entry point: report any
			// failure instead of letting it escape.
			logger.error("Exception: ", e);
		}

	}

	/**
	 * Reads a gzip-compressed cytoband file from the given URL and parses it.
	 * The stream opened on the URL is closed before this method returns.
	 *
	 * @param u location of the gzip-compressed cytoband file
	 * @return the parsed cytobands
	 * @throws IOException if the URL cannot be opened or read, or the content
	 *         is not valid gzip data
	 * @throws NumberFormatException if a start or end column is not an integer
	 */
	public SortedSet<Cytoband> getAllCytobands(URL u) throws IOException {
		// Two resources so the raw stream is still closed if the GZIP header
		// is invalid and the GZIPInputStream constructor throws.
		try (InputStream raw = u.openStream();
				InputStream stream = new GZIPInputStream(raw)) {
			return getAllCytobands(stream);
		}
	}

	/**
	 * Parses cytobands from an already decompressed stream of tab-separated
	 * lines.
	 *
	 * <p>Lines with fewer than five columns (for example blank lines) are
	 * logged and skipped. Lines with more than five columns are logged and
	 * parsed from their first five columns. A stain type that
	 * {@link StainType#getStainTypeFromString(String)} does not recognise is
	 * logged and stored as {@code null}.
	 *
	 * <p>The stream is not closed by this method; that is left to the caller.
	 *
	 * @param instream source of the cytoband lines
	 * @return the parsed cytobands
	 * @throws IOException if reading from the stream fails
	 * @throws NumberFormatException if a start or end column is not an integer
	 */
	public SortedSet<Cytoband> getAllCytobands(InputStream instream)
			throws IOException {
		BufferedReader reader = new BufferedReader(new InputStreamReader(
				instream, StandardCharsets.UTF_8));
		String line = null;
		SortedSet<Cytoband> cytobands = new TreeSet<Cytoband>();
		while ((line = reader.readLine()) != null) {
			String[] spl = line.split("\t");
			if (spl.length != EXPECTED_COLUMNS) {
				logger.warn(
						"WRONG LINE LENGHT, expected 5, but got {} for: {}",
						spl.length, line);
				if (spl.length < EXPECTED_COLUMNS) {
					// Not enough columns to fill a Cytoband; indexing below
					// would throw ArrayIndexOutOfBoundsException.
					continue;
				}
			}

			Cytoband b = new Cytoband();
			b.setChromosome(spl[0]);
			b.setStart(Integer.parseInt(spl[1]));
			b.setEnd(Integer.parseInt(spl[2]));
			b.setLocus(spl[3]);
			StainType type = StainType.getStainTypeFromString(spl[4]);
			if (type == null) {
				logger.warn("unknown type: {}", spl[4]);
			}
			b.setType(type);
			cytobands.add(b);
		}

		return cytobands;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy