All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.phosphosite.Site Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.phosphosite;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;

/**
 * Created by ap3 on 31/10/2014.
 */
public class Site {

	private final static Logger logger = LoggerFactory.getLogger(Site.class);

	public Site(){


	}

	public static List parseSites(File f) throws IOException {

		InputStream inStream = new FileInputStream(f);
		InputStream gzipStream = new GZIPInputStream(inStream);

		Reader decoder = new InputStreamReader(gzipStream);
		BufferedReader buf = new BufferedReader(decoder);

		String line = null;

		List data = new ArrayList();

		List headerFields = null;

		int proteinIndex = -1;
		int uniprotIndex = -1;
		int residueIndex = -1;
		int orgIndex     = -1;
		int groupIndex   = -1;
		int geneIndex    = -1;

		boolean inHeader = true;


		while ((line = buf.readLine()) != null){
			if ( line.startsWith("GENE") ||
					line.startsWith("PROTEIN")) {

				headerFields = parseHeaderFields(line);

				proteinIndex = headerFields.indexOf("PROTEIN");
				uniprotIndex = headerFields.indexOf("ACC_ID");
				residueIndex = headerFields.indexOf("MOD_RSD");
				orgIndex     = headerFields.indexOf("ORGANISM");
				groupIndex   = headerFields.indexOf("SITE_GRP_ID");
				geneIndex 	 = headerFields.indexOf("GENE");

				inHeader = false;
				continue;
			}
			if ( inHeader)
				continue;

			if ( line.trim().length() == 0)
				continue;

			// fields are:
			String[] spl = line.split("\t");
			if ( spl.length  < 5){
				logger.info("Found wrong line length: " + line);
				continue;

			}

			String protein = spl[proteinIndex];
			String uniprot = spl[uniprotIndex];

			String residue = spl[residueIndex];

			String[] resSpl = residue.split("-");
			String modType = null;
			if ( resSpl.length == 2) {

				 modType = resSpl[1];
			}
			String group    = spl[groupIndex];

			String organism = spl[orgIndex];

			String geneSymb = spl[geneIndex];

			Site s = new Site();
			s.setProtein(protein);
			s.setUniprot(uniprot);
			s.setGeneSymb(geneSymb);
			s.setModType(modType);
			s.setResidue(residue);
			s.setGroup(group);
			s.setOrganism(organism);
			data.add(s);

		}
		buf.close();

		return data;

	}

	private static List parseHeaderFields(String line) {
		String[] spl = line.split("\t");

		List h = new ArrayList();
		for (String s: spl){
			h.add(s);

		}

		return h;
	}

	String protein;
	String uniprot;
	String geneSymb;
	String chrLoc;
	String modType;
	String residue ;
	String group;
	String organism;

	public String getProtein() {
		return protein;
	}

	public void setProtein(String protein) {
		this.protein = protein;
	}

	public String getUniprot() {
		return uniprot;
	}

	public void setUniprot(String uniprot) {
		this.uniprot = uniprot;
	}

	public String getGeneSymb() {
		return geneSymb;
	}

	public void setGeneSymb(String geneSymb) {
		this.geneSymb = geneSymb;
	}

	public String getChrLoc() {
		return chrLoc;
	}

	public void setChrLoc(String chrLoc) {
		this.chrLoc = chrLoc;
	}

	public String getModType() {
		return modType;
	}

	public void setModType(String modType) {
		this.modType = modType;
	}

	public String getResidue() {
		return residue;
	}

	public void setResidue(String residue) {
		this.residue = residue;
	}

	public String getGroup() {
		return group;
	}

	public void setGroup(String group) {
		this.group = group;
	}

	public String getOrganism() {
		return organism;
	}

	public void setOrganism(String organism) {
		this.organism = organism;
	}

	@Override
	public String toString() {
		StringBuffer s = new StringBuffer();

		s.append("Site{" +
				"protein='" + protein + '\'');
		if ( uniprot != null)
				s.append(", uniprot='" + uniprot + '\'' );
		if ( geneSymb != null)
			s.append(
				", geneSymb='" + geneSymb + '\'' );
		if (chrLoc != null)
				s.append(", chrLoc='" + chrLoc + '\'' );
		if (modType != null)
			s.append(", modType='" + modType + '\'' );

		if (residue != null)
			s.append(        ", residue='" + residue + '\'' );
		if ( group != null)
				s.append(", group='" + group + '\'' );
		if (organism != null)
			s.append(", organism='" + organism + '\'' );

		  s.append(      '}');

		return s.toString();
	}
}






© 2015 - 2025 Weber Informatics LLC | Privacy Policy