All downloads are free. Search and download functionality uses the official Maven repository.

org.biojava.nbio.genome.parsers.gff.Feature Maven / Gradle / Ivy

/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.genome.parsers.gff;

import java.util.HashMap;


/**
 * A Feature corresponds to a single row in a GFF file.
 *
 * @author Hanno Hinsch
 */
public class Feature implements FeatureI {

	private Location mLocation;
	private String mSeqname;
	private String mSource;
	private String mType;
	private double mScore;			//or . if none
	private int mFrame;				//0,1,2
	private String mAttributes;			//any trailing stuff
	private HashMap mUserMap;

	/**
	 * Get the sequence name. (GFF field 1). Note that feature objects have
	 * no link or reference to the actual sequence object to which
	 * they refer; they are completely uncoupled.
	 *
	 * @return Sequence name.
	 */
	@Override
	public String seqname() {
		return mSeqname;
	}

	;

	/**
	 * Get source (aka method). (GFF field 2). This is often the name of
	 * the program or procedure that created the features.
	 *
	 * @return Source field.
	 */
	public String source() {
		return mSource;
	}

	;

	/**
	 * Get feature type, such as "exon" or "CDS". (GFF field 3).
	 *
	 * @return Feature type.
	 */
	@Override
	public String type() {
		return mType;
	}

	;

	/**
	 * Get location of feature. Note that feature objects have
	 * no link or reference to the actual sequence object to which
	 * they refer; they are completely uncoupled.
	 *
	 * @return Location of feature.
	 */
	@Override
	public Location location() {
		return mLocation;
	}

	/**
	 * Get score. (GFF field 7). The meaning of the score varies from file to file.
	 *
	 * @return Score value.
	 */
	public double score() {
		return mScore;
	}

	;

	/**
	 * Get frame (aka phase). (GFF field 8). Specifies the offset of the
	 * first nucleotide of the first in-frame codon, assuming this feature
	 * is a dna/rna sequence that codes
	 * for a protein. If you
	 * intend to use this field, you probably want to look it up on the web first.
	 *
	 * @return The frame (0, 1, 2).
	 */
	public int frame() {
		return mFrame;
	}

	;

	/**
	 * Get the string of key/value attributes. (GFF field 9). The format and
	 * meaning of this field varies from flavor to flavor of GFF/GTF. This method
	 * simply returns the whole string. Other methods in this class make assumptions
	 * about its format and provide additional utility.
	 *
	 * @return The attribute string.
	 */
	public String attributes() {
		return mAttributes;
	}

	;

	@SuppressWarnings("unused")
	private Feature() {
	}

	;        //unavailable

	/**
	 * Make a copy of the specified feature. The mappings in the userMap() HashMap
	 * are copied, so each feature has independent user data. Note, however, that the
	 * actual objects in the HashMap are shared (not copied), so a change to such an object may
	 * affect multiple features.
	 *
	 * @param feature Feature to clone.
	 */
	public Feature(Feature feature) {

		mSeqname = feature.mSeqname;
		mSource = feature.mSource;
		mType = feature.mType;
		mLocation = feature.mLocation;
		mScore = feature.mScore;
		mFrame = feature.mFrame;
		mAttributes = feature.mAttributes;
		initAttributeHashMap();
		mUserMap = new HashMap(feature.mUserMap);
	}

	/**
	 * Construct a new Feature from raw data (usually a GFF row).
	 *
	 * @param seqname The sequence name field (field 1).
	 * @param source The source or method field (field 2).
	 * @param type The type of feature field (field 3).
	 * @param location The location of the feature. (calculated from GFF start, end and strand fields).
	 * @param score The score field (field 7).
	 * @param frame The frame or phase field (field 8).
	 * @param attributes A string of key/value pairs separated by semicolons (field 9).
	 */
	public Feature(String seqname, String source, String type, Location location, Double score, int frame, String attributes) {

		mSeqname = seqname;
		mSource = source;
		mType = type;
		mLocation = location;
		mScore = score;
		mFrame = frame;
		mAttributes = attributes;
		initAttributeHashMap();
		mUserMap = new HashMap();

	}

	/**
	 * Get HashMap of user data. Each Feature object has a Java HashMap object
	 * which can be used to annotate the Feature. JavaGene does not use or interpret
	 * the keys or values. The values can be any subtype of the Java Object class.
	 *

* If a Feature is constructed from data fields, the initial HashMap has no mappings (is empty). * If a Feature is constructed from another Feature, a copy of the mappings is made. * Note that the Objects in the copied mapping are shared, even though the mapping itself * is copied (not shared). Thus removing or adding a mapping to one Feature will not affect the * other, but changing an Object which is part of an established mapping may affect both Features. * * @return The user HashMap. */ @Override public HashMap userData() { return mUserMap; } HashMap attributeHashMap = new HashMap(); private void initAttributeHashMap(){ String[] values = mAttributes.split(";"); for(String attribute : values){ attribute = attribute.trim(); int equalindex = attribute.indexOf("="); String splitData = "="; if(equalindex == -1) //gtf uses space and gff3 uses = splitData = " "; String[] data = attribute.split(splitData); String value = ""; if(data.length >= 2 && data[1].indexOf('"') != -1){ // an attibute field could be empty value = data[1].replaceAll('"' + "","").trim(); }else if(data.length >= 2){ value = data[1].trim(); } attributeHashMap.put(data[0].trim(), value); } } /** * Get value of specified attribute key. Returns null if the attribute key has no value (does not exist). * Keys are case-sensitive. Assumes attributes are correctly formatted in GFF style. * Known bug: a semicolon within a quoted value will cause parse failure. * * @param key The key. * @return The corresponding value. Null if the key has no value defined. 
*/ @Override public String getAttribute(String key) { return attributeHashMap.get(key); } public String getAttributeOld(String key) { int start = 0; int end = mAttributes.indexOf(';'); while (0 < end) { //find the first word (up to space) in chunk, // see if it is this key int i = mAttributes.indexOf(' ', start); if (0 < i && i < end) { if (mAttributes.substring(start, i).equals(key)) { //remove quotes, if needed if (mAttributes.charAt(i + 1) == '\"' && mAttributes.charAt(end - 1) == '\"') { return mAttributes.substring(i + 2, end - 1);//return attribute } else { return mAttributes.substring(i + 1, end); //return attribute } } } start = end + 2; //skip required semicolon and single space end = mAttributes.indexOf(';', start); } return null; } @Override public boolean hasAttribute(String key) { return attributeHashMap.containsKey(key); } @Override public boolean hasAttribute(String key, String value) { String data = getAttribute(key); if(data == null) return false; if(data.equals(value)) return true; else return false; } /** * Get the first item (everything before first semicolon, if it has one) * in the attribute field, which is assumed to * be a group identifer. This is appropriate for GFF1 files and variants. It is not * appropriate for GTF and GFF2 files, although they may use a named attribute key, * such as "gene_id" or "transcript_id", for grouping. * * @return The group id. Everything before the first semicolon in the attributes string (minus trailing whitespace). */ @Override public String group() { int i = mAttributes.indexOf(';'); return (i < 0) ? 
mAttributes.trim() : mAttributes.substring(0, i).trim(); } /** * */ @Override public String toString() { String s = mSeqname + '\t'; s += mSource + '\t'; s += mType + '\t'; s += mLocation.start() + "\t"; s += mLocation.end() + "\t"; s += Double.toString(mScore) + "\t"; if (mFrame == -1) { s += ".\t"; } else { s += mFrame + "\t"; } s += mAttributes; return s; } /** * @deprecated */ @Deprecated public static void main(String args[]) throws Exception { //Feature f= new Feature(); //intentionally perverse //f.group= "gene_id transcript; transcript \"gene_id fantom2\"; "; // f.addAttribute( "author", "julian" ); // f.addAttribute( "curator", "nick" ); // f.addAttribute( "author", "hanno" ); //Log.log( f.group ); //f.addAttribute( "perverse", "foo;goo" ); //assert f.getAttribute( "perverse").equals( "foo;goo" ); // assert f.getAttribute( "gene_id" ).equals( "transcript" ); // assert f.getAttribute( "author" ).equals( "julian hanno" ); // assert f.getAttribute( "curator" ).equals( "nick" ); // assert f.getAttribute( "transcript").equals( "gene_id fantom2" ); //Log.log( "passed test." ); } @Override public HashMap getAttributes() { return attributeHashMap; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy