Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
package net.maizegenetics.dna.map;
import com.google.common.collect.BoundType;
import com.google.common.collect.Multimap;
import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import net.maizegenetics.util.DirectedGraph;
import net.maizegenetics.util.Utils;
import org.apache.commons.lang.StringUtils;
import org.json.simple.JSONObject;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.*;
/**
* Created by jgw87 on 7/2/14.
* A map to hold genome features for lookup by name and by location. The features themselves are hierarchical and so
* can be traced up and down the tree.
*
* As methods are added to return based on different filters, try to unify the results. That is, all filter-like methods
* should return the same sort of data structure, such as a HashSet of GenomeFeatures. It may be worthwhile to create
* a custom GenomeFeatureSet class to be able to string such operations together (mygenes = mygenes.ofType("exon").onChrom(1).inRange(1, 10000);),
* although whether such filters are needed for the bulk of this class's purpose (matching SNPs to genome annotations) is yet
* to be seen.
*
* This class shouldn't be created directly, but should instead use the GenomeFeatureMapBuilder class .build() method
*/
//TODO: Add functionality to write out a full constructed map to a file of some sort
public class GenomeFeatureMap {
//Graph of all the genome features, rooted at the genome itself
DirectedGraph featureTree = null;
//Lookups to identify GenomeFeatures by their name, type, and location
private HashMap nameLookup = new HashMap<>();
private Multimap typeLookup = null;
private HashMap>> locationLookup = null;
/**
* Default constructor for creating a GenomeFeatureMap from pre-made data structures. This should ONLY be called from
* the {@link GenomeFeatureMapBuilder} class
*
* @param nameLookup Lookup table of unique IDs -> {@link GenomeFeature} objects
* @param locationLookup Lookup table to retrieve {@link GenomeFeature}s by their genomic location
* @param typeLookup Lookup table to retrieve {@link GenomeFeature}s by their type (exon, UTR, etc)
* @param featureTree The graph of genomic features. Should be a directed graph, with the first two levels being genome and chromosome
*/
GenomeFeatureMap(HashMap nameLookup, Multimap typeLookup,
HashMap>> locationLookup, DirectedGraph featureTree) {
this.typeLookup = typeLookup;
this.nameLookup = nameLookup;
this.locationLookup = locationLookup;
this.featureTree = featureTree;
}
public GenomeFeature getFeatureFromId(String id) {
return nameLookup.get(id);
}
/**
* Get a {@link HashSet} of {@link GenomeFeature}s at a specified genome location. Takes chromsome as a String
* for ones like "Pt", "scaffold487", etc.
* @param chrom The chromosome name
* @param start Beginning physical position
* @param end End physical position
* @return A {@link HashSet} of GenomeFeatures
*/
public HashSet getFeaturesInRange(String chrom, int start, int end) {
Range myrange = Range.closed(start, end); //'Closed' = inclusive, so closed(1,3) = 1,2,3 and closed(1,1) = 1
Map, HashSet> chromMap = locationLookup.get(chrom).subRangeMap(myrange).asMapOfRanges();
HashSet featureSet = new HashSet<>();
for(Range r: chromMap.keySet()){
featureSet.addAll(chromMap.get(r));
}
return featureSet;
}
/**
* Get a {@link HashSet} of {@link GenomeFeature}s at a specified genome location
* @param chrom Chromosome number (should be the same as its name)
* @param position Physical position (base pair)
* @return A HashSet of GenomeFeatures
*/
public HashSet getFeaturesAtLocation(int chrom, int position) {
return getFeaturesInRange(chrom, position, position);
}
/**
* Get a {@link HashSet} of {@link GenomeFeature}s at a specified genome location Takes chromsome as a String
* for ones like "Pt", "scaffold487", etc.
* @param chrom Chromosome name
* @param position Physical position (base pair)
* @return A HashSet of GenomeFeatures
*/
public HashSet getFeaturesAtLocation(String chrom, int position) {
return getFeaturesInRange(chrom, position, position);
}
/**
* Get a {@link HashSet} of {@link GenomeFeature}s at a specified genome location. Takes chromsome as a String
* for ones like "Pt", "scaffold487", etc.
* @param chrom The chromosome number (should be the same as its name)
* @param start Beginning physical position
* @param end End physical position
* @return A HashSet of GenomeFeatures
*/
public HashSet getFeaturesInRange(int chrom, int start, int end) {
return getFeaturesInRange("" + chrom, start, end);
}
/** Get all {@link GenomeFeature}s of a specified type
* @param type The type of feature to get
* @return A {@link HashSet} of GenomeFeatures
*/
public HashSet getFeaturesOfType(String type){
HashSet featureSet = new HashSet<>();
featureSet.addAll(typeLookup.get(type));
return featureSet;
}
/**
* Write just the location lookup to a tab-delimited file. This is mostly to check that your locations loaded properly,
* as there is no way to read them back in.
*
* @param filename The output file to be written to
*/
public void writeLocationLookupToFile(String filename) {
try {
BufferedWriter writer = Utils.getBufferedWriter(filename);
writer.append("chrom\tstart\tstop\tfeatures\n");
String[] chroms = locationLookup.keySet().toArray(new String[0]);
Arrays.sort(chroms);
for (String chrom : chroms) {
Map, HashSet> itermap = locationLookup.get(chrom).asMapOfRanges();
for (Range r : itermap.keySet()) {
int start = r.lowerEndpoint();
int stop = r.upperEndpoint();
//Adjust start-stop positions if ranges are open on that end (= up to but not including that number)
if (r.lowerBoundType() == BoundType.OPEN) {
start++;
}
if (r.upperBoundType() == BoundType.OPEN) {
stop--;
}
writer.append(chrom + "\t" + start + "\t" + stop + "\t");
//List of features
HashSet features = itermap.get(r);
for (GenomeFeature f : features) {
writer.append(f.id() + ";");
}
writer.append("\n");
}
}
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Write the map data as a JSON file (which can be read in by {@link GenomeFeatureMapBuilder}). Core attributes
* (unique ID, chromosome, start, stop, and parent ID) are output to all features. Any additional attributes are
* output only for those features that have them. Attributes are output in alphabetical order. Since the attribute
* name has to be output for every feature, this can waste space if all your features have the same attributes. In
* that case a tab-delimited flat file or precompiled binary file is probably a better choice.
*
* @param filename The output file to be written to
*/
public void writeMapAsJsonFile(String filename) {
try {
BufferedWriter writer = Utils.getBufferedWriter(filename);
writer.append("[\n");
String[] sortedNames = nameLookup.keySet().toArray(new String[0]); //Sorted array so ordering is consistent
Arrays.sort(sortedNames);
//Go through with a for loop instead of foreach to know when hit last element
for (int i=0; i attributes = new HashSet<>();
for(GenomeFeature feature: nameLookup.values()){
attributes.addAll(feature.annotations().keySet());
}
//Put column heads in alphabetical order (except for "id"; that goes first and _should_ always be included)
ArrayList header = new ArrayList<>();
if(attributes.contains("id")){
attributes.remove("id");
header.add("id");
}
String[] tempNames = attributes.toArray(new String[0]);
Arrays.sort(tempNames);
for(String s: tempNames){
header.add(s);
}
//Write out file
try {
BufferedWriter writer = Utils.getBufferedWriter(filename);
//Header line
String firstline = StringUtils.join(header, "\t") + "\n";
writer.append(firstline);
//Bulk of data
String[] names = nameLookup.keySet().toArray(new String[0]);
Arrays.sort(names);
for(String id: names){
GenomeFeature feature = nameLookup.get(id);
ArrayList data = new ArrayList<>();
for(String column: header){
data.add(feature.getAnnotation(column));
}
writer.append(StringUtils.join(data, "\t") + "\n");
}
writer.close();
}catch(IOException e){
e.printStackTrace();
}
}
/**
* Write the map data as a precompiled binary that can be read back in by {@link GenomeFeatureMapBuilder}). Unlike
* JSON or flatfile format, this file is not human-readable and is simply a a representation of the Java data
* structures saved onto a disk. This makes it very compact and fast to read back in. This is the preferred format
* for long-term storage of a {@link GenomeFeatureMap} since there is (almost) no risk of someone accidentally modifying
* the file.
*
* @param filename The output file to be written to
*/
//TODO: Test and use this
//TODO: Implement Serializable interface in order to write all these things out
public void writeMapAsBinaryFile(String filename) {
}
}