org.opencb.cellbase.build.transform.GeneExpressionAtlasParser Maven / Gradle / Ivy
The newest version!
package org.opencb.cellbase.build.transform;
import org.opencb.cellbase.core.serializer.CellBaseSerializer;
import org.opencb.cellbase.build.transform.formats.GeneExpressionAtlas;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
/**
* Created by antonior on 10/16/14.
*/
public class GeneExpressionAtlasParser extends CellBaseParser {
private Path gene_atlas_directory_path;
public GeneExpressionAtlasParser(Path gene_atlas_directory_path, CellBaseSerializer serializer) {
super(serializer);
this.gene_atlas_directory_path = gene_atlas_directory_path;
}
public void parse() {
Map geneAtlasMap = new HashMap<>();
try {
String Experiment1 = "EncodeCellLines";
readFile(geneAtlasMap, Experiment1);
String Experiment2 = "IlluminaBodyMap";
readFile(geneAtlasMap, Experiment2);
String Experiment3 = "MammalianTissues";
readFile(geneAtlasMap, Experiment3);
String Experiment4 = "TwentySevenTissues";
readFile(geneAtlasMap, Experiment4);
Collection allGeneAtlasRecords = geneAtlasMap.values();
for (GeneExpressionAtlas one_atlas_gene : allGeneAtlasRecords) {
serializer.serialize(one_atlas_gene);
}
} catch (IOException e) {
e.printStackTrace();
}
}
private void readFile(Map geneAtlasMap, String experiment) throws IOException {
DirectoryStream directoryStream = Files.newDirectoryStream(this.gene_atlas_directory_path.resolve(experiment));
for (Path file_path : directoryStream) {
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file_path.toFile())));
String line;
String Metainfo="";
String [] header = null;
while ((line = reader.readLine()) != null){
if (line.startsWith("#")){
Metainfo = Metainfo+line;
}
if (line.startsWith("Gene ID")){
header = line.split("\t");
}
else{
updateGeneAtlasMap(geneAtlasMap, experiment, line, header);
}
}
}
}
private void updateGeneAtlasMap(Map geneAtlasMap, String experiment, String line, String[] header) {
List fields = Arrays.asList(line.split("\t"));
String geneid = fields.get(0);
String genename = fields.get(1);
List tissueList = new ArrayList<>();
for (int i = 2; i < fields.size(); i++ ){
GeneExpressionAtlas.Tissue tissueToAddList = new GeneExpressionAtlas.Tissue(header[i], experiment,Float.parseFloat(fields.get(i)));
tissueList.add(tissueToAddList);
}
if (geneAtlasMap.get(geneid) != null){
geneAtlasMap.get(geneid).getTissues().addAll(tissueList);
}
else {
GeneExpressionAtlas geneAtlasInstance = new GeneExpressionAtlas(geneid, genename, tissueList);
geneAtlasMap.put(geneid,geneAtlasInstance);
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy