All downloads are free. Search and download functionality uses the official Maven repository.

org.opencb.cellbase.lib.builders.GeneExpressionAtlasBuilder Maven / Gradle / Ivy

There is a newer version: 6.3.0
Show newest version
/*
 * Copyright 2015-2020 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.cellbase.lib.builders;

import org.opencb.cellbase.lib.builders.formats.GeneExpressionAtlas;
import org.opencb.cellbase.core.serializer.CellBaseSerializer;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

/**
 * Builds {@link GeneExpressionAtlas} records from tab-separated expression files and passes them
 * to the serializer supplied at construction time.
 *
 * <p>One sub-directory per experiment is read beneath the configured directory. Rows from all
 * experiments are merged by gene id (first column) before anything is serialized.
 *
 * <p>Created by antonior on 10/16/14.
 */
public class GeneExpressionAtlasBuilder extends CellBaseBuilder {

    /** Experiment sub-directory names, in the order they are read. */
    private static final String[] EXPERIMENTS = {
        "EncodeCellLines", "IlluminaBodyMap", "MammalianTissues", "TwentySevenTissues"
    };

    /** Lines starting with this prefix carry file metadata and are not data rows. */
    private static final String COMMENT_PREFIX = "#";

    /** The column-header row is recognised by this prefix. */
    private static final String HEADER_PREFIX = "Gene ID";

    private static final String FIELD_SEPARATOR = "\t";

    private final Path geneAtlasDirectoryPath;

    /**
     * @param geneAtlasDirectoryPath directory that contains one sub-directory per experiment
     * @param serializer             serializer handed to the parent builder; receives every merged record
     */
    public GeneExpressionAtlasBuilder(Path geneAtlasDirectoryPath, CellBaseSerializer serializer) {
        super(serializer);
        this.geneAtlasDirectoryPath = geneAtlasDirectoryPath;
    }

    /**
     * Reads every experiment directory, merges the rows by gene id and serializes each merged
     * record. Serialization only starts once all experiments have been read.
     *
     * <p>An {@link IOException} is printed to standard error and not rethrown; in that case
     * reading or serialization stops at the point of failure.
     */
    public void parse() {
        Map<String, GeneExpressionAtlas> geneAtlasMap = new HashMap<>();
        try {
            for (String experiment : EXPERIMENTS) {
                readFile(geneAtlasMap, experiment);
            }

            for (GeneExpressionAtlas geneExpressionAtlas : geneAtlasMap.values()) {
                serializer.serialize(geneExpressionAtlas);
            }
        } catch (IOException e) {
            // NOTE(review): kept as in the original so callers see no new exception type;
            // consider routing this through the project's logger or rethrowing.
            e.printStackTrace();
        }
    }

    /**
     * Reads every file found in the sub-directory named after {@code experiment} and merges its
     * rows into {@code geneAtlasMap}.
     *
     * @param geneAtlasMap accumulator keyed by gene id; updated in place
     * @param experiment   experiment name, also used as the sub-directory name
     * @throws IOException if the directory or one of its files cannot be read
     */
    private void readFile(Map<String, GeneExpressionAtlas> geneAtlasMap, String experiment) throws IOException {
        Path experimentDirectory = this.geneAtlasDirectoryPath.resolve(experiment);
        // The directory stream holds an open handle and must be closed explicitly.
        try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(experimentDirectory)) {
            for (Path filePath : directoryStream) {
                readExpressionFile(geneAtlasMap, experiment, filePath);
            }
        }
    }

    /**
     * Parses a single expression file: skips blank and metadata lines, remembers the header row
     * and merges every other row into {@code geneAtlasMap}.
     *
     * @throws IOException           if the file cannot be read
     * @throws IllegalStateException if a data row appears before the header row
     */
    private void readExpressionFile(Map<String, GeneExpressionAtlas> geneAtlasMap, String experiment, Path filePath)
            throws IOException {
        // Same reader construction as before (platform default charset), now closed reliably.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(new FileInputStream(filePath.toFile())))) {
            String[] header = null;
            String line;
            while ((line = reader.readLine()) != null) {
                // Metadata and blank lines are not data rows; previously "#" lines fell through
                // to the row parser and failed there.
                if (line.isEmpty() || line.startsWith(COMMENT_PREFIX)) {
                    continue;
                }

                if (line.startsWith(HEADER_PREFIX)) {
                    header = line.split(FIELD_SEPARATOR);
                } else {
                    if (header == null) {
                        throw new IllegalStateException("Data row found before the '" + HEADER_PREFIX
                                + "' header line in " + filePath);
                    }
                    updateGeneAtlasMap(geneAtlasMap, experiment, line, header);
                }
            }
        }
    }

    /**
     * Converts one data row into tissue entries and merges them into {@code geneAtlasMap}.
     *
     * <p>Column 0 is used as the gene id and column 1 as the gene name. Every further column
     * becomes a {@link GeneExpressionAtlas.Tissue} created from the header value of that column,
     * the experiment name and the column value parsed as a float.
     *
     * @param geneAtlasMap accumulator keyed by gene id; updated in place
     * @param experiment   experiment name stored on every tissue entry
     * @param line         tab-separated data row
     * @param header       tab-separated header row of the same file
     */
    private void updateGeneAtlasMap(Map<String, GeneExpressionAtlas> geneAtlasMap, String experiment, String line,
                                    String[] header) {
        String[] fields = line.split(FIELD_SEPARATOR);
        String geneid = fields[0];
        String genename = fields[1];

        List<GeneExpressionAtlas.Tissue> tissueList = new ArrayList<>();
        for (int i = 2; i < fields.length; i++) {
            tissueList.add(new GeneExpressionAtlas.Tissue(header[i], experiment, Float.parseFloat(fields[i])));
        }

        GeneExpressionAtlas existing = geneAtlasMap.get(geneid);
        if (existing != null) {
            // Gene already seen (possibly in another experiment): append the new tissues.
            existing.getTissues().addAll(tissueList);
        } else {
            geneAtlasMap.put(geneid, new GeneExpressionAtlas(geneid, genename, tissueList));
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy