All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencb.cellbase.build.loaders.mongodb.VariantEffectMongoDBLoader Maven / Gradle / Ivy

The newest version!
package org.opencb.cellbase.build.loaders.mongodb;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import org.apache.commons.lang.StringUtils;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.annotation.*;
import org.opencb.commons.io.DataWriter;
import org.opencb.commons.utils.CryptoUtils;
import org.opencb.datastore.core.QueryResult;
import org.opencb.datastore.mongodb.MongoDBCollection;
import org.opencb.datastore.mongodb.MongoDataStore;
import org.opencb.datastore.mongodb.MongoDataStoreManager;

import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 *
 * @author Cristina Yenyxe Gonzalez Garcia 
 */
@Deprecated
public class VariantEffectMongoDBLoader implements DataWriter {

    private String host;
    private int port;
    private String dbName;
    private String collectionName;
    private String user;
    private String pass;

    private MongoDataStoreManager manager;
    private MongoDataStore datastore;
    private MongoDBCollection collection;

    public VariantEffectMongoDBLoader(Properties applicationProperties) {
        host = applicationProperties.getProperty("MONGO.HOST", "localhost");
        port = Integer.parseInt(applicationProperties.getProperty("MONGO.PORT", "27017"));
        user = applicationProperties.getProperty("MONGO.USERNAME", null);
        pass = applicationProperties.getProperty("MONGO.PASSWORD", null);
        dbName = applicationProperties.getProperty("MONGO.DB", "");
        collectionName = applicationProperties.getProperty("MONGO.COLLECTIONS.VARIANT_EFFECT", "");
    }
    
    public VariantEffectMongoDBLoader(String host, int port, String dbName, String collectionName) {
        this.host = host;
        this.port = port;
        this.dbName = dbName;
        this.collectionName = collectionName;
    }

    @Override
    public boolean open() {
        manager = new MongoDataStoreManager(host, port);
        
        datastore = manager.get(dbName);
        if(user != null && pass != null && (!"".equals(user) || !"".equals(pass))) {
            datastore.getDb().authenticate(user,pass.toCharArray());
        }
        
        collection = datastore.getCollection(collectionName);

        return true;
    }

    @Override
    public boolean pre() {
        return true;
    }

    @Override
    public boolean write(VariantAnnotation elem) {
        return write(Arrays.asList(elem));
    }

    @Override
    public boolean write(List batch) {
        boolean ok = true;
        
        for (VariantAnnotation effect : batch) {
            String rowkey = buildRowkey(effect);
            BasicDBObject mongoEffect = new BasicDBObject("_id", rowkey)
                    .append("chr", effect.getChromosome())
                    .append("start", effect.getStart()).append("end", effect.getEnd())
                    .append("ref", effect.getReferenceAllele())
                    .append("freqs", getFrequenciesDBObject(effect.getFrequencies()))
                    .append("scores", getProteinSubstitutionScores(effect.getProteinSubstitutionScores()))
                    .append("regulatory", getRegulatoryEffect(effect.getRegulatoryEffect()));

            BasicDBList alleles = new BasicDBList();

            for (Map.Entry> allelesConsequences : effect.getEffects().entrySet()) {
                BasicDBObject alleleRoot = new BasicDBObject("alt", allelesConsequences.getKey());
                BasicDBList cts = new BasicDBList();

                for (VariantEffect ct : allelesConsequences.getValue()) {
                    cts.add(getConsequenceTypeDBObject(ct));
                }

                alleleRoot.append("val", cts);
                alleles.add(alleleRoot);
            }

            mongoEffect.append("ct", alleles);

//            QueryResult result = collection.insert(mongoEffect);
            QueryResult result = collection.update(new BasicDBObject("_id", rowkey), mongoEffect, true, true);
            if (result.getErrorMsg() != null) {
                // TODO Do anything special when an error occurs?
                Logger.getLogger(VariantEffectMongoDBLoader.class.getName()).log(Level.SEVERE, null, result.getErrorMsg());
                ok = false;
            }
        }

        return ok;
    }

    @Override
    public boolean post() {
        return true;
    }

    @Override
    public boolean close() {
        manager.close(datastore.getDatabaseName());
        return true;
    }

    
    private BasicDBObject getConsequenceTypeDBObject(VariantEffect variantEffect) {
        BasicDBObject object = new BasicDBObject("so", variantEffect.getConsequenceTypes());

        if (variantEffect.getGeneId() != null) {
            object.append("geneId", variantEffect.getGeneId());
        }
        if (variantEffect.getGeneName() != null) {
            object.append("geneName", variantEffect.getGeneName());
        }
        if (variantEffect.getGeneNameSource() != null) {
            object.append("geneNameSource", variantEffect.getGeneNameSource());
        }

        if (variantEffect.getFeatureId() != null) {
            object.append("featureId", variantEffect.getFeatureId());
        }
        if (variantEffect.getFeatureType() != null) {
            object.append("featureType", variantEffect.getFeatureType());
        }
        if (variantEffect.getFeatureStrand() != null) {
            object.append("featureStrand", variantEffect.getFeatureStrand());
        }
        if (variantEffect.getFeatureBiotype() != null) {
            object.append("featureBiotype", variantEffect.getFeatureBiotype());
        }

        if (variantEffect.getcDnaPosition() >= 0) {
            object.append("cdnaPos", variantEffect.getcDnaPosition());
        }
        if (variantEffect.getCcdsId() != null) {
            object.append("ccdsId", variantEffect.getCcdsId());
        }
        if (variantEffect.getCdsPosition() >= 0) {
            object.append("cdsPos", variantEffect.getCdsPosition());
        }

        if (variantEffect.getProteinId() != null) {
            object.append("proteinId", variantEffect.getProteinId());
        }
        if (variantEffect.getProteinPosition() >= 0) {
            object.append("proteinPos", variantEffect.getProteinPosition());
        }
        if (variantEffect.getProteinDomains() != null) {
            object.append("proteinDomains", StringUtils.join(variantEffect.getProteinDomains(), ","));
        }

        if (variantEffect.getAminoacidChange() != null) {
            object.append("aaChange", variantEffect.getAminoacidChange());
        }
        if (variantEffect.getCodonChange() != null) {
            object.append("codonChange", variantEffect.getCodonChange());
        }

        if (variantEffect.getVariationId() != null) {
            object.append("id", variantEffect.getVariationId());
        }
        if (variantEffect.getStructuralVariantsId() != null) {
            object.append("svIds", StringUtils.join(variantEffect.getStructuralVariantsId(), ","));
        }

        if (variantEffect.getHgvsc() != null) {
            object.append("hgvsc", variantEffect.getHgvsc());
        }
        if (variantEffect.getHgvsp() != null) {
            object.append("hgvsp", variantEffect.getHgvsp());
        }

        if (variantEffect.getIntronNumber() != null) {
            object.append("intron", variantEffect.getIntronNumber());
        }
        if (variantEffect.getExonNumber() != null) {
            object.append("exon", variantEffect.getExonNumber());
        }

        if (variantEffect.getVariantToTranscriptDistance() >= 0) {
            object.append("distance", variantEffect.getVariantToTranscriptDistance());
        }

        if (variantEffect.getClinicalSignificance() != null) {
            object.append("clinicSig", variantEffect.getClinicalSignificance());
        }

        if (variantEffect.getPubmed() != null) {
            object.append("pubmed", StringUtils.join(variantEffect.getPubmed(), ","));
        }
        object.append("canonical", variantEffect.isCanonical());

        return object;
    }

    private BasicDBObject getFrequenciesDBObject(Map> frequencies) {
        BasicDBObject object = null;
//        new BasicDBObject("mafAllele", frequencies.getAllele1000g())
//                .append("gmaf", frequencies.getMaf1000G())
//                .append("afrMaf", frequencies.getMaf1000GAfrican())
//                .append("amrMaf", frequencies.getMaf1000GAmerican())
//                .append("asnMaf", frequencies.getMaf1000GAsian())
//                .append("eurMaf", frequencies.getMaf1000GEuropean())
//                .append("afrAmrMaf", frequencies.getMafNhlbiEspAfricanAmerican())
//                .append("eurAmrMaf", frequencies.getMafNhlbiEspEuropeanAmerican());
        return object;
    }

    private BasicDBObject getProteinSubstitutionScores(ProteinSubstitutionScores scores) {
        BasicDBObject object = new BasicDBObject("polyScore", scores.getPolyphenScore())
                .append("siftScore", scores.getSiftScore());
        
        if (scores.getPolyphenEffect() != null) {
            object.append("polyEff", scores.getPolyphenEffect().name());
        }
        if (scores.getSiftEffect() != null) {
            object.append("siftEff", scores.getSiftEffect().name());
        }
        
        return object;
    }

    private BasicDBObject getRegulatoryEffect(RegulatoryEffect regulatory) {
        BasicDBObject object = new BasicDBObject("motifName", regulatory.getMotifName())
                .append("motifPos", regulatory.getMotifPosition())
                .append("motifScoreChange", regulatory.getMotifScoreChange())
                .append("highInfoPos", regulatory.isHighInformationPosition())
                .append("cellType", regulatory.getCellType());
        return object;
    }

    private String buildRowkey(VariantAnnotation v) {
        StringBuilder builder = new StringBuilder(v.getChromosome());
        builder.append("_");
        builder.append(v.getStart());
        builder.append("_");
        if (v.getReferenceAllele().length() < Variant.SV_THRESHOLD) {
            builder.append(v.getReferenceAllele());
        } else {
            builder.append(new String(CryptoUtils.encryptSha1(v.getReferenceAllele())));
        }

        return builder.toString();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy