All downloads are free. The search and download features use the official Maven repository.

org.opencb.cellbase.lib.managers.VariantManager Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015-2020 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.cellbase.lib.managers;

import org.opencb.biodata.models.core.*;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantBuilder;
import org.opencb.biodata.models.variant.avro.SampleEntry;
import org.opencb.biodata.models.variant.avro.Score;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.cellbase.core.ParamConstants;
import org.opencb.cellbase.core.api.SnpQuery;
import org.opencb.cellbase.core.api.VariantQuery;
import org.opencb.cellbase.core.api.key.ApiKeyLicensedDataUtils;
import org.opencb.cellbase.core.api.query.CellBaseQueryOptions;
import org.opencb.cellbase.core.api.query.QueryException;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.models.DataRelease;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.core.variant.AnnotationBasedPhasedQueryManager;
import org.opencb.cellbase.lib.impl.core.CellBaseCoreDBAdaptor;
import org.opencb.cellbase.lib.impl.core.SnpMongoDBAdaptor;
import org.opencb.cellbase.lib.impl.core.SpliceScoreMongoDBAdaptor;
import org.opencb.cellbase.lib.impl.core.VariantMongoDBAdaptor;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.cellbase.lib.variant.annotation.CellBaseNormalizerSequenceAdaptor;
import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator;
import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;

import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

public class VariantManager extends AbstractManager implements AggregationApi {

    // Regular expression used by parseVariant to split a variant string into its '+'-separated parts
    // (variant, genotype, phase set). The '+' is escaped because String.split takes a regex.
    private static final String PHASE_DATA_URL_SEPARATOR = "\\+";
    // Human-readable description of the expected variant string layout; in this class it is only
    // embedded in the "Malformed variant string" error messages.
    private static final String VARIANT_STRING_FORMAT = "(chr)"
            + ":[(cipos_left)<](start)[<(cipos_right)]" + "[-[(ciend_left)<](end)[<(ciend_right)]]"
            + "[:(ref)]"
            + ":[(alt)|(left_ins_seq)...(right_ins_seq)]";
    // Database adaptors; all three are obtained from dbAdaptorFactory in init().
    private VariantMongoDBAdaptor variantDBAdaptor;
    private SpliceScoreMongoDBAdaptor spliceDBAdaptor;
    private SnpMongoDBAdaptor snpDBAdaptor;

    // Factory passed to every VariantAnnotationCalculator this manager creates; also supplies genomeManager.
    private CellBaseManagerFactory cellbaseManagerFactory;
    // Passed to the HgvsCalculator and to the sequence adaptor used for left alignment.
    private GenomeManager genomeManager;

    /**
     * Creates a manager for a species without naming an assembly.
     * Delegates to the three-argument constructor with {@code null} as the assembly.
     *
     * @param species species forwarded to the three-argument constructor
     * @param configuration CellBase configuration forwarded to the three-argument constructor
     * @throws CellBaseException propagated from the three-argument constructor
     */
    public VariantManager(String species, CellBaseConfiguration configuration) throws CellBaseException {
        this(species, null, configuration);
    }

    /**
     * Creates a manager for a species and assembly: hands the arguments to the parent constructor and then
     * resolves the adaptors and helper managers in {@code init()}.
     *
     * @param species species forwarded to the parent constructor
     * @param assembly assembly forwarded to the parent constructor; the two-argument constructor passes null
     * @param configuration CellBase configuration forwarded to the parent constructor
     * @throws CellBaseException propagated from the parent constructor or from {@code init()}
     */
    public VariantManager(String species, String assembly, CellBaseConfiguration configuration)
            throws CellBaseException {
        super(species, assembly, configuration);

        this.init();
    }

    /**
     * Resolves the collaborators used by this manager: the variation, splice score and SNP adaptors from
     * {@code dbAdaptorFactory}, a new {@code CellBaseManagerFactory} built from the configuration, and the
     * genome manager for this species/assembly obtained from that factory.
     *
     * @throws CellBaseException propagated from the adaptor or manager lookups
     */
    private void init() throws CellBaseException {
        variantDBAdaptor = dbAdaptorFactory.getVariationDBAdaptor();
        spliceDBAdaptor = dbAdaptorFactory.getSpliceScoreDBAdaptor();
        snpDBAdaptor = dbAdaptorFactory.getSnpDBAdaptor();
        // The factory must exist before the genome manager can be requested from it.
        cellbaseManagerFactory = new CellBaseManagerFactory(configuration);
        genomeManager = cellbaseManagerFactory.getGenomeManager(species, assembly);
    }

    /**
     * Returns the adaptor backing this manager.
     *
     * @return the variation adaptor resolved in {@code init()}
     */
    @Override
    public CellBaseCoreDBAdaptor getDBAdaptor() {
        return variantDBAdaptor;
    }

    /**
     * Runs a query against the variation adaptor by delegating to its {@code nativeGet}.
     *
     * @param query query forwarded unchanged to the adaptor
     * @param queryOptions options forwarded unchanged to the adaptor
     * @param dataRelease data release number forwarded unchanged to the adaptor
     * @return whatever the adaptor returns for the query
     * @throws CellBaseException propagated from the adaptor
     */
    public CellBaseDataResult get(Query query, QueryOptions queryOptions, int dataRelease) throws CellBaseException {
        return variantDBAdaptor.nativeGet(query, queryOptions, dataRelease);
    }

    public List> getHgvsByVariant(String variants, DataRelease dataRelease)
            throws CellBaseException, QueryException, IllegalAccessException {
        List variantList = parseVariants(variants);
        HgvsCalculator hgvsCalculator = new HgvsCalculator(genomeManager, dataRelease.getRelease());
        List> results = new ArrayList<>();
        VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
                dataRelease, "", cellbaseManagerFactory);
        List batchGeneList = variantAnnotationCalculator.getBatchGeneList(variantList);
        for (Variant variant : variantList) {
            List variantGeneList = variantAnnotationCalculator.getAffectedGenes(batchGeneList, variant);
            List hgvsStrings = hgvsCalculator.run(variant, variantGeneList, false);
            results.add(new CellBaseDataResult<>(variant.getId(), 0, new ArrayList<>(), hgvsStrings.size(), hgvsStrings, -1));
        }
        return results;
    }

    /**
     * Parses a comma separated variant string and runs the parsed variants through the normalizer of a
     * freshly created annotation calculator.
     *
     * @param variants comma separated variant strings; also used as the id of the returned result
     * @param decompose value applied to the normalizer's "decompose MNVs" setting
     * @param leftAlign {@code true} enables left alignment (backed by a sequence adaptor built from the genome
     *                  manager and the data release number), {@code false} disables it
     * @param dataRelease data release handed to the calculator and the sequence adaptor
     * @return result wrapping the list returned by the normalizer
     * @throws CellBaseException propagated from the calculator (the previous documentation cites an
     *                           incorrect species as the cause)
     */
    public CellBaseDataResult getNormalizationByVariant(String variants, boolean decompose, boolean leftAlign,
                                                        DataRelease dataRelease) throws CellBaseException {
        List parsedVariants = parseVariants(variants);
        VariantAnnotationCalculator calculator = new VariantAnnotationCalculator(species, assembly, dataRelease, "",
                cellbaseManagerFactory);

        // MNV decomposition is configured first, exactly as requested by the caller.
        calculator.getNormalizer().getConfig().setDecomposeMNVs(decompose);

        // Left alignment is switched explicitly in both directions rather than relying on a default.
        if (!leftAlign) {
            calculator.getNormalizer().getConfig().disableLeftAlign();
        } else {
            calculator.getNormalizer().getConfig().enableLeftAlign(
                    new CellBaseNormalizerSequenceAdaptor(genomeManager, dataRelease.getRelease()));
        }

        List normalised = calculator.normalizer(parsedVariants);
        int numResults = normalised.size();
        return new CellBaseDataResult<>(variants, 0, new ArrayList<>(), numResults, normalised, -1);
    }

    public List> getAnnotationByVariant(QueryOptions queryOptions,
                                                                              String variants,
                                                                              Boolean normalize,
                                                                              Boolean decompose,
                                                                              Boolean leftAlign,
                                                                              Boolean ignorePhase,
                                                                              @Deprecated Boolean phased,
                                                                              Boolean imprecise,
                                                                              Integer svExtraPadding,
                                                                              Integer cnvExtraPadding,
                                                                              Boolean checkAminoAcidChange,
                                                                              String consequenceTypeSource,
                                                                              DataRelease dataRelease,
                                                                              String apiKey)
            throws ExecutionException, InterruptedException, CellBaseException, QueryException, IllegalAccessException {
        List variantList = parseVariants(variants);
        logger.debug("queryOptions: " + queryOptions);

        // If ignorePhase (new parameter) is present, then overrides presence of "phased"
        if (ignorePhase != null) {
            queryOptions.put("ignorePhase", ignorePhase);
            // If the new parameter (ignorePhase) is not present but old one ("phased") is, then follow old one - probably
            // someone who has not moved to the new parameter yet
        } else if (phased != null) {
            queryOptions.put("ignorePhase", !phased);
            // Default behavior is to perform phased annotation
        } else {
            queryOptions.put("ignorePhase", false);
        }

        if (normalize != null) {
            queryOptions.put("normalize", normalize);
        }
        if (decompose != null) {
            queryOptions.put("decompose", decompose);
        }
        if (leftAlign != null) {
            queryOptions.put("leftAlign", leftAlign);
        }
        if (imprecise != null) {
            queryOptions.put("imprecise", imprecise);
        }
        if (svExtraPadding != null) {
            queryOptions.put("svExtraPadding", svExtraPadding);
        }
        if (cnvExtraPadding != null) {
            queryOptions.put("cnvExtraPadding", cnvExtraPadding);
        }
        if (checkAminoAcidChange != null) {
            queryOptions.put("checkAminoAcidChange", checkAminoAcidChange);
        }
        if (consequenceTypeSource != null) {
            queryOptions.put("consequenceTypeSource", consequenceTypeSource);
        }

        VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
                dataRelease, apiKey, cellbaseManagerFactory);
        List> queryResults = variantAnnotationCalculator.getAnnotationByVariantList(variantList,
                queryOptions);
        return queryResults;
    }

    /**
     * Splits a comma separated string into its items and parses each item into a {@code Variant}.
     *
     * @param variantsString comma separated variant strings; may be null or empty
     * @return the parsed variants in input order, or {@code null} when the input is null or empty. The
     *         null is kept deliberately: callers in this class forward the value to the annotation
     *         calculator, whose treatment of an empty list is not visible from here.
     * @throws IllegalArgumentException if an item has an unexpected number of '+'-separated parts
     */
    private List<Variant> parseVariants(String variantsString) {
        if (variantsString == null || variantsString.isEmpty()) {
            return null;
        }

        String[] variantItems = variantsString.split(",");
        List<Variant> variants = new ArrayList<>(variantItems.length);
        for (String variantString : variantItems) {
            variants.add(parseVariant(variantString));
        }
        return variants;
    }

    /**
     * Builds a {@code Variant} from one item of the form {@code variantString[+GT+PS]}.
     *
     * <p>The item is split on '+'. The first part always feeds the {@code VariantBuilder}. Sample data is
     * attached only when there are exactly three parts and the third (phase set) is not empty: the phase
     * set is added first and the genotype (second part) follows when it is not empty. Items with one or two
     * parts produce the variant without sample data. Zero parts (possible because {@code String.split}
     * drops trailing empty strings, e.g. for an item made only of separators) or more than three parts are
     * rejected.
     *
     * <p>NOTE(review): the two lists below are raw types; their element types appear to have been lost
     * together with the other generic arguments of this file - confirm before parameterising them.
     *
     * @param variantString single variant item, optionally followed by {@code +GT+PS}
     * @return the variant produced by the builder
     * @throws IllegalArgumentException if the item splits into zero or more than three parts
     */
    private Variant parseVariant(String variantString) {
        String[] parts = variantString.split(PHASE_DATA_URL_SEPARATOR);

        if (parts.length == 0) {
            throw new IllegalArgumentException(malformedVariantMessage(variantString));
        }

        // The builder is created before the upper bound is checked, so a failure while reading the first
        // part still takes precedence over the "too many parts" error.
        VariantBuilder builder = new VariantBuilder(parts[0]);
        if (parts.length > 3) {
            throw new IllegalArgumentException(malformedVariantMessage(variantString));
        }

        // Either 1 or 3 parts expected (variant+GT+PS); without a phase set no phase data is added at all.
        boolean hasPhaseSet = parts.length == 3 && !parts[2].isEmpty();
        if (hasPhaseSet) {
            List sampleDataKeys = new ArrayList<>(2);
            List sampleData = new ArrayList<>(2);

            sampleDataKeys.add(AnnotationBasedPhasedQueryManager.PHASE_SET_TAG);
            sampleData.add(parts[2]);

            // The genotype may be empty, in which case only the phase set is recorded.
            if (!parts[1].isEmpty()) {
                sampleDataKeys.add(AnnotationBasedPhasedQueryManager.GENOTYPE_TAG);
                sampleData.add(parts[1]);
            }

            builder.setSampleDataKeys(sampleDataKeys);
            SampleEntry entry = new SampleEntry();
            entry.setData(sampleData);
            builder.setSamples(Collections.singletonList(entry));
        }

        return builder.build();
    }

    /** Builds the error message shared by both malformed-input cases of {@code parseVariant}. */
    private static String malformedVariantMessage(String variantString) {
        return "Malformed variant string " + variantString + ". "
                + "variantString+GT+PS expected, where variantString needs 3 or 4 fields separated by ':'. "
                + "Format: \"" + VARIANT_STRING_FORMAT + "\"";
    }

    /**
     * Lists the keys of {@code VariantAnnotationUtils.SO_SEVERITY} in their natural order.
     *
     * @return result with id "consequence_types" whose results are the sorted keys and whose number of
     *         results is the size of that list
     */
    public CellBaseDataResult getConsequenceTypes() {
        // Copy the key set so that sorting never touches the shared map.
        List sortedKeys = new ArrayList<>(VariantAnnotationUtils.SO_SEVERITY.keySet());
        Collections.sort(sortedKeys);

        CellBaseDataResult result = new CellBaseDataResult<>("consequence_types");
        result.setNumResults(sortedKeys.size());
        result.setResults(sortedKeys);
        return result;
    }

    /**
     * Checks that none of the given regions spans more than 10Mb ({@code end - start > 10000000}).
     *
     * <p>NOTE(review): the element type of the parsed list was missing in the reviewed text; it has been
     * restored to {@code Region}, which is what the loop iterates over.
     *
     * @param regions region string handed to {@code Region.parseRegions}
     * @return {@code false} as soon as one region is longer than the limit; {@code true} otherwise,
     *         including when {@code Region.parseRegions} returns null
     */
    public boolean validateRegionInput(String regions) {
        final long maxRegionLength = 10_000_000L;

        List<Region> regionList = Region.parseRegions(regions);
        if (regionList == null) {
            return true;
        }
        for (Region region : regionList) {
            // Subtract in long so that extreme coordinates cannot wrap around and slip past the check.
            long length = (long) region.getEnd() - region.getStart();
            if (length > maxRegionLength) {
                return false;
            }
        }
        return true;
    }

    /**
     * Queries the variation adaptor for one or more regions and labels each result with its region.
     *
     * <p>The region string is stored in the caller's {@code query} (modified in place), a list of queries is
     * derived from it with {@code createQueries}, and after running them the id of the i-th result is set
     * to the region value of the i-th query.
     *
     * <p>NOTE(review): as shown here {@code queries} and {@code queryResults} are raw lists, yet the loop
     * calls {@code setId} and {@code get} on their elements, which a raw {@code List} does not allow. The
     * generic type arguments appear to have been lost from this text - restore them from the declarations
     * of {@code createQueries} and {@code nativeGet}.
     *
     * @param query base query; receives the region parameter
     * @param queryOptions options forwarded to the adaptor
     * @param regions region string stored under the region query parameter
     * @param dataRelease data release number forwarded to the adaptor
     * @return the adaptor results, one per derived query, with ids set to the queried regions
     * @deprecated marked deprecated in the source; no replacement is named in this file
     */
    @Deprecated
    public List getByRegion(Query query, QueryOptions queryOptions, String regions, int dataRelease) {
        query.put(ParamConstants.QueryParams.REGION.key(), regions);
        logger.debug("query = " + query.toJson());
        logger.debug("queryOptions = " + queryOptions.toJson());
        // presumably one query per region in the string - createQueries is defined outside this file; confirm
        List queries = createQueries(query, regions, ParamConstants.QueryParams.REGION.key());
        List queryResults = variantDBAdaptor.nativeGet(queries, queryOptions, dataRelease);
        // Results are matched to queries by position.
        for (int i = 0; i < queries.size(); i++) {
            queryResults.get(i).setId((String) queries.get(i).get(ParamConstants.QueryParams.REGION.key()));
        }
        return queryResults;
    }

    /**
     * Looks up the functional scores of a single variant by delegating to the variation adaptor.
     *
     * @param variant variant forwarded unchanged to the adaptor
     * @param queryOptions options forwarded unchanged to the adaptor
     * @param dataRelease data release number forwarded unchanged to the adaptor
     * @return whatever the adaptor returns for the variant
     * @throws CellBaseException propagated from the adaptor
     */
    public CellBaseDataResult getFunctionalScoreVariant(Variant variant, QueryOptions queryOptions, int dataRelease)
            throws CellBaseException {
        return variantDBAdaptor.getFunctionalScoreVariant(variant, queryOptions, dataRelease);
    }

    public List> getFunctionalScoreVariant(List variants, QueryOptions options, int dataRelease)
            throws CellBaseException {
        List> cellBaseDataResults = new ArrayList<>(variants.size());
        for (Variant variant: variants) {
            if (variant.getType() == VariantType.SNV) {
                cellBaseDataResults.add(getFunctionalScoreVariant(variant, options, dataRelease));
            } else {
                cellBaseDataResults.add(new CellBaseDataResult<>(variant.toString(), 0, Collections.emptyList(), 0));
            }
        }
        return cellBaseDataResults;
    }

    /**
     * Looks up population frequencies for a list of variants by delegating to the variation adaptor.
     *
     * <p>NOTE(review): the return type is shown as {@code List>} and {@code variants} as a raw list; the
     * generic type arguments appear to have been lost from this text. Restore them from the adaptor's
     * {@code getPopulationFrequencyByVariant} declaration.
     *
     * @param variants variants forwarded unchanged to the adaptor
     * @param queryOptions options forwarded unchanged to the adaptor
     * @param dataRelease data release number forwarded unchanged to the adaptor
     * @return whatever the adaptor returns for the variants
     * @throws CellBaseException propagated from the adaptor
     */
    public List> getPopulationFrequencyByVariant(List variants, QueryOptions queryOptions,
                                                                             int dataRelease) throws CellBaseException {
        return variantDBAdaptor.getPopulationFrequencyByVariant(variants, queryOptions, dataRelease);
    }

    /**
     * Looks up the splice scores of a single variant and filters them by data source when required.
     *
     * <p>The valid sources for {@code apiKey} are requested from the API key manager, the scores are fetched
     * by chromosome, start, reference and alternate, and the result goes through
     * {@code ApiKeyLicensedDataUtils.filterDataSources} only when
     * {@code ApiKeyLicensedDataUtils.needFiltering} reports that filtering is needed for those sources.
     *
     * @param variant variant whose chromosome, start, reference and alternate identify the scores
     * @param apiKey API key used to determine the valid data sources
     * @param dataRelease data release number forwarded to the splice score adaptor
     * @return the adaptor result, filtered when filtering is needed
     * @throws CellBaseException propagated from the API key manager or the adaptor
     */
    public CellBaseDataResult getSpliceScoreVariant(Variant variant, String apiKey, int dataRelease) throws CellBaseException {
        Set allowedSources = apiKeyManager.getValidSources(apiKey, ApiKeyLicensedDataUtils.UNLICENSED_SPLICE_SCORES_DATA);

        CellBaseDataResult scores = spliceDBAdaptor.getScores(variant.getChromosome(), variant.getStart(),
                variant.getReference(), variant.getAlternate(), dataRelease);

        boolean mustFilter = ApiKeyLicensedDataUtils.needFiltering(allowedSources,
                ApiKeyLicensedDataUtils.LICENSED_SPLICE_SCORES_DATA);
        if (!mustFilter) {
            return scores;
        }
        return ApiKeyLicensedDataUtils.filterDataSources(scores, allowedSources);
    }

    public List> getSpliceScoreVariant(List variants, String apiKey, int dataRelease)
            throws CellBaseException {
        Set validSources = apiKeyManager.getValidSources(apiKey, ApiKeyLicensedDataUtils.UNLICENSED_SPLICE_SCORES_DATA);

        List> cellBaseDataResults = new ArrayList<>(variants.size());
        if (ApiKeyLicensedDataUtils.needFiltering(validSources, ApiKeyLicensedDataUtils.LICENSED_SPLICE_SCORES_DATA)) {
            for (Variant variant : variants) {
                cellBaseDataResults.add(ApiKeyLicensedDataUtils.filterDataSources(spliceDBAdaptor.getScores(variant.getChromosome(),
                        variant.getStart(), variant.getReference(), variant.getAlternate(), dataRelease), validSources));
            }
        } else {
            for (Variant variant : variants) {
                cellBaseDataResults.add(spliceDBAdaptor.getScores(variant.getChromosome(), variant.getStart(), variant.getReference(),
                        variant.getAlternate(), dataRelease));
            }
        }
        return cellBaseDataResults;
    }

    /**
     * Looks up the functional scores stored for a list of regions.
     *
     * <p>The chunk ids of every region are collected into a set, so a chunk shared by several regions is
     * requested only once, and the distinct ids are handed to the variation adaptor in one call. The set is
     * a {@code HashSet}, so the order in which the ids reach the adaptor is unspecified.
     *
     * <p>NOTE(review): the generic type arguments were missing in the reviewed text; {@code Region}
     * follows from the loop, {@code Set<String>} is an assumption to confirm against
     * {@code getFunctionalScoreChunkIds}. The return type is left as shown because its element type cannot
     * be determined from this file.
     *
     * @param regions regions whose chunks are looked up; must not be null
     * @param options options forwarded unchanged to the adaptor
     * @param dataRelease data release number forwarded unchanged to the adaptor
     * @return whatever the adaptor returns for the collected chunk ids
     * @throws CellBaseException propagated from the adaptor
     */
    public CellBaseDataResult getFunctionalScoreRegion(List<Region> regions, CellBaseQueryOptions options,
                                                       int dataRelease)
            throws CellBaseException {
        Set<String> chunkIdSet = new HashSet<>();
        for (Region region : regions) {
            chunkIdSet.addAll(variantDBAdaptor.getFunctionalScoreChunkIds(region));
        }

        return variantDBAdaptor.getFunctionalScoreRegion(new ArrayList<>(chunkIdSet), options, dataRelease);
    }

    /**
     * Runs a SNP query by delegating to the SNP adaptor.
     *
     * @param query query forwarded unchanged to the adaptor
     * @return whatever the adaptor returns for the query
     * @throws CellBaseException propagated from the adaptor
     */
    public CellBaseDataResult searchSnp(SnpQuery query) throws CellBaseException {
        return snpDBAdaptor.query(query);
    }

    /**
     * Delegates a "starts with" lookup to the SNP adaptor.
     *
     * @param id value forwarded to the adaptor's {@code startsWith}; presumably an id prefix - confirm
     *           against the adaptor
     * @param options options forwarded unchanged to the adaptor
     * @param dataRelease data release number forwarded unchanged to the adaptor
     * @return whatever the adaptor returns
     * @throws CellBaseException propagated from the adaptor
     */
    public CellBaseDataResult startsWithSnp(String id, QueryOptions options, int dataRelease) throws CellBaseException {
        return snpDBAdaptor.startsWith(id, options, dataRelease);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy