All downloads are free. Search and download functionality uses the official Maven repository.

org.opencb.cellbase.lib.managers.VariantManager Maven / Gradle / Ivy

There is a newer version: 6.3.0
Show newest version
/*
 * Copyright 2015-2020 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.cellbase.lib.managers;

import org.opencb.biodata.models.core.Gene;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.core.SpliceScore;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantBuilder;
import org.opencb.biodata.models.variant.avro.SampleEntry;
import org.opencb.biodata.models.variant.avro.Score;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.cellbase.core.ParamConstants;
import org.opencb.cellbase.core.api.VariantQuery;
import org.opencb.cellbase.core.api.query.QueryException;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.core.variant.AnnotationBasedPhasedQueryManager;
import org.opencb.cellbase.lib.impl.core.CellBaseCoreDBAdaptor;
import org.opencb.cellbase.lib.impl.core.SpliceScoreMongoDBAdaptor;
import org.opencb.cellbase.lib.impl.core.VariantMongoDBAdaptor;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator;
import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

public class VariantManager extends AbstractManager implements AggregationApi {

    private static final String PHASE_DATA_URL_SEPARATOR = "\\+";
    private static final String VARIANT_STRING_FORMAT = "(chr)"
            + ":[(cipos_left)<](start)[<(cipos_right)]" + "[-[(ciend_left)<](end)[<(ciend_right)]]"
            + "[:(ref)]"
            + ":[(alt)|(left_ins_seq)...(right_ins_seq)]";
    private VariantMongoDBAdaptor variantDBAdaptor;
    private SpliceScoreMongoDBAdaptor spliceDBAdaptor;

    private CellBaseManagerFactory cellbaseManagerFactory;
    private GenomeManager genomeManager;

    public VariantManager(String species, CellBaseConfiguration configuration) throws CellBaseException {
        this(species, null, configuration);
    }

    public VariantManager(String species, String assembly, CellBaseConfiguration configuration)
            throws CellBaseException {
        super(species, assembly, configuration);

        this.init();
    }

    private void init() throws CellBaseException {
        variantDBAdaptor = dbAdaptorFactory.getVariationDBAdaptor();
        spliceDBAdaptor = dbAdaptorFactory.getSpliceScoreDBAdaptor();
        cellbaseManagerFactory = new CellBaseManagerFactory(configuration);
        genomeManager = cellbaseManagerFactory.getGenomeManager(species, assembly);
    }

    @Override
    public CellBaseCoreDBAdaptor getDBAdaptor() {
        return variantDBAdaptor;
    }

    public CellBaseDataResult get(Query query, QueryOptions queryOptions, int dataRelease) throws CellBaseException {
        return variantDBAdaptor.nativeGet(query, queryOptions, dataRelease);
    }

    public List> getHgvsByVariant(String variants, int dataRelease)
            throws CellBaseException, QueryException, IllegalAccessException {
        List variantList = parseVariants(variants);
        HgvsCalculator hgvsCalculator = new HgvsCalculator(genomeManager, dataRelease);
        List> results = new ArrayList<>();
        VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
                dataRelease, cellbaseManagerFactory);
        List batchGeneList = variantAnnotationCalculator.getBatchGeneList(variantList);
        for (Variant variant : variantList) {
            List variantGeneList = variantAnnotationCalculator.getAffectedGenes(batchGeneList, variant);
            List hgvsStrings = hgvsCalculator.run(variant, variantGeneList, false);
            results.add(new CellBaseDataResult<>(variant.getId(), 0, new ArrayList<>(), hgvsStrings.size(), hgvsStrings, -1));
        }
        return results;
    }

    /**
     * Normalises a list of variants.
     *
     * @param variants list of variant strings
     * @param dataRelease data release
     * @return list of normalised variants
     * @throws CellBaseException if the species is incorrect
     */
    public CellBaseDataResult getNormalizationByVariant(String variants, int dataRelease) throws CellBaseException {
        List variantList = parseVariants(variants);
        VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
                dataRelease, cellbaseManagerFactory);
        List normalisedVariants = variantAnnotationCalculator.normalizer(variantList);
        return new CellBaseDataResult<>(variants, 0, new ArrayList<>(), normalisedVariants.size(), normalisedVariants, -1);
    }

    public List> getAnnotationByVariant(QueryOptions queryOptions,
                                                                              String variants,
                                                                              Boolean normalize,
                                                                              Boolean skipDecompose,
                                                                              Boolean ignorePhase,
                                                                              @Deprecated Boolean phased,
                                                                              Boolean imprecise,
                                                                              Integer svExtraPadding,
                                                                              Integer cnvExtraPadding,
                                                                              Boolean checkAminoAcidChange,
                                                                              String consequenceTypeSource,
                                                                              String enable,
                                                                              int dataRelease)
            throws ExecutionException, InterruptedException, CellBaseException, QueryException, IllegalAccessException {
        List variantList = parseVariants(variants);
        logger.debug("queryOptions: " + queryOptions);

        // If ignorePhase (new parameter) is present, then overrides presence of "phased"
        if (ignorePhase != null) {
            queryOptions.put("ignorePhase", ignorePhase);
            // If the new parameter (ignorePhase) is not present but old one ("phased") is, then follow old one - probably
            // someone who has not moved to the new parameter yet
        } else if (phased != null) {
            queryOptions.put("ignorePhase", !phased);
            // Default behavior is to perform phased annotation
        } else {
            queryOptions.put("ignorePhase", false);
        }

        if (normalize != null) {
            queryOptions.put("normalize", normalize);
        }
        if (skipDecompose != null) {
            queryOptions.put("skipDecompose", skipDecompose);
        }
        if (imprecise != null) {
            queryOptions.put("imprecise", imprecise);
        }
        if (svExtraPadding != null) {
            queryOptions.put("svExtraPadding", svExtraPadding);
        }
        if (cnvExtraPadding != null) {
            queryOptions.put("cnvExtraPadding", cnvExtraPadding);
        }
        if (checkAminoAcidChange != null) {
            queryOptions.put("checkAminoAcidChange", checkAminoAcidChange);
        }
        if (consequenceTypeSource != null) {
            queryOptions.put("consequenceTypeSource", consequenceTypeSource);
        }
        if (enable != null) {
            queryOptions.put("enable", enable);
        }

        VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
                dataRelease, cellbaseManagerFactory);
        List> queryResults =
                variantAnnotationCalculator.getAnnotationByVariantList(variantList, queryOptions);
        return queryResults;
    }

    private List parseVariants(String variantsString) {
        List variants = null;
        if (variantsString != null && !variantsString.isEmpty()) {
            String[] variantItems = variantsString.split(",");
            variants = new ArrayList<>(variantItems.length);

            for (String variantString: variantItems) {
                variants.add(parseVariant(variantString));
            }
        }
        return variants;
    }

    private Variant parseVariant(String variantString) {
        String[] variantStringPartArray = variantString.split(PHASE_DATA_URL_SEPARATOR);

        VariantBuilder variantBuilder;
        if (variantStringPartArray.length > 0) {
            variantBuilder = new VariantBuilder(variantStringPartArray[0]);
            // Either 1 or 3 parts expected variant+GT+PS
            if (variantStringPartArray.length == 3) {
                List formatList = new ArrayList<>(2);
                // If phase set tag is not provided not phase data is added at all to the Variant object
                if (!variantStringPartArray[2].isEmpty()) {
                    formatList.add(AnnotationBasedPhasedQueryManager.PHASE_SET_TAG);
                    List sampleData = new ArrayList<>(2);
                    sampleData.add(variantStringPartArray[2]);
                    // Genotype field might be empty - just PS would be added to Variant object in that case
                    if (!variantStringPartArray[1].isEmpty()) {
                        formatList.add(AnnotationBasedPhasedQueryManager.GENOTYPE_TAG);
                        sampleData.add(variantStringPartArray[1]);
                    }
                    variantBuilder.setSampleDataKeys(formatList);
                    SampleEntry sampleEntry = new SampleEntry();
                    sampleEntry.setData(sampleData);
                    variantBuilder.setSamples(Collections.singletonList(sampleEntry));
                }
            } else if (variantStringPartArray.length > 3) {
                throw new IllegalArgumentException("Malformed variant string " + variantString + ". "
                        + "variantString+GT+PS expected, where variantString needs 3 or 4 fields separated by ':'. "
                        + "Format: \"" + VARIANT_STRING_FORMAT + "\"");
            }
        } else {
            throw new IllegalArgumentException("Malformed variant string " + variantString + ". "
                    + "variantString+GT+PS expected, where variantString needs 3 or 4 fields separated by ':'. "
                    + "Format: \"" + VARIANT_STRING_FORMAT + "\"");
        }

        return variantBuilder.build();
    }

    public CellBaseDataResult getConsequenceTypes() {
        List consequenceTypes = VariantAnnotationUtils.SO_SEVERITY.keySet().stream()
                .sorted()
                .collect(Collectors.toList());
        CellBaseDataResult queryResult = new CellBaseDataResult<>("consequence_types");
        queryResult.setNumResults(consequenceTypes.size());
        queryResult.setResults(consequenceTypes);
        return queryResult;
    }

    public boolean validateRegionInput(String regions) {
        List regionList = Region.parseRegions(regions);
        // check for regions bigger than 10Mb
        if (regionList != null) {
            for (Region r : regionList) {
                if ((r.getEnd() - r.getStart()) > 10000000) {
                    return false;
                }
            }
        }
        return true;
    }

    @Deprecated
    public List getByRegion(Query query, QueryOptions queryOptions, String regions, int dataRelease) {
        query.put(ParamConstants.QueryParams.REGION.key(), regions);
        logger.debug("query = " + query.toJson());
        logger.debug("queryOptions = " + queryOptions.toJson());
        List queries = createQueries(query, regions, ParamConstants.QueryParams.REGION.key());
        List queryResults = variantDBAdaptor.nativeGet(queries, queryOptions, dataRelease);
        for (int i = 0; i < queries.size(); i++) {
            queryResults.get(i).setId((String) queries.get(i).get(ParamConstants.QueryParams.REGION.key()));
        }
        return queryResults;
    }

    public CellBaseDataResult getFunctionalScoreVariant(Variant variant, QueryOptions queryOptions, int dataRelease)
            throws CellBaseException {
        return variantDBAdaptor.getFunctionalScoreVariant(variant, queryOptions, dataRelease);
    }

    public List> getFunctionalScoreVariant(List variants, QueryOptions options, int dataRelease)
            throws CellBaseException {
        List> cellBaseDataResults = new ArrayList<>(variants.size());
        for (Variant variant: variants) {
            if (variant.getType() == VariantType.SNV) {
                cellBaseDataResults.add(getFunctionalScoreVariant(variant, options, dataRelease));
            } else {
                cellBaseDataResults.add(new CellBaseDataResult<>(variant.toString(), 0, Collections.emptyList(), 0));
            }
        }
        return cellBaseDataResults;
    }

    public List> getPopulationFrequencyByVariant(List variants, QueryOptions queryOptions,
                                                                             int dataRelease) throws CellBaseException {
        return variantDBAdaptor.getPopulationFrequencyByVariant(variants, queryOptions, dataRelease);
    }

    public CellBaseDataResult getSpliceScoreVariant(Variant variant, int dataRelease) throws CellBaseException {
        return spliceDBAdaptor.getScores(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate(),
                dataRelease);
    }

    public List> getSpliceScoreVariant(List variants, int dataRelease) throws CellBaseException {
        List> cellBaseDataResults = new ArrayList<>(variants.size());
        for (Variant variant: variants) {
            cellBaseDataResults.add(getSpliceScoreVariant(variant, dataRelease));
        }
        return cellBaseDataResults;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy