All downloads are FREE. The search and download functionality uses the official Maven repository.

datalake-spark3_2.12.14.9.0.source-code.reference_kf.conf Maven / Gradle / Ivy

There is a newer version: 14.8.0
Show newest version
datalake {
    args=[]
    sources=[
        # raw_clinvar: VCF input read from the landing area of the "public_database" storage.
        # The read options ask for flattened INFO fields and multiallelic splitting
        # (exact semantics depend on the VCF reader in use — confirm).
        {
            format=VCF
            id="raw_clinvar"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/clinvar/clinvar.vcf.gz"
            readoptions {
                flattenInfoFields="true"
                "split_multiallelics"="true"
            }
            storageid="public_database"
            # Names of the bookkeeping columns; same values on every entry in this file.
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_dbsnp: VCF input for dbSNP on the "public_database" storage.
        # NOTE(review): the file name has no ".vcf" part, unlike the other VCF inputs — confirm it is correct.
        {
            format=VCF
            id="raw_dbsnp"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/dbsnp/GCF_000001405.40.gz"
            readoptions {
                flattenInfoFields="true"
                "split_multiallelics"="true"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_gnomad_genomes_v3: VCF input read from the "gnomad" storage rather than "public_database".
        # The path is a glob; "chr[^M]*" skips files whose name continues with "M" right after "chr"
        # (presumably the mitochondrial chromosome — confirm against the glob dialect of the file system).
        {
            format=VCF
            id="raw_gnomad_genomes_v3"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/release/3.1/vcf/genomes/gnomad.genomes.v3.1.sites.chr[^M]*.vcf.bgz"
            readoptions {
                flattenInfoFields="true"
                "split_multiallelics"="true"
            }
            storageid=gnomad
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_gnomad_constraint_v2_1_1: gzip-named text file declared as CSV,
        # read with a header row and a tab separator.
        {
            format=CSV
            id="raw_gnomad_constraint_v2_1_1"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/gnomad_v2_1_1/gnomad.v2.1.1.lof_metrics.by_gene.txt.gz"
            readoptions {
                header="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_topmed_bravo: VCF input; the path is a glob ("bravo-dbsnp-*.vcf.gz"),
        # so it can match several files.
        {
            format=VCF
            id="raw_topmed_bravo"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/topmed/bravo-dbsnp-*.vcf.gz"
            readoptions {
                flattenInfoFields="true"
                "split_multiallelics"="true"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_1000_genomes: VCF input; the path is a glob ("ALL.*.sites.vcf.gz"),
        # so it can match several files.
        {
            format=VCF
            id="raw_1000_genomes"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/1000Genomes/ALL.*.sites.vcf.gz"
            readoptions {
                flattenInfoFields="true"
                "split_multiallelics"="true"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_dbnsfp: per-chromosome dbNSFP files (glob "chr*") declared as CSV,
        # read with a header row, a tab separator, and "." treated as null.
        {
            format=CSV
            id="raw_dbnsfp"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/dbNSFP/dbNSFP4.3a_variant.chr*.gz"
            readoptions {
                header="true"
                "nullValue"="."
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_dbnsfp_annovar: uncompressed text file declared as CSV, read with the same
        # options as raw_dbnsfp (header row, tab separator, "." treated as null).
        # Note the file name says "dbnsfp41a" while raw_dbnsfp points at "dbNSFP4.3a".
        {
            format=CSV
            id="raw_dbnsfp_annovar"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/annovar/dbNSFP/hg38_dbnsfp41a.txt"
            readoptions {
                header="true"
                "nullValue"="."
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_omim_gene_set: headerless tab-separated file declared as CSV;
        # "#" is the comment marker and column types are inferred by the reader.
        {
            format=CSV
            id="raw_omim_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/omim/genemap2.txt"
            readoptions {
                comment="#"
                header="false"
                inferSchema="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_orphanet_gene_association: XML input with no read options set.
        {
            format=XML
            id="raw_orphanet_gene_association"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/orphanet/en_product6.xml"
            readoptions {}
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_orphanet_disease_history: XML input with no read options set.
        {
            format=XML
            id="raw_orphanet_disease_history"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/orphanet/en_product9_ages.xml"
            readoptions {}
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_cosmic_gene_set: ".tsv.gz" file declared as CSV, read with a header row and a tab separator.
        {
            format=CSV
            id="raw_cosmic_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/cosmic/Cosmic_CancerGeneCensus_GRCh38.tsv.gz"
            readoptions {
                header="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_cosmic_mutation_set: ".tsv.gz" file declared as CSV, read with a header row and a tab separator.
        {
            format=CSV
            id="raw_cosmic_mutation_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/cosmic/cmc_export.tsv.gz"
            readoptions {
                header="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_ddd_gene_set: CSV input read with a header row. No "sep" is given,
        # so the reader's default separator applies.
        {
            format=CSV
            id="raw_ddd_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/ddd/DDG2P.csv.gz"
            readoptions {
                header="true"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_hpo_gene_set: headerless tab-separated file declared as CSV;
        # "#" is the comment marker, "-" is treated as null, and column types are inferred.
        {
            format=CSV
            id="raw_hpo_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/hpo/genes_to_phenotype.txt"
            readoptions {
                comment="#"
                header="false"
                inferSchema="true"
                "nullValue"="-"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_refseq_human_genes: tab-separated file declared as CSV, read with a header row;
        # "-" is treated as null and column types are inferred.
        {
            format=CSV
            id="raw_refseq_human_genes"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/refseq/Homo_sapiens.gene_info.gz"
            readoptions {
                header="true"
                inferSchema="true"
                "nullValue"="-"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_refseq_annotation: the only GFF-format input in this section; no read options set.
        {
            format=GFF
            id="raw_refseq_annotation"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/refseq/GCF_GRCh38_genomic.gff.gz"
            readoptions {}
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_ensembl_canonical: tab-separated Ensembl file declared as CSV.
        # Unlike the other raw_ensembl_* entries it is read with header="false".
        {
            format=CSV
            id="raw_ensembl_canonical"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/ensembl/Homo_sapiens.GRCh38.canonical.tsv.gz"
            readoptions {
                header="false"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_ensembl_entrez: tab-separated Ensembl file declared as CSV, read with a header row.
        {
            format=CSV
            id="raw_ensembl_entrez"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/ensembl/Homo_sapiens.GRCh38.entrez.tsv.gz"
            readoptions {
                header="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_ensembl_refseq: tab-separated Ensembl file declared as CSV, read with a header row.
        {
            format=CSV
            id="raw_ensembl_refseq"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/ensembl/Homo_sapiens.GRCh38.refseq.tsv.gz"
            readoptions {
                header="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_ensembl_uniprot: tab-separated Ensembl file declared as CSV, read with a header row.
        {
            format=CSV
            id="raw_ensembl_uniprot"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/ensembl/Homo_sapiens.GRCh38.uniprot.tsv.gz"
            readoptions {
                header="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_ensembl_ena: tab-separated Ensembl file declared as CSV, read with a header row.
        {
            format=CSV
            id="raw_ensembl_ena"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/ensembl/Homo_sapiens.GRCh38.ena.tsv.gz"
            readoptions {
                header="true"
                sep="\t"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_spliceai_indel: VCF input holding the SpliceAI indel file; the SNV file is a separate entry.
        {
            format=VCF
            id="raw_spliceai_indel"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/spliceai/spliceai_scores.raw.indel.hg38.vcf.gz"
            readoptions {
                flattenInfoFields="true"
                "split_multiallelics"="true"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # raw_spliceai_snv: VCF input holding the SpliceAI SNV file; the indel file is a separate entry.
        {
            format=VCF
            id="raw_spliceai_snv"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/raw/landing/spliceai/spliceai_scores.raw.snv.hg38.vcf.gz"
            readoptions {
                flattenInfoFields="true"
                "split_multiallelics"="true"
            }
            storageid="public_database"
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_1000_genomes: Delta dataset at /public/1000_genomes on the "public_database"
        # storage; declares table variant."1000_genomes" and a view in "variant_live".
        {
            format=DELTA
            id="normalized_1000_genomes"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/1000_genomes"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="1000_genomes"
            }
            # The view name mirrors the table name, as on every other normalized_* entry.
            # It was previously "variant_live" (the database name repeated), a copy-paste slip.
            view {
                database="variant_live"
                name="1000_genomes"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_cancer_hotspots: Delta dataset at /public/cancer_hotspots;
        # declares table variant.cancer_hotspots and view variant_live.cancer_hotspots.
        {
            format=DELTA
            id="normalized_cancer_hotspots"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/cancer_hotspots"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="cancer_hotspots"
            }
            view {
                database="variant_live"
                name="cancer_hotspots"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_clinvar: Delta dataset at /public/clinvar;
        # declares table variant.clinvar and view variant_live.clinvar.
        {
            format=DELTA
            id="normalized_clinvar"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/clinvar"
            readoptions {}
            # Coalesce to n=1 — presumably so the output is written as a single file; confirm with the writer.
            repartition {
                kind=Coalesce
                n=1
            }
            storageid="public_database"
            table {
                database=variant
                name=clinvar
            }
            view {
                database="variant_live"
                name=clinvar
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_cosmic_gene_set: Delta dataset at /public/cosmic_gene_set;
        # declares table variant.cosmic_gene_set and view variant_live.cosmic_gene_set.
        {
            format=DELTA
            id="normalized_cosmic_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/cosmic_gene_set"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="cosmic_gene_set"
            }
            view {
                database="variant_live"
                name="cosmic_gene_set"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_cosmic_mutation_set: Delta dataset at /public/cosmic_mutation_set;
        # declares table variant.cosmic_mutation_set and view variant_live.cosmic_mutation_set.
        {
            format=DELTA
            id="normalized_cosmic_mutation_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/cosmic_mutation_set"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="cosmic_mutation_set"
            }
            view {
                database="variant_live"
                name="cosmic_mutation_set"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_dbnsfp: Delta dataset at /public/dbnsfp/variant, partitioned by "chromosome";
        # declares table variant.dbnsfp and view variant_live.dbnsfp.
        {
            format=DELTA
            id="normalized_dbnsfp"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/dbnsfp/variant"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name=dbnsfp
            }
            view {
                database="variant_live"
                name=dbnsfp
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_dbnsfp_annovar: Delta dataset at /public/annovar/dbnsfp, partitioned by "chromosome";
        # declares table variant.dbnsfp_annovar and view variant_live.dbnsfp_annovar.
        {
            format=DELTA
            id="normalized_dbnsfp_annovar"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/annovar/dbnsfp"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="dbnsfp_annovar"
            }
            view {
                database="variant_live"
                name="dbnsfp_annovar"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_dbsnp: Delta dataset at /public/dbsnp, partitioned by "chromosome";
        # declares table variant.dbsnp and view variant_live.dbsnp.
        {
            format=DELTA
            id="normalized_dbsnp"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/dbsnp"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name=dbsnp
            }
            view {
                database="variant_live"
                name=dbsnp
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_ddd_gene_set: Delta dataset at /public/ddd_gene_set;
        # declares table variant.ddd_gene_set and view variant_live.ddd_gene_set.
        {
            format=DELTA
            id="normalized_ddd_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/ddd_gene_set"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="ddd_gene_set"
            }
            view {
                database="variant_live"
                name="ddd_gene_set"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_ensembl_mapping: Delta dataset at /public/ensembl_mapping;
        # declares table variant.ensembl_mapping and view variant_live.ensembl_mapping.
        {
            format=DELTA
            id="normalized_ensembl_mapping"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/ensembl_mapping"
            readoptions {}
            # Coalesce to n=1 — presumably so the output is written as a single file; confirm with the writer.
            repartition {
                kind=Coalesce
                n=1
            }
            storageid="public_database"
            table {
                database=variant
                name="ensembl_mapping"
            }
            view {
                database="variant_live"
                name="ensembl_mapping"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_gnomad_genomes_v2_1_1: Delta dataset partitioned by "chromosome".
        # The path carries a "_liftover_grch38" suffix that the table and view names omit.
        {
            format=DELTA
            id="normalized_gnomad_genomes_v2_1_1"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/gnomad_genomes_v2_1_1_liftover_grch38"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="gnomad_genomes_v2_1_1"
            }
            view {
                database="variant_live"
                name="gnomad_genomes_v2_1_1"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_gnomad_exomes_v2_1_1: Delta dataset partitioned by "chromosome".
        # The path carries a "_liftover_grch38" suffix that the table and view names omit.
        {
            format=DELTA
            id="normalized_gnomad_exomes_v2_1_1"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/gnomad_exomes_v2_1_1_liftover_grch38"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="gnomad_exomes_v2_1_1"
            }
            view {
                database="variant_live"
                name="gnomad_exomes_v2_1_1"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_gnomad_constraint_v2_1_1: Delta dataset at /public/gnomad_constraint_v2_1_1,
        # partitioned by "chromosome".
        # NOTE(review): the table and view are named "gnomad_constraint_v_2_1_1" (extra underscore
        # after "v"), while the id and path use "v2_1_1". Left unchanged because renaming would
        # break existing consumers — confirm whether the mismatch is intentional.
        {
            format=DELTA
            id="normalized_gnomad_constraint_v2_1_1"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/gnomad_constraint_v2_1_1"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="gnomad_constraint_v_2_1_1"
            }
            view {
                database="variant_live"
                name="gnomad_constraint_v_2_1_1"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_gnomad_genomes_v3: Delta dataset at /public/gnomad_genomes_v3, partitioned by
        # "chromosome". It lives on "public_database", whereas raw_gnomad_genomes_v3 reads from "gnomad".
        {
            format=DELTA
            id="normalized_gnomad_genomes_v3"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/gnomad_genomes_v3"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="gnomad_genomes_v3"
            }
            view {
                database="variant_live"
                name="gnomad_genomes_v3"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_human_genes: Delta dataset at /public/human_genes;
        # declares table variant.human_genes and view variant_live.human_genes.
        {
            format=DELTA
            id="normalized_human_genes"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/human_genes"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="human_genes"
            }
            view {
                database="variant_live"
                name="human_genes"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_hpo_gene_set: Delta dataset at /public/hpo_gene_set;
        # declares table variant.hpo_gene_set and view variant_live.hpo_gene_set.
        {
            format=DELTA
            id="normalized_hpo_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/hpo_gene_set"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="hpo_gene_set"
            }
            view {
                database="variant_live"
                name="hpo_gene_set"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_omim_gene_set: Delta dataset at /public/omim_gene_set;
        # declares table variant.omim_gene_set and view variant_live.omim_gene_set.
        {
            format=DELTA
            id="normalized_omim_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/omim_gene_set"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="omim_gene_set"
            }
            view {
                database="variant_live"
                name="omim_gene_set"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_orphanet_gene_set: Delta dataset at /public/orphanet_gene_set;
        # declares table variant.orphanet_gene_set and view variant_live.orphanet_gene_set.
        {
            format=DELTA
            id="normalized_orphanet_gene_set"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/orphanet_gene_set"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="orphanet_gene_set"
            }
            view {
                database="variant_live"
                name="orphanet_gene_set"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_topmed_bravo: Delta dataset at /public/topmed_bravo;
        # declares table variant.topmed_bravo and view variant_live.topmed_bravo.
        # Unlike most other variant-level normalized_* entries here, it has no partition column.
        {
            format=DELTA
            id="normalized_topmed_bravo"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            path="/public/topmed_bravo"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="topmed_bravo"
            }
            view {
                database="variant_live"
                name="topmed_bravo"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_refseq_annotation: Delta dataset at /public/refseq_annotation, partitioned by
        # "chromosome"; declares table variant.refseq_annotation and view variant_live.refseq_annotation.
        {
            format=DELTA
            id="normalized_refseq_annotation"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/refseq_annotation"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="refseq_annotation"
            }
            view {
                database="variant_live"
                name="refseq_annotation"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        # normalized_spliceai_indel: Delta dataset at /public/spliceai/indel, partitioned by
        # "chromosome"; declares table variant.spliceai_indel and view variant_live.spliceai_indel.
        {
            format=DELTA
            id="normalized_spliceai_indel"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            path="/public/spliceai/indel"
            readoptions {}
            storageid="public_database"
            table {
                database=variant
                name="spliceai_indel"
            }
            view {
                database="variant_live"
                name="spliceai_indel"
            }
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "normalized_spliceai_snv": DELTA dataset at /public/spliceai/snv on the
            # "public_database" storage. loadtype is OverWrite; no keys; partitioned by chromosome.
            format=DELTA
            id="normalized_spliceai_snv"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/public/spliceai/snv"
            readoptions {}
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="spliceai_snv"
            }
            view {
                database="variant_live"
                name="spliceai_snv"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "enriched_genes": DELTA dataset at /public/genes on the "public_database" storage.
            # loadtype is OverWrite; no keys and no partition columns.
            format=DELTA
            id="enriched_genes"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            # presumably resolved against the storage's base path — confirm
            path="/public/genes"
            readoptions {}
            storageid="public_database"
            # Table and view are both named "genes"; only the database differs (variant vs variant_live).
            table {
                database=variant
                name=genes
            }
            view {
                database="variant_live"
                name=genes
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "enriched_dbnsfp": DELTA dataset at /public/dbnsfp/scores on the "public_database"
            # storage. loadtype is OverWrite; no keys; partitioned by chromosome.
            # NOTE(review): the id says "enriched_dbnsfp" while the table/view name is "dbnsfp_original"
            # and the path ends in /scores — confirm this naming mismatch is intended.
            format=DELTA
            id="enriched_dbnsfp"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/public/dbnsfp/scores"
            readoptions {}
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="dbnsfp_original"
            }
            view {
                database="variant_live"
                name="dbnsfp_original"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "enriched_spliceai_indel": DELTA dataset at /public/spliceai/enriched/indel on the
            # "public_database" storage. loadtype is OverWrite; no keys; partitioned by chromosome.
            format=DELTA
            id="enriched_spliceai_indel"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/public/spliceai/enriched/indel"
            readoptions {}
            # Repartition of kind RepartitionByRange on (chromosome, start), with no sort columns.
            # NOTE(review): no `n` is set here, whereas enriched_consequences and enriched_variants
            # use the same kind/columns with n=100 — confirm the omission is intentional.
            repartition {
                column-names=[
                    chromosome,
                    start
                ]
                kind=RepartitionByRange
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="spliceai_enriched_indel"
            }
            view {
                database="variant_live"
                name="spliceai_enriched_indel"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "enriched_spliceai_snv": DELTA dataset at /public/spliceai/enriched/snv on the
            # "public_database" storage. loadtype is OverWrite; no keys; partitioned by chromosome.
            format=DELTA
            id="enriched_spliceai_snv"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/public/spliceai/enriched/snv"
            readoptions {}
            # Repartition of kind RepartitionByRange on (chromosome, start), with no sort columns.
            # NOTE(review): no `n` is set here, whereas enriched_consequences and enriched_variants
            # use the same kind/columns with n=100 — confirm the omission is intentional.
            repartition {
                column-names=[
                    chromosome,
                    start
                ]
                kind=RepartitionByRange
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="spliceai_enriched_snv"
            }
            view {
                database="variant_live"
                name="spliceai_enriched_snv"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "enriched_rare_variant": DELTA dataset at /public/rare_variant/enriched on the
            # "public_database" storage. loadtype is OverWrite; no keys.
            format=DELTA
            id="enriched_rare_variant"
            keys=[]
            loadtype=OverWrite
            # Two partition columns: chromosome, then is_rare.
            partitionby=[
                chromosome,
                "is_rare"
            ]
            # presumably resolved against the storage's base path — confirm
            path="/public/rare_variant/enriched"
            readoptions {}
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="rare_variant_enriched"
            }
            view {
                database="variant_live"
                name="rare_variant_enriched"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "normalized_consequences": DELTA dataset at /normalized/consequences on the
            # "public_database" storage, partitioned by chromosome.
            format=DELTA
            id="normalized_consequences"
            # Five key columns are declared, and loadtype is Scd1 rather than OverWrite.
            # presumably these keys identify a row for the Scd1 load — verify against the loader
            keys=[
                chromosome,
                start,
                reference,
                alternate,
                "ensembl_transcript_id"
            ]
            loadtype=Scd1
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/normalized/consequences"
            readoptions {}
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="normalized_consequences"
            }
            view {
                database="variant_live"
                name="normalized_consequences"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "enriched_consequences": DELTA dataset at /enriched/consequences on the
            # "public_database" storage, partitioned by chromosome.
            format=DELTA
            id="enriched_consequences"
            # Five key columns are declared, and loadtype is Scd1 rather than OverWrite.
            # presumably these keys identify a row for the Scd1 load — verify against the loader
            keys=[
                chromosome,
                start,
                reference,
                alternate,
                "ensembl_transcript_id"
            ]
            loadtype=Scd1
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/enriched/consequences"
            readoptions {}
            # Repartition of kind RepartitionByRange on (chromosome, start) with n=100; no sort columns.
            repartition {
                column-names=[
                    chromosome,
                    start
                ]
                kind=RepartitionByRange
                n=100
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view are both named "consequences"; only the database differs (variant vs variant_live).
            table {
                database=variant
                name=consequences
            }
            view {
                database="variant_live"
                name=consequences
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "enriched_variants": DELTA dataset at /enriched/variants on the "public_database"
            # storage. loadtype is OverWrite; no keys; partitioned by chromosome.
            format=DELTA
            id="enriched_variants"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/enriched/variants"
            readoptions {}
            # Repartition of kind RepartitionByRange on (chromosome, start) with n=100; no sort columns.
            repartition {
                column-names=[
                    chromosome,
                    start
                ]
                kind=RepartitionByRange
                n=100
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view are both named "variants"; only the database differs (variant vs variant_live).
            table {
                database=variant
                name=variants
            }
            view {
                database="variant_live"
                name=variants
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "es_index_variant_centric": PARQUET (not DELTA) dataset at /es_index/variant_centric
            # on the "public_database" storage. loadtype is OverWrite; no keys; partitioned by chromosome.
            format=PARQUET
            id="es_index_variant_centric"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/es_index/variant_centric"
            readoptions {}
            # Repartition of kind RepartitionByColumns on chromosome with n=100; no sort columns.
            repartition {
                column-names=[
                    chromosome
                ]
                kind=RepartitionByColumns
                n=100
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="variant_centric"
            }
            view {
                database="variant_live"
                name="variant_centric"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "es_index_variant_suggestions": PARQUET (not DELTA) dataset at
            # /es_index/variant_suggestions on the "public_database" storage.
            # loadtype is OverWrite; no keys; partitioned by chromosome.
            format=PARQUET
            id="es_index_variant_suggestions"
            keys=[]
            loadtype=OverWrite
            partitionby=[
                chromosome
            ]
            # presumably resolved against the storage's base path — confirm
            path="/es_index/variant_suggestions"
            readoptions {}
            # Repartition of kind RepartitionByColumns on chromosome with n=100; no sort columns.
            repartition {
                column-names=[
                    chromosome
                ]
                kind=RepartitionByColumns
                n=100
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="variant_suggestions"
            }
            view {
                database="variant_live"
                name="variant_suggestions"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "es_index_gene_centric": PARQUET (not DELTA) dataset at /es_index/gene_centric on the
            # "public_database" storage. loadtype is OverWrite; no keys and no partition columns.
            format=PARQUET
            id="es_index_gene_centric"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            # presumably resolved against the storage's base path — confirm
            path="/es_index/gene_centric"
            readoptions {}
            # Repartition of kind FixedRepartition with n=20; no column names, no sort columns.
            repartition {
                kind=FixedRepartition
                n=20
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="gene_centric"
            }
            view {
                database="variant_live"
                name="gene_centric"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        },
        {
            # Source "es_index_gene_suggestions": PARQUET (not DELTA) dataset at /es_index/gene_suggestions
            # on the "public_database" storage. loadtype is OverWrite; no keys and no partition columns.
            # This is the last element of the `sources` array, so no trailing comma follows it.
            format=PARQUET
            id="es_index_gene_suggestions"
            keys=[]
            loadtype=OverWrite
            partitionby=[]
            # presumably resolved against the storage's base path — confirm
            path="/es_index/gene_suggestions"
            readoptions {}
            # Repartition of kind FixedRepartition with n=20; no column names, no sort columns.
            repartition {
                kind=FixedRepartition
                n=20
                sort-columns=[]
            }
            storageid="public_database"
            # Table and view use the same name; only the database differs (variant vs variant_live).
            table {
                database=variant
                name="gene_suggestions"
            }
            view {
                database="variant_live"
                name="gene_suggestions"
            }
            # Column-name options (created/updated/current/valid-from/valid-to).
            writeoptions {
                "created_on_column"="created_on"
                "is_current_column"="is_current"
                "updated_on_column"="updated_on"
                "valid_from_column"="valid_from"
                "valid_to_column"="valid_to"
            }
        }
    ]
    sparkconf {
        # Single Spark setting: the Hive metastore client factory is set to the AWS Glue Data Catalog
        # factory class. The key is quoted because it contains dots and must stay one flat key.
        "hive.metastore.client.factory.class"="com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
    }
    storages=[
        # Storage definitions; each `id` is the value the source entries reference through `storageid`.
        {
            # S3 bucket used by the sources whose storageid is "public_database".
            filesystem=S3
            id="public_database"
            path="s3a://kf-strides-variant-parquet-prd"
        },
        {
            # S3 bucket used by the sources whose storageid is gnomad.
            filesystem=S3
            id=gnomad
            path="s3a://gnomad-public-us-east-1"
        }
    ]
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy