datalake-spark3_2.12.14.9.0.source-code.reference_kf.conf Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datalake-spark3_2.12 Show documentation
Show all versions of datalake-spark3_2.12 Show documentation
Library built on top of Apache Spark to speed up data lake development.
datalake {
args=[]
sources=[
{
format=VCF
id="raw_clinvar"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/clinvar/clinvar.vcf.gz"
readoptions {
flattenInfoFields="true"
"split_multiallelics"="true"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=VCF
id="raw_dbsnp"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/dbsnp/GCF_000001405.40.gz"
readoptions {
flattenInfoFields="true"
"split_multiallelics"="true"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
# Raw gnomAD genomes v3 source, read as VCF.
# This is the only source in this file whose storageid is `gnomad` rather than
# "public_database"; the `gnomad` storage is declared in the `storages` list.
format=VCF
id="raw_gnomad_genomes_v3"
keys=[]
loadtype=OverWrite
partitionby=[]
# Glob pattern: `chr[^M]*` matches file names where the character right after
# "chr" is anything except "M" (presumably to skip the chrM file — confirm).
path="/release/3.1/vcf/genomes/gnomad.genomes.v3.1.sites.chr[^M]*.vcf.bgz"
readoptions {
flattenInfoFields="true"
"split_multiallelics"="true"
}
storageid=gnomad
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_gnomad_constraint_v2_1_1"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/gnomad_v2_1_1/gnomad.v2.1.1.lof_metrics.by_gene.txt.gz"
readoptions {
header="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=VCF
id="raw_topmed_bravo"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/topmed/bravo-dbsnp-*.vcf.gz"
readoptions {
flattenInfoFields="true"
"split_multiallelics"="true"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=VCF
id="raw_1000_genomes"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/1000Genomes/ALL.*.sites.vcf.gz"
readoptions {
flattenInfoFields="true"
"split_multiallelics"="true"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_dbnsfp"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/dbNSFP/dbNSFP4.3a_variant.chr*.gz"
readoptions {
header="true"
"nullValue"="."
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_dbnsfp_annovar"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/annovar/dbNSFP/hg38_dbnsfp41a.txt"
readoptions {
header="true"
"nullValue"="."
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_omim_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/omim/genemap2.txt"
readoptions {
comment="#"
header="false"
inferSchema="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=XML
id="raw_orphanet_gene_association"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/orphanet/en_product6.xml"
readoptions {}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=XML
id="raw_orphanet_disease_history"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/orphanet/en_product9_ages.xml"
readoptions {}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_cosmic_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/cosmic/Cosmic_CancerGeneCensus_GRCh38.tsv.gz"
readoptions {
header="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_cosmic_mutation_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/cosmic/cmc_export.tsv.gz"
readoptions {
header="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_ddd_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/ddd/DDG2P.csv.gz"
readoptions {
header="true"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_hpo_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/hpo/genes_to_phenotype.txt"
readoptions {
comment="#"
header="false"
inferSchema="true"
"nullValue"="-"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_refseq_human_genes"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/refseq/Homo_sapiens.gene_info.gz"
readoptions {
header="true"
inferSchema="true"
"nullValue"="-"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=GFF
id="raw_refseq_annotation"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/refseq/GCF_GRCh38_genomic.gff.gz"
readoptions {}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_ensembl_canonical"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/ensembl/Homo_sapiens.GRCh38.canonical.tsv.gz"
readoptions {
header="false"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_ensembl_entrez"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/ensembl/Homo_sapiens.GRCh38.entrez.tsv.gz"
readoptions {
header="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_ensembl_refseq"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/ensembl/Homo_sapiens.GRCh38.refseq.tsv.gz"
readoptions {
header="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_ensembl_uniprot"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/ensembl/Homo_sapiens.GRCh38.uniprot.tsv.gz"
readoptions {
header="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=CSV
id="raw_ensembl_ena"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/ensembl/Homo_sapiens.GRCh38.ena.tsv.gz"
readoptions {
header="true"
sep="\t"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=VCF
id="raw_spliceai_indel"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/spliceai/spliceai_scores.raw.indel.hg38.vcf.gz"
readoptions {
flattenInfoFields="true"
"split_multiallelics"="true"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=VCF
id="raw_spliceai_snv"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/raw/landing/spliceai/spliceai_scores.raw.snv.hg38.vcf.gz"
readoptions {
flattenInfoFields="true"
"split_multiallelics"="true"
}
storageid="public_database"
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
# Delta dataset "normalized_1000_genomes": stored under /public/1000_genomes on
# the "public_database" storage, loadtype OverWrite, no partitioning.
format=DELTA
id="normalized_1000_genomes"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/1000_genomes"
readoptions {}
storageid="public_database"
table {
database=variant
name="1000_genomes"
}
# The view name mirrors the table name, like every other dataset in this file
# that declares a view. It was previously "variant_live", which duplicated the
# view database name instead of naming the dataset.
view {
database="variant_live"
name="1000_genomes"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_cancer_hotspots"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/cancer_hotspots"
readoptions {}
storageid="public_database"
table {
database=variant
name="cancer_hotspots"
}
view {
database="variant_live"
name="cancer_hotspots"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_clinvar"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/clinvar"
readoptions {}
repartition {
kind=Coalesce
n=1
}
storageid="public_database"
table {
database=variant
name=clinvar
}
view {
database="variant_live"
name=clinvar
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_cosmic_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/cosmic_gene_set"
readoptions {}
storageid="public_database"
table {
database=variant
name="cosmic_gene_set"
}
view {
database="variant_live"
name="cosmic_gene_set"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_cosmic_mutation_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/cosmic_mutation_set"
readoptions {}
storageid="public_database"
table {
database=variant
name="cosmic_mutation_set"
}
view {
database="variant_live"
name="cosmic_mutation_set"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_dbnsfp"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/dbnsfp/variant"
readoptions {}
storageid="public_database"
table {
database=variant
name=dbnsfp
}
view {
database="variant_live"
name=dbnsfp
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_dbnsfp_annovar"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/annovar/dbnsfp"
readoptions {}
storageid="public_database"
table {
database=variant
name="dbnsfp_annovar"
}
view {
database="variant_live"
name="dbnsfp_annovar"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_dbsnp"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/dbsnp"
readoptions {}
storageid="public_database"
table {
database=variant
name=dbsnp
}
view {
database="variant_live"
name=dbsnp
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_ddd_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/ddd_gene_set"
readoptions {}
storageid="public_database"
table {
database=variant
name="ddd_gene_set"
}
view {
database="variant_live"
name="ddd_gene_set"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_ensembl_mapping"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/ensembl_mapping"
readoptions {}
repartition {
kind=Coalesce
n=1
}
storageid="public_database"
table {
database=variant
name="ensembl_mapping"
}
view {
database="variant_live"
name="ensembl_mapping"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_gnomad_genomes_v2_1_1"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/gnomad_genomes_v2_1_1_liftover_grch38"
readoptions {}
storageid="public_database"
table {
database=variant
name="gnomad_genomes_v2_1_1"
}
view {
database="variant_live"
name="gnomad_genomes_v2_1_1"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_gnomad_exomes_v2_1_1"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/gnomad_exomes_v2_1_1_liftover_grch38"
readoptions {}
storageid="public_database"
table {
database=variant
name="gnomad_exomes_v2_1_1"
}
view {
database="variant_live"
name="gnomad_exomes_v2_1_1"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
# Delta dataset "normalized_gnomad_constraint_v2_1_1", partitioned by chromosome.
format=DELTA
id="normalized_gnomad_constraint_v2_1_1"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/gnomad_constraint_v2_1_1"
readoptions {}
storageid="public_database"
# NOTE(review): the table and view names below are spelled "v_2_1_1", while this
# dataset's id and path, and the sibling gnomad_genomes_v2_1_1 /
# gnomad_exomes_v2_1_1 tables, use "v2_1_1". Confirm whether the extra underscore
# is intentional before renaming — existing consumers may query this name.
table {
database=variant
name="gnomad_constraint_v_2_1_1"
}
view {
database="variant_live"
name="gnomad_constraint_v_2_1_1"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_gnomad_genomes_v3"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/gnomad_genomes_v3"
readoptions {}
storageid="public_database"
table {
database=variant
name="gnomad_genomes_v3"
}
view {
database="variant_live"
name="gnomad_genomes_v3"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_human_genes"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/human_genes"
readoptions {}
storageid="public_database"
table {
database=variant
name="human_genes"
}
view {
database="variant_live"
name="human_genes"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_hpo_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/hpo_gene_set"
readoptions {}
storageid="public_database"
table {
database=variant
name="hpo_gene_set"
}
view {
database="variant_live"
name="hpo_gene_set"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_omim_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/omim_gene_set"
readoptions {}
storageid="public_database"
table {
database=variant
name="omim_gene_set"
}
view {
database="variant_live"
name="omim_gene_set"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_orphanet_gene_set"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/orphanet_gene_set"
readoptions {}
storageid="public_database"
table {
database=variant
name="orphanet_gene_set"
}
view {
database="variant_live"
name="orphanet_gene_set"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_topmed_bravo"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/topmed_bravo"
readoptions {}
storageid="public_database"
table {
database=variant
name="topmed_bravo"
}
view {
database="variant_live"
name="topmed_bravo"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_refseq_annotation"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/refseq_annotation"
readoptions {}
storageid="public_database"
table {
database=variant
name="refseq_annotation"
}
view {
database="variant_live"
name="refseq_annotation"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_spliceai_indel"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/spliceai/indel"
readoptions {}
storageid="public_database"
table {
database=variant
name="spliceai_indel"
}
view {
database="variant_live"
name="spliceai_indel"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_spliceai_snv"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/spliceai/snv"
readoptions {}
storageid="public_database"
table {
database=variant
name="spliceai_snv"
}
view {
database="variant_live"
name="spliceai_snv"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="enriched_genes"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/public/genes"
readoptions {}
storageid="public_database"
table {
database=variant
name=genes
}
view {
database="variant_live"
name=genes
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="enriched_dbnsfp"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/dbnsfp/scores"
readoptions {}
storageid="public_database"
table {
database=variant
name="dbnsfp_original"
}
view {
database="variant_live"
name="dbnsfp_original"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
# Delta dataset "enriched_spliceai_indel", partitioned by chromosome.
format=DELTA
id="enriched_spliceai_indel"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/spliceai/enriched/indel"
readoptions {}
# Range repartitioning on (chromosome, start).
# NOTE(review): no `n` is given here, whereas the RepartitionByRange blocks of
# enriched_consequences and enriched_variants set n=100 — confirm that relying
# on the library default is intended.
repartition {
column-names=[
chromosome,
start
]
kind=RepartitionByRange
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name="spliceai_enriched_indel"
}
view {
database="variant_live"
name="spliceai_enriched_indel"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
# Delta dataset "enriched_spliceai_snv", partitioned by chromosome.
format=DELTA
id="enriched_spliceai_snv"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/public/spliceai/enriched/snv"
readoptions {}
# Range repartitioning on (chromosome, start).
# NOTE(review): no `n` is given here, whereas the RepartitionByRange blocks of
# enriched_consequences and enriched_variants set n=100 — confirm that relying
# on the library default is intended.
repartition {
column-names=[
chromosome,
start
]
kind=RepartitionByRange
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name="spliceai_enriched_snv"
}
view {
database="variant_live"
name="spliceai_enriched_snv"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="enriched_rare_variant"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome,
"is_rare"
]
path="/public/rare_variant/enriched"
readoptions {}
storageid="public_database"
table {
database=variant
name="rare_variant_enriched"
}
view {
database="variant_live"
name="rare_variant_enriched"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="normalized_consequences"
keys=[
chromosome,
start,
reference,
alternate,
"ensembl_transcript_id"
]
loadtype=Scd1
partitionby=[
chromosome
]
path="/normalized/consequences"
readoptions {}
storageid="public_database"
table {
database=variant
name="normalized_consequences"
}
view {
database="variant_live"
name="normalized_consequences"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="enriched_consequences"
keys=[
chromosome,
start,
reference,
alternate,
"ensembl_transcript_id"
]
loadtype=Scd1
partitionby=[
chromosome
]
path="/enriched/consequences"
readoptions {}
repartition {
column-names=[
chromosome,
start
]
kind=RepartitionByRange
n=100
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name=consequences
}
view {
database="variant_live"
name=consequences
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=DELTA
id="enriched_variants"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/enriched/variants"
readoptions {}
repartition {
column-names=[
chromosome,
start
]
kind=RepartitionByRange
n=100
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name=variants
}
view {
database="variant_live"
name=variants
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=PARQUET
id="es_index_variant_centric"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/es_index/variant_centric"
readoptions {}
repartition {
column-names=[
chromosome
]
kind=RepartitionByColumns
n=100
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name="variant_centric"
}
view {
database="variant_live"
name="variant_centric"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=PARQUET
id="es_index_variant_suggestions"
keys=[]
loadtype=OverWrite
partitionby=[
chromosome
]
path="/es_index/variant_suggestions"
readoptions {}
repartition {
column-names=[
chromosome
]
kind=RepartitionByColumns
n=100
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name="variant_suggestions"
}
view {
database="variant_live"
name="variant_suggestions"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=PARQUET
id="es_index_gene_centric"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/es_index/gene_centric"
readoptions {}
repartition {
kind=FixedRepartition
n=20
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name="gene_centric"
}
view {
database="variant_live"
name="gene_centric"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
},
{
format=PARQUET
id="es_index_gene_suggestions"
keys=[]
loadtype=OverWrite
partitionby=[]
path="/es_index/gene_suggestions"
readoptions {}
repartition {
kind=FixedRepartition
n=20
sort-columns=[]
}
storageid="public_database"
table {
database=variant
name="gene_suggestions"
}
view {
database="variant_live"
name="gene_suggestions"
}
writeoptions {
"created_on_column"="created_on"
"is_current_column"="is_current"
"updated_on_column"="updated_on"
"valid_from_column"="valid_from"
"valid_to_column"="valid_to"
}
}
]
# Extra Spark configuration entries. The single entry sets the Hive metastore
# client factory class to the AWS Glue Data Catalog implementation.
sparkconf {
"hive.metastore.client.factory.class"="com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
}
# Storage aliases. Each source's `storageid` refers to one of these ids
# ("public_database" or gnomad); both are S3 locations addressed with s3a:// URIs.
# Presumably each source `path` is resolved relative to its storage's base
# path — confirm against the library.
storages=[
{
filesystem=S3
id="public_database"
path="s3a://kf-strides-variant-parquet-prd"
},
{
filesystem=S3
id=gnomad
path="s3a://gnomad-public-us-east-1"
}
]
}