All downloads are free. The search and download functionality uses the official Maven repository.

eqtlmappingpipeline.util.QTLAnnotator Maven / Gradle / Ivy

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package eqtlmappingpipeline.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.regex.Pattern;
import umcg.genetica.io.gwascatalog.GWASCatalog;
import umcg.genetica.io.gwascatalog.GWASSNP;
import umcg.genetica.io.gwascatalog.GWASTrait;
import umcg.genetica.io.probemapping.reading;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.EQTL;
import umcg.genetica.io.trityper.QTLTextFile;

/**
 * Utility that appends extra annotation columns to a QTL result file.
 *
 * <p>{@link #main(String[])} runs one hard-coded invocation of
 * {@link #addAnnotationToQTLOutput(String, String, String, String, String, String)},
 * which reads a QTL file, looks up annotation for every QTL (by SNP name or by
 * probe id) in one or more annotation sources, and writes the original QTL
 * rows followed by the collected annotation columns to a new file.
 *
 * <p>NOTE(review): this listing appears to have lost every piece of text that
 * was enclosed in angle brackets (generic type arguments, and a larger span
 * inside {@code addAnnotationToQTLOutput}). The affected lines are flagged
 * below; restore them from the original source before compiling.
 *
 * @author MarcJan
 */
class QTLAnnotator {

    /**
     * Runs a single annotation job with hard-coded input, annotation and
     * output paths.
     *
     * <p>The {@code args} parameter is not read. The commented-out calls are
     * alternative invocations (presumably earlier runs kept for reference);
     * only the call that is not commented out is executed.
     *
     * @param args command-line arguments (unused)
     * @throws IOException propagated from {@code addAnnotationToQTLOutput}
     */
    public static void main(String[] args) throws IOException {
//        addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered.txt",
//                "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Great_GeneAssociations_20141127-public-2.0.2-aaQ50i-hg19-all-region_Ensembl.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt",
//                "1;10-11;0;1;1;4-10", "probe;probe;probe", null,
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered.txt-ExtendedInfoTesting.txt");

//        addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Cis_Pc22c_meQTLs\\Primary\\eQTLProbesFDR0.05-ProbeLevel_ldDrivenEffectsRemoved.txt",
//                "D:\\UMCG\\GWAS_Catalog\\GWAS-Catalog-02092014.txt;D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_2MB_TSS_extendedCis_eQTMs\\eQTLSNPsFDR0.05-SNPLevel.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Normal\\RP3_2Mb_TSSextendedCis_eQTMs\\eQTLSNPsFDR0.05-SNPLevel.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_2MB_TSS_extendedCis_eQTMs\\eQTLSNPsFDR0.05-SNPLevel.txt",
//                "1;1;1;10;1;11;1;4;1;4;1;4", "snp;probe;probe;probe;probe;probe", "D:\\UMCG\\ProbeMapping\\Info\\V70\\gencode.v15.annotation.gtf.gz",
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Cis_Pc22c_meQTLs\\Primary\\eQTLProbesFDR0.05-ProbeLevel_ldDrivenEffectsRemoved-ExtendedInfo.txt");

//        addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt",
//                "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", 
//                "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null,
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt-ExtendedInfo5.txt");
        
//            addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt",
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt",  
//                "0;8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-69-70-71-72-73-74-75-76-77-78-79-80-81-82-83-84-85-86-87-88-89-90-91-92-93-94-95-96-97-98-99-100-101-102-103-104-105-106-107-108-109-110-111-112-113-114-115-116-117-118-119-120-121-122-123-124-125-126-127-128-129-130-131-132-133-134-135-136-137-138-139-140-141-142-143-144-145-146-147-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243-244-245-246-247-248-249-250-251-252-253-254-255-256-257-258-259-260-261-262-263-264-265-266-267-268-269-270-271-272-273-274-275-276-277-278-279-280-281-282-283-284-285-286-287-288-289-290-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346-347-348-349-350-351-352-353-354-355-356-357;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null,
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfTMP.txt");
//        
//            addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt",
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10_13BM.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt",  
//                "0;8-9-10-11-12-13-14-15-16-17-18-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null,
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo13BM.txt");
        
            // The one active job: six annotation sources, twelve key/value column
            // entries (two per source) and six id types (one per source); no gene
            // re-annotation file is supplied (null).
            addAnnotationToQTLOutput(
                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_eQTMs_062015\\eQTLsFDR0.05-SNPLevel.txt",
                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt",  
                "0;8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-69-70-71-72-73-74-75-76-77-78-79-80-81-82-83-84-85-86-87-88-89-90-91-92-93-94-95-96-97-98-99-100-101-102-103-104-105-106-107-108-109-110-111-112-113-114-115-116-117-118-119-120-121-122-123-124-125-126-127-128-129-130-131-132-133-134-135-136-137-138-139-140-141-142-143-144-145-146-147-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243-244-245-246-247-248-249-250-251-252-253-254-255-256-257-258-259-260-261-262-263-264-265-266-267-268-269-270-271-272-273-274-275-276-277-278-279-280-281-282-283-284-285-286-287-288-289-290-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346-347-348-349-350-351-352-353-354-355-356-357;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null,
                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_eQTMs_062015\\eQTLsFDR0.05-SNPLevel.txt_ExtendedInfo.txt");
        
//            addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Artificial_eQTMs0.0_Stringent.txt",
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt",  
//                "0;8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-69-70-71-72-73-74-75-76-77-78-79-80-81-82-83-84-85-86-87-88-89-90-91-92-93-94-95-96-97-98-99-100-101-102-103-104-105-106-107-108-109-110-111-112-113-114-115-116-117-118-119-120-121-122-123-124-125-126-127-128-129-130-131-132-133-134-135-136-137-138-139-140-141-142-143-144-145-146-147-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243-244-245-246-247-248-249-250-251-252-253-254-255-256-257-258-259-260-261-262-263-264-265-266-267-268-269-270-271-272-273-274-275-276-277-278-279-280-281-282-283-284-285-286-287-288-289-290-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346-347-348-349-350-351-352-353-354-355-356-357;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null,
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Artificial_eQTMs0.0_Stringent_ExtendedInfo.txt");

        
//        addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt",
//                "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;",  
//                "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;probe;snp;snp;snp", null,
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo4.txt");
        
//        addAnnotationToQTLOutput(
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel.txt",
//                "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;",  
//                "1;8-9-10-11-12-13-14;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp;snp;snp", null,
//                "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo2.txt");
//        
        
    }

    /**
     * Reads a QTL file, gathers one or more extra annotation columns for every
     * QTL and writes the QTL rows plus those columns to {@code out}.
     *
     * <p>Annotation is collected per QTL as one tab-separated string; the
     * matching column names are collected in parallel and appended to
     * {@code QTLTextFile.header} on the first output line. A QTL without a
     * match in a source gets "-" for each column of that source.
     *
     * @param in path of the QTL file, opened through {@code QTLTextFile}
     * @param sources ';'-separated list of annotation source paths. A path
     *        containing both "GWAS" and "Catalog" is loaded as a
     *        {@code GWASCatalog}; every other path is read as a {@code TextFile}
     *        whose first line is split on tabs to obtain the column names
     * @param keyValuePairs ';'-separated list with exactly two entries per
     *        source: entry {@code 2*i} is the key column index of source
     *        {@code i} and entry {@code 2*i+1} its value column index, or several
     *        value column indices joined with '-'. If the count does not match,
     *        a message is printed and the JVM exits
     * @param idsToAnnotate ';'-separated list, indexed by source position;
     *        "probe" (case-insensitive) selects lookup by {@code getProbe()},
     *        anything else selects lookup by {@code getRsName()}
     * @param reannotateGene path handed to
     *        {@code reading.readGTFAnnotationFileHash}; when {@code null} or
     *        empty the "Ensemble Gene ids (;)" column is not produced
     * @param out path of the output file
     * @throws IOException declared by the method; presumably raised by the
     *         underlying file readers/writers
     */
    static void addAnnotationToQTLOutput(String in, String sources, String keyValuePairs, String idsToAnnotate, String reannotateGene, String out) throws IOException {
        // NOTE(review): `e` is never closed in the code visible here.
        QTLTextFile e = new QTLTextFile(in, QTLTextFile.R);

        // NOTE(review): raw collection types here and below; the element types
        // (EQTL for qtls, String for extraAnnotation, judging by how they are used)
        // appear to have been stripped from this listing together with the "<...>".
        ArrayList qtls = e.readList();
        String extraHeaders = "";
        // One entry per QTL, in the same order as qtls; each entry is the
        // tab-separated annotation collected so far for that QTL.
        ArrayList extraAnnotation = new ArrayList(qtls.size());

        String[] sourceList = sources.split(";");
        String[] keyValueList = keyValuePairs.split(";");
        String[] annotationIdList = idsToAnnotate.split(";");

        // Every source needs exactly two entries in keyValuePairs.
        // NOTE(review): this error path exits with status 0.
        if (sourceList.length * 2 != keyValueList.length) {
            System.out.println("Faulty key value match input.");
            System.exit(0);
        }
        
        
        // false: extraAnnotation has no entries yet, so values are add()-ed.
        // true: entries exist, so values are appended to the existing entry after a tab.
        boolean addTo = false;
        
        if (!(reannotateGene == null || reannotateGene.equals(""))) {
            extraHeaders += "\tEnsemble Gene ids (;)";
            HashMap gencodeGeneMapping = reading.readGTFAnnotationFileHash(reannotateGene, 50000);
            // NOTE(review): the type arguments on the next line are damaged in this
            // listing ("HashMap>"); the map is used below as String -> HashSet of String.
            HashMap> genCodeMappingGenes = new HashMap>();
            // Invert the mapping: for every entry whose key contains "ENSG", each
            // ';'-separated part of the value becomes a key that points to the set
            // of such "ENSG" keys.
            for (Map.Entry t : gencodeGeneMapping.entrySet()) {
                if (t.getKey().contains("ENSG")) {
                    String[] newKeys;
                    if (t.getValue().contains(";")) {
                        newKeys = t.getValue().split(";");
                    } else {
                        newKeys = new String[1];
                        newKeys[0] = t.getValue();
                    }

                    for (String nKey : newKeys) {
                        if (genCodeMappingGenes.containsKey(nKey)) {
                            genCodeMappingGenes.get(nKey).add(t.getKey());
                        } else {
                            HashSet ensembleIds = new HashSet();
                            ensembleIds.add(t.getKey());
                            genCodeMappingGenes.put(nKey, ensembleIds);
                        }
                    }
                }
            }
            int id = 0;
            for (EQTL qtl : qtls) {
                // Strip double quotes from the HUGO field; this modifies the QTL
                // object, so the cleaned value is also what gets written out.
                qtl.setProbeHUGO(qtl.getProbeHUGO().replace("\"", ""));
                
                if (qtl.getProbeHUGO().contains(";")) {
                    // Several names in one field: take the union of their id sets.
                    String[] ts = qtl.getProbeHUGO().split(";");
                    HashSet setje = new HashSet();
                    for (String x : ts) {
                        if (genCodeMappingGenes.containsKey(x)) {
                            setje.addAll(genCodeMappingGenes.get(x));
                        }
                    }
                    if(setje.isEmpty()){
                        if (!addTo) {
                            extraAnnotation.add("-");
                        } else {
                            extraAnnotation.set(id, extraAnnotation.get(id) + "\t-" );
                        }
                    } else {
                        // The replaceAll chain turns the set's "[a, b]" rendering into "a;b".
                        if (!addTo) {
                            extraAnnotation.add(setje.toString().replaceAll("\\[", "").replaceAll("\\]", "").replaceAll(", ", ";"));
                        } else {
                            extraAnnotation.set(id, extraAnnotation.get(id) + "\t" + setje.toString().replaceAll("\\[", "").replaceAll("\\]", "").replaceAll(", ", ";"));
                        }
                    }
                    
                } else {
                    // Single name in the field: direct lookup.
                    if (genCodeMappingGenes.containsKey(qtl.getProbeHUGO())) {
                        if (!addTo) {
                            extraAnnotation.add(genCodeMappingGenes.get(qtl.getProbeHUGO()).toString().replaceAll("\\[", "").replaceAll("\\]", "").replaceAll(", ", ";"));
                        } else {
                            extraAnnotation.set(id, extraAnnotation.get(id) + "\t" + genCodeMappingGenes.get(qtl.getProbeHUGO()).toString().replaceAll("\\[", "").replaceAll("\\]", "").replaceAll(", ", ";"));
                        }
                    } else {
                        if (!addTo) {
                            extraAnnotation.add("-");
                        } else {
                            extraAnnotation.set(id, extraAnnotation.get(id) + "\t-" );
                        }
                    }
                }
                id++;
            }
        }

        // Position of the current source; also indexes keyValueList (two entries
        // per source) and annotationIdList. It advances for GWAS sources as well.
        int masterId = 0;
        for (String annotationSource : sourceList) {
            // Once any column has been collected, later sources append to it.
            if (!extraAnnotation.isEmpty()) {
                addTo = true;
            }
            if (annotationSource.contains("GWAS") && annotationSource.contains("Catalog")) {
                // GWAS catalog source: three fixed columns, looked up by SNP name.
                // The keyValueList and annotationIdList entries are not read here.

                GWASCatalog cat = new GWASCatalog(annotationSource);
                HashMap catSnps = cat.getSnpToObj();
                extraHeaders += "\tGWASTraits (;)\tRisk allels (;)\t P-value GWAS (;)";

                int id = 0;
                for (EQTL eQtl : qtls) {
                    String gwasTrait = "";
                    String gwasAllelle = "";
                    String gwasPval = "";
                    GWASSNP s = catSnps.get(eQtl.getRsName());
                    if (s != null) {
                        HashSet traits = s.getAssociatedTraits();

                        // Build three parallel ';'-separated lists, one item per trait.
                        for (GWASTrait t : traits) {
                            if (!gwasTrait.equals("")) {
                                gwasTrait += ";";
                                gwasAllelle += ";";
                                gwasPval += ";";
                            }
                            gwasTrait += t.getCleanName();
                            gwasAllelle += s.getRiskAllele(t);
                            gwasPval += s.getPValueAssociatedWithTrait(t);

                        }
//                        System.out.println(gwasTrait + "\t" + gwasAllelle+ "\t" + gwasPval);
                        if (!addTo) {
                            extraAnnotation.add(gwasTrait + "\t" + gwasAllelle + "\t" + gwasPval);
                        } else {
                            extraAnnotation.set(id, extraAnnotation.get(id) + "\t" + gwasTrait + "\t" + gwasAllelle + "\t" + gwasPval);
                        }


                    } else {
//                        System.out.println("-\t-\t-");
                        // SNP not in the catalog: one "-" per GWAS column.
                        if (!addTo) {
                            extraAnnotation.add("-\t-\t-");
                        } else {
                            extraAnnotation.set(id, extraAnnotation.get(id) + "\t-\t-\t-");
                        }

                    }
                    id++;
                }

            } else {
//                System.out.println(annotationSource);
//                System.out.println(masterId);
                // Tab-separated source: parse the key column index for this source.
                int keyInt = 0;
                
                try {
                    keyInt = Integer.parseInt(keyValueList[(masterId * 2)]);
                } catch (NumberFormatException ex) {
                    System.out.println("Error parsing key: " + keyValueList[masterId * 2] + " for annotationsource:" + annotationSource);
                    System.exit(-1);
                }

                // Value column index/indices; several indices are joined with '-'.
                int[] valueInt = {0};
                try {
                    if(keyValueList[((masterId * 2) + 1)].contains("-")){
                        String[] tmpValues = keyValueList[((masterId * 2) + 1)].split("-");
                        valueInt = new int[tmpValues.length];
                        // NOTE(review): the next line is damaged in this listing. Text between
                        // a '<' and a later '>' appears to have been lost, taking with it the
                        // rest of this loop, the end of the try/catch and the opening of the
                        // single-value-column branch (including the declaration of `t` used
                        // below). Restore from the original source; do not guess.
                        for(int i=0; i annotationFromSource = (HashMap) t.readAsHashMap(keyInt, valueInt[0]);

                    int id = 0;
                    for (EQTL eQtl : qtls) {

                        // Look up by probe id or by SNP name, as configured for this source.
                        String newInfo;
                        if (annotationIdList[masterId].equalsIgnoreCase("probe")) {
                            newInfo = annotationFromSource.get(eQtl.getProbe());
                        } else {
                            newInfo = annotationFromSource.get(eQtl.getRsName());
                        }

                        if (newInfo != null) {
    //                        System.out.println(newInfo);
                            if (!addTo) {
                                extraAnnotation.add(newInfo);
                            } else {
                                extraAnnotation.set(id, extraAnnotation.get(id) + "\t" + newInfo);
                            }

                        } else {
                            // Missing annotation is reported on stdout and replaced by "-";
                            // processing continues (the exit call below is commented out).
                            System.out.println("Error, something wrong. Desired information is not pressent.");
                            System.out.println(annotationSource);
                            if (annotationIdList[masterId].equalsIgnoreCase("probe")) {
                                System.out.println(eQtl.getProbe());
                            } else {
                                System.out.println(eQtl.getRsName());
                            }
                            
//        System.exit(-1);
    //                        System.out.println("-");
                            if (!addTo) {
                                extraAnnotation.add("-");
                            } else {
                                
                                extraAnnotation.set(id, extraAnnotation.get(id) + "\t-");
                            }

                        }
                        id++;
                    }
                    if (!extraAnnotation.isEmpty()) {
                        addTo = true;
                    }
                    t.close();
                }  else if (valueInt.length>1) {
                    // Several value columns requested from this source.
                    
                    TextFile t = new TextFile(annotationSource, TextFile.R);
                    // The first line of the source supplies the names of the selected columns.
                    String[] header = t.readLineElems(Pattern.compile("\t"));
                    for(int val : valueInt){
                        extraHeaders += "\t" +header[val];
                    }
                    

                    // presumably maps the key column to the selected value columns joined
                    // with "\t" (the separator passed in) — confirm against TextFile.readAsHashMap
                    HashMap annotationFromSource = (HashMap) t.readAsHashMap(keyInt, valueInt, "\t");

                    int id = 0;
                    for (EQTL eQtl : qtls) {

                        // Look up by probe id or by SNP name, as configured for this source.
                        String newInfo;
                        if (annotationIdList[masterId].equalsIgnoreCase("probe")) {
                            newInfo = annotationFromSource.get(eQtl.getProbe());
                        } else {
                            newInfo = annotationFromSource.get(eQtl.getRsName());
                        }

                        if (newInfo != null) {
    //                        System.out.println(newInfo);
                            if (!addTo) {
                                extraAnnotation.add(newInfo);
                            } else {
                                extraAnnotation.set(id, extraAnnotation.get(id) + "\t" + newInfo);
                            }

                        } else {
                            // Unlike the single-column branch, the source path is not printed here.
                            // NOTE(review): only a single "-" is written although this source
                            // contributes valueInt.length header columns, so later columns of
                            // this row may shift left — confirm whether that is intended.
                            System.out.println("Error, something wrong. Desired information is not pressent.");
                            
                            if (annotationIdList[masterId].equalsIgnoreCase("probe")) {
                                System.out.println(eQtl.getProbe());
                            } else {
                                System.out.println(eQtl.getRsName());
                            }
                            
//        System.exit(-1);
//                            System.out.println("-");
                            if (!addTo) {
                                extraAnnotation.add("-");
                            } else {
                                extraAnnotation.set(id, extraAnnotation.get(id) + "\t-");
                            }

                        }
                        id++;
                    }
                    if (!extraAnnotation.isEmpty()) {
                        addTo = true;
                    }
                    t.close();
                }
            }
            masterId++;
        }

        TextFile outWriter = new TextFile(out, TextFile.W);

        // First line: the standard QTL header followed by the collected column names.
        outWriter.writeln(QTLTextFile.header + extraHeaders);
        int id = 0;
        // One output row per QTL: its own text followed by its annotation string.
        for (EQTL qtl : qtls) {
            outWriter.writeln(qtl.toString() + "\t" + extraAnnotation.get(id));
            id++;
        }
        outWriter.close();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy