All Downloads are FREE. The search and download functionality uses the official Maven repository.

umcg.genetica.legacy.ParseSOFTFile Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
///*
// * To change this template, choose Tools | Templates
// * and open the template in the editor.
// */
//
//package umcg.genetica.legacy;
//
//import java.io.*;
//import java.util.*;
//import java.awt.image.*;
//import java.awt.*;
//import java.rmi.Remote;
//import java.util.regex.Pattern;
//import umcg.genetica.math.stats.*;
//import umcg.genetica.methylation.DeepCopy;
//
///**
// *
// * @author lude
// */
//public class ParseSOFTFile_lude {
//	
//	
//	private static Pattern SPLIT_ON_TAB = Pattern.compile("\\t");
//	private static Pattern SPLIT_ON_EQUALS = Pattern.compile(" = ");
//
//    public ParseSOFTFile_lude(){
//		
//	}
//	
//	public ParseSOFTFile_lude(String fileLocation, int numberOfSamples, int numberOfProbes) throws Exception {
//		
//		importSOFTFile(fileLocation, numberOfSamples, numberOfProbes);
//		
//		//performQC();
//
//        //performPCA();
//
//        //convertPCsToGeneLevel();
//
//        //parseDataPeterBram();
//
//        /*
//        performCoexpressionOnPCScoresFromPCAOverSamples();
//        
//        if (1==2) {
//            ExpressionDataset dataset = new ExpressionDataset("/Users/lude/Downloads/PCATCGABeta/PrincipalcomponentsTCGA+GPL8490Combined.txt");
//            dataset.transposeDataset();
//            for (int p=0; p<100; p++) {
//                for (int q=100; q<200; q++) {
//                    double corr = JSci.maths.ArrayMath.correlation(dataset.rawData[p], dataset.rawData[q]);
//                    double r2  = corr * corr;
//                    if (r2 > 0.35) {
//                        System.out.println(p + "\t" + q + "\t" + corr + "\t" + r2);
//                        for (int  s=0; s hashUniqueProbes = new HashMap();
//        ArrayList vecUniqueProbes = new ArrayList();
//        ArrayList vecUniqueSamples = new ArrayList();
//		
//        int sampleID = 0;
//		
//        try {
//			java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(fileLocation)));
//            String str = "";
//            while ((str = in.readLine()) != null) {
//                if (str.startsWith("^")) {
//					
//                    if(debug) System.out.println(str);
//					
//                    if (str.startsWith("^SAMPLE")) {
//                        String sampleName = SPLIT_ON_EQUALS.split(str)[1];
//                        vecUniqueSamples.add(str);
//                        while ((str = in.readLine()) != null) {
//                            //System.out.println(str);
//                            int nrProbesThisSample = 0;
//                            if (str.startsWith("!Sample_supplementary_file") && debug) {
//                                System.out.println(str);
//                            }
//                            if (str.startsWith("!Sample_characteristics_ch1") && debug) {
//                                if (str.toLowerCase().contains("male") || str.toLowerCase().contains("female") ) {
//                                    System.out.println(sampleName + "\t" + str);
//                                }
//                            }
//                            if (str.startsWith("!sample_table_begin")) {
//                                str = in.readLine();
//								
//                                if(debug) System.out.println(str);
//								
//                                String[] data = SPLIT_ON_TAB.split(str);
//								
//                                int probeHeaderColumn = -1;
//                                int valueHeaderColumn = -1;
//                                int intensityHeaderColumn = -1;
//								
//                                double[] valsValue = new double[numberOfProbes];
//								
//                                for (int d=0; d SelectionBasedOnSeriesId = new HashMap();
//
//        ExpressionDataset_lude dataset = new ExpressionDataset_lude(numberOfProbes, numberOfSamples);
//
//        HashMap hashUniqueProbes = new HashMap();
//        ArrayList uniqueProbes = new ArrayList();
//        ArrayList uniqueSamples = new ArrayList();
//		
//        int sampleID = 0;
//		
//        try {
//			java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(fileLocation)));
//            String str = "";
//            while ((str = in.readLine()) != null) {
//                if (str.startsWith("^")) {
//					
//                    if(debug) System.out.println(str);
//					
//                    if (str.startsWith("^SAMPLE")) {
//                        String sampleName = SPLIT_ON_EQUALS.split(str)[1];
//						String sampleSeriesId = "";
//                        
//                        while ((str = in.readLine()) != null) {
//                            //System.out.println(str);
//                            int nrProbesThisSample = 0;
//							
//							if (str.startsWith("!Sample_series_id")) {
//								if(debug)System.out.println(str);
//								sampleSeriesId = SPLIT_ON_EQUALS.split(str)[1];
//                            }
//							
//                            if (str.startsWith("!Sample_supplementary_file") && debug) {
//                                System.out.println(str);
//                            }
//							
//                            if (str.startsWith("!Sample_characteristics_ch1") && debug) {
//                                if (str.toLowerCase().contains("male") || str.toLowerCase().contains("female") ) {
//                                    System.out.println(sampleName + "\t" + str);
//                                }
//                            }
//							
//                            if (str.startsWith("!sample_table_begin")) {
//                                str = in.readLine();
//								
//                                if(debug) System.out.println(str);
//								
//                                String[] data = SPLIT_ON_TAB.split(str);
//								
//                                int probeHeaderColumn = -1;
//                                int valueHeaderColumn = -1;
//                                int intensityHeaderColumn = -1;
//								
//                                double[] valsValue = new double[numberOfProbes];
//								
//                                for (int d=0; d5)) {
//									if(debug) System.out.println(sampleSeriesId+"\t"+SelectionBasedOnSeriesId.get(sampleSeriesId));
//
//									uniqueSamples.add(sampleName);
//
//									for (int p=0; p0){		
//			for (int s=0; s hashSamplesPassingQC = new HashMap();
//		HashMap hashProbesPassingQC = new HashMap();
//		
//        for (int s=0; s=maxMissingProbesMissingPerSample) {
//                if(debug) System.out.println(s + "\t" + dataset.sampleNames[s] + "\t" + nrMissing);
//            } else {
//                if (!hashSamplesToExplicitlyExlcude.containsKey(dataset.sampleNames[s])) {
//                    hashSamplesPassingQC.put(dataset.sampleNames[s], true);
//                }
//            }
//        }
//		
//        dataset = dataset.RemoveProbesOrSamples(dataset, hashProbesPassingQC, hashSamplesPassingQC);
//		System.out.println("Number of samples removed: "+(originalNumberSamples-dataset.rawData[1].length));
//		
//        for (int p=0; p=maxMissingSamplesMissingAProbe) {
//				if(debug) System.out.println("Excluding probe:\t" + p + "\t" + dataset.probeNames[p] + "\t" + nrMissing);
//            } else {
//                hashProbesPassingQC.put(dataset.probeNames[p], null);
//            }
//        }
//		
//        dataset = dataset.RemoveProbesOrSamples(dataset, hashProbesPassingQC, hashSamplesPassingQC);
//		System.out.println("Number of probes removed: "+(originalNumberProbes-dataset.rawData.length));
//		
//		QuantileNormalization.QuantileNormAdressingNaValuesBeforeQN(dataset.rawData, true, false);
//		
//		
//		//OLD:
//        int[] nanPerProbe = new int[dataset.nrProbes];
//        int[] nanPerSample = new int[dataset.nrSamples];
//        for (int p=0; p vec1 = new ArrayList();
//            for (int p=0; p 0.01) {
//                    System.out.println(tc1 + "\t" + tc2 + "\t" + r2);
//                }
//                r2Sum+=r2;
//            }
//            System.out.println(tc1 + "\t" + r2Sum);
//        }
//        
//        System.exit(0);
//    }
//    
//    public void parseTCGAData() {
//        
//        String fileDirTCGAFiles = "/Users/lude/Downloads/774bde41-d002-447f-805b-0da8d94b30fa/DNA_Methylation/JHU_USC__HumanMethylation27/Level_1/";
//        File file = new File(fileDirTCGAFiles);
//        File[] files = file.listFiles();
//        Vector vecFiles = new Vector();
//        for (int f=0; f10) {
//                    hashProbesToInclude.put("M-" + data[0], null);
//                }
//            }
//            in.close();
//        } catch (IOException e) {
//            e.printStackTrace();
//            System.out.println(e.getMessage());
//            System.exit(-1);
//        }        
//        
//        //ExpressionDataset dataset = new ExpressionDataset("/Users/lude/Downloads/PCATCGADataUv2/principalcomponents.txt");
//        ExpressionDataset_lude dataset = new ExpressionDataset_lude("/Users/lude/Downloads/PCATCGADataM+U/principalcomponents.txt", "\t", hashProbesToInclude, null);
//
//        
//        double[] gcContent1 = new double[dataset.nrProbes];
//        double[] gcContent2 = new double[dataset.nrProbes];
//        double[] gcContent3 = new double[dataset.nrProbes];
//        double[] gcContent4 = new double[dataset.nrProbes];
//        try {
//            java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(fileProbeAnnotation)));
//            String str = "";
//            while ((str = in.readLine()) != null) {
//                String[] data = str.split(",");
//                if (data.length>10) {
//                    int probeIndex = ((Integer) dataset.hashProbes.get("M-" + data[0])).intValue();
//                    if (1==1) {
//                        byte[] bytes = data[4].getBytes();
//                        for (int b=0; b 100) {
//                System.out.println(p + "\t" + dataset.probeNames[p] + "\t" + mean);
//            }
//        }
//
//        dataset.standardNormalizeData();
//        dataset.transposeDataset();
//        dataset.standardNormalizeData();
//        //dataset.transposeDataset();
//        
//        File fileDatasetEVs = new File(dataset.fileName + ".PCAOverSamplesEigenvectorsAfterQNProbesCenteredScaled.txt");
//        if (!fileDatasetEVs.exists()) {
//            int sampleCountMinusOne = dataset.nrSamples - 1;
//            double[][] corr = new double[dataset.nrProbes][dataset.nrProbes];
//            for (int f = 0; f < dataset.nrProbes; f++) {
//                for (int g = f + 1; g < dataset.nrProbes; g++) {
//                    double covarianceInterim = 0;
//                    for (int s = 0; s < dataset.nrSamples; s++) {
//                        covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
//                    }
//                    corr[f][g] = covarianceInterim / (double) (sampleCountMinusOne);
//                    corr[g][f] = corr[f][g];
//                }
//                corr[f][f] = 1;
//                System.out.println(f);
//            }
//
//            System.out.println("EVD decomposition ongoing:");
//            Jama.EigenvalueDecomposition eig = eigenValueDecomposition(corr);
//            double[] eigenValues = eig.getRealEigenvalues();
//
//            double[][] eigenVectors = new double[corr.length][corr.length];
//            ExpressionDataset_lude datasetEVs = new ExpressionDataset_lude(corr.length, corr.length);
//            for (int f = 0; f < dataset.nrProbes; f++) {
//                datasetEVs.sampleNames[f] = dataset.probeNames[f];
//            }
//            for (int pca=0; pca 3) {
//                        annotation = data[3];
//                        if (!hashUniqueGenes.containsKey(annotation)) {
//                            vecUniqueGenes.add(annotation);
//                            hashUniqueGenes.put(annotation, data[0]);
//                        } else {
//                            hashUniqueGenes.put(annotation, (String) hashUniqueGenes.get(annotation) + "," + data[0]);
//                        }
//                    }
//                    hashAnnotation.put(data[0], annotation);
//                }
//            }
//        } catch (IOException e) {
//            e.printStackTrace();
//            System.out.println(e.getMessage());
//            System.exit(-1);
//        }
//        System.out.println("Number of unique genes:\t" + vecUniqueGenes.size());
//
//        
//        /*
//        ExpressionDataset dataset = new ExpressionDataset("/Users/lude/Documents/DMG/Data/GPL8490/GPL8490_family.soft.intensities.txt.binary.qced.binary");
//        dataset.transposeDataset();
//        dataset.standardNormalizeData();
//
//        
//        File fileDatasetEVs = new File(dataset.fileName + ".PCAOverSamplesEigenvectors.txt");
//        if (!fileDatasetEVs.exists()) {
//            int sampleCountMinusOne = dataset.nrSamples - 1;
//            double[][] corr = new double[dataset.nrProbes][dataset.nrProbes];
//            for (int f = 0; f < dataset.nrProbes; f++) {
//                for (int g = f + 1; g < dataset.nrProbes; g++) {
//                    double covarianceInterim = 0;
//                    for (int s = 0; s < dataset.nrSamples; s++) {
//                        covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
//                    }
//                    corr[f][g] = covarianceInterim / (double) (sampleCountMinusOne);
//                    corr[g][f] = corr[f][g];
//                }
//                corr[f][f] = 1;
//                System.out.println(f);
//            }
//
//            System.out.println("EVD decomposition ongoing:");
//            Jama.EigenvalueDecomposition eig = eigenValueDecomposition(corr);
//            double[] eigenValues = eig.getRealEigenvalues();
//
//            double[][] eigenVectors = new double[corr.length][corr.length];
//            ExpressionDataset datasetEVs = new ExpressionDataset(corr.length, corr.length);
//            for (int f = 0; f < dataset.nrProbes; f++) {
//                datasetEVs.sampleNames[f] = dataset.probeNames[f];
//            }
//            for (int pca=0; pca 3) {
//                    annotation = data[3];
//                }
//                hashAnnotation.put(data[0], annotation);
//            }
//        } catch (IOException e) {
//            e.printStackTrace();
//            System.out.println(e.getMessage());
//            System.exit(-1);
//        }
//
//
//        String outputFile = "/Users/lude/Documents/DMG/Data/GPL8490/Plots/";
//        dataset.standardNormalizeData();
//        int sampleCountMinusOne = dataset.nrSamples - 1;
//        for (int f = 0; f < dataset.nrProbes; f++) {
//
//            String annotation = (String) hashAnnotation.get(dataset.probeNames[f]);
//            //if (annotation.contains("F13A1")) {
//                for (int g = f + 1; g < dataset.nrProbes; g++) {
//                //for (int g = 0; g < dataset.nrProbes; g++) {
//                    //Calculate correlation:
//                    double covarianceInterim = 0;
//                    for (int s = 0; s < dataset.nrSamples; s++) {
//                        covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
//                    }
//                    double covariance = covarianceInterim / (double) (sampleCountMinusOne);
//                    double correlation = covariance;
//                    if (correlation > 0.90) {
//                        System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
//                    }
//                    if (correlation > 0.90) {
//                        //System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
//
//                        int width = 500 + 200;
//                        int height = 500 + 200;
//                        int marginLeft = 100; int marginRight = 100; int marginTop = 100; int marginBottom = 100;
//                        double innerWidth = width - marginLeft - marginRight;
//                        double innerHeight = height - marginBottom - marginTop;
//                        double x0 = marginLeft;
//                        double x1 = x0 + innerWidth;
//                        double y0 = marginTop;
//                        double y1 = y0 + innerHeight;
//                        double centerX = (x1 + x0) / 2;
//                        double centerY = (y1 + y0) / 2;
//
//                        BufferedImage bimage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
//                        Graphics2D g2d = bimage.createGraphics();
//                        g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
//                        g2d.setColor(new Color(255, 255, 255));
//                        g2d.fillRect(0,0, width, height);
//
//                        g2d.setColor(new Color(0, 0, 0));
//                        g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
//                        double minF = JSci.maths.ArrayMath.min(dataset.rawData[f]);
//                        double maxF = JSci.maths.ArrayMath.max(dataset.rawData[f]);
//                        double minG = JSci.maths.ArrayMath.min(dataset.rawData[g]);
//                        double maxG = JSci.maths.ArrayMath.max(dataset.rawData[g]);
//                        for (int s=0; s 3) {
//                    annotation = data[3];
//                }
//                hashAnnotation.put(data[0], annotation);
//            }
//        } catch (IOException e) {
//            e.printStackTrace();
//            System.out.println(e.getMessage());
//            System.exit(-1);
//        }
//
//
//        String outputFile = "/Users/lude/Documents/DMG/Data/GPL8490/PCAOver4432SamplesProbesCenteredScaled/Plots/";
//        dataset.transposeDataset();
//        dataset.standardNormalizeData();
//        dataset.transposeDataset();
//        dataset.standardNormalizeData();
//        int sampleCountMinusOne = dataset.nrSamples - 1;
//
//        int[] count = new int[dataset.nrProbes];
//        
//        
//        if (1==2) {
//            int[] degree = new int[dataset.nrProbes];
//
//            for (int f = 0; f < dataset.nrProbes; f++) {
//                for (int g = f + 1; g < dataset.nrProbes; g++) {
//                    if (f!=g) {
//                        //Calculate correlation:
//                        double covarianceInterim = 0;
//                        for (int s = 0; s < dataset.nrSamples; s++) {
//                            covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
//                        }
//                        double covariance = covarianceInterim / (double) (sampleCountMinusOne);
//                        double correlation = covariance;
//                        if (correlation > 0.4) {
//                            degree[f]++;
//                            degree[g]++;
//                        }
//                    }
//                }
//                if (f%100==0) System.out.println(f);
//            }
//
//            int[] degreeDist = new int[dataset.nrProbes];
//            for (int f = 0; f < dataset.nrProbes; f++) {
//                degreeDist[degree[f]]++;
//            }
//            System.out.println("Degree distribution:");
//            for (int f = 0; f < dataset.nrProbes; f++) {
//                if (degreeDist[f]>0) {
//                    System.out.println(f + "\t" + degreeDist[f]);
//                }
//            }
//            System.out.println("");
//        }        
//        
//        //String[] alsGenes = {"SOD1","ALS2","SETX","FUS","VAPB","ANG","TARDBP","OPTN","VCP","APEX1","ATXN2","CHMP2B","NEFH","SMN1","SMN2","PRPH","VEGFA","UNC13A","C9orf72","NIPA1","PFN1"};
//        //String[] alsGenes = {"RGS1","REL","AHSA2","IL18RAP","IL18R1","IL1RL1","IL1RL2","ITGA4","UBE2E3","CTLA4","ICOS","CD28","CCR1","CCR2","CCRL2","CCR3","CCR5","CCR9","IL12A","LPP","IL2","IL21","HLA-DQA1","HLA-DQB1","TNFAIP3","TAGAP","SH2B3","PTPN2","TNFRSF14","MMEL1","RUNX3","PLEK","CCR4","CD80","KTELC1","BACH2","MAP3K7","PTPRK","THEMIS","ZMIZ1","ETS1","CIITA","SOCS1","CLEC16A","ICOSLG","PARK7","TNFRSF9","NFIA","CD247","FASLG","TNFSF18","TNFSF4","FRMD4B","IRF4","ELMO1","ZFP36L1","UBE2L3","YDJC","TLR7","TLR8"};
//        //String[] alsGenes = {"RGS1","REL","AHSA2","IL18RAP","IL18R1","IL1RL1","IL1RL2","ITGA4","UBE2E3","CTLA4","ICOS","CD28","CCR1","CCR2","CCRL2","CCR3","CCR5","CCR9","IL12A","LPP","IL2","IL21","HLA-DQA1","HLA-DQB1","TNFAIP3","TAGAP","SH2B3","PTPN2","TNFRSF14","MMEL1","RUNX3","PLEK","CCR4","CD80","KTELC1","BACH2","MAP3K7","PTPRK","THEMIS","ZMIZ1","ETS1","CIITA","SOCS1","CLEC16A","ICOSLG","PARK7","TNFRSF9","NFIA","CD247","FASLG","TNFSF18","TNFSF4","FRMD4B","IRF4","ELMO1","ZFP36L1","UBE2L3","YDJC","TLR7","TLR8"};
//        //String[] alsGenes = {"FTO", "TMEM18", "MC4R", "GNPDA2", "BDNF", "NEGR1", "SH2B1", "ETV5", "MTCH2", "KCTD15", "identified", "SEC16B", "TFAP2B", "FAIM2", "NRXN3", "identified", "RBJ", "GPRC5B", "MAP2K5", "QPCTL", "TNNI3K", "SLC39A8", "FLJ35779", "LRRN6C", "TMEM160", "FANCL", "CADM2", "PRKD1", "LRP1B", "PTBP2", "MTIF3", "ZNF608", "RPL27A", "NUDT3", "APOB48R", "SULT1A2", "AC138894.2", "ATXN2L", "TUFM", "NDUFS3", "CUGBP1", "SEC16B", "TFAP2B", "FAIM2", "NRXN3", "RBJ", "GPRC5B", "MAP2K5", "QPCTL", "TNNI3K", "SLC39A8", "FLJ35779", "LRRN6C", "TMEM160", "FANCL", "CADM2", "PRKD1", "LRP1B", "PTBP2", "MTIF3", "ZNF608", "RPL27A", "NUDT3", "ADCY3", "POMC", "IQCK", "LBXCOR1", "GIPR", "HMGCR", "ZC3H4", "GTF3A", "TUB", "HMGA1"};
//        String[] alsGenes = {"LACTB", "TP53"};
//        //String[] alsGenes = {"OAS1", "TAGAP"};
//        
//        
//        for (int f = 0; f < dataset.nrProbes; f++) {
//
//            String annotation = (String) hashAnnotation.get(dataset.probeNames[f]);
//            boolean include = false;
//            //if (annotation.toUpperCase().startsWith("TARDBP".toUpperCase())) {
//            //    include = true;
//            //}
//            for (int a=0; a 0.3) {
//                            System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
//                            count[g]++;
//                        }
//                        if (Math.abs(correlation) > 10.4) {
//                            //System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
//
//                            int width = 500 + 200;
//                            int height = 500 + 200;
//                            int marginLeft = 100; int marginRight = 100; int marginTop = 100; int marginBottom = 100;
//                            double innerWidth = width - marginLeft - marginRight;
//                            double innerHeight = height - marginBottom - marginTop;
//                            double x0 = marginLeft;
//                            double x1 = x0 + innerWidth;
//                            double y0 = marginTop;
//                            double y1 = y0 + innerHeight;
//                            double centerX = (x1 + x0) / 2;
//                            double centerY = (y1 + y0) / 2;
//
//                            BufferedImage bimage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
//                            Graphics2D g2d = bimage.createGraphics();
//                            g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
//                            g2d.setColor(new Color(255, 255, 255));
//                            g2d.fillRect(0,0, width, height);
//
//                            g2d.setColor(new Color(0, 0, 0));
//                            g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
//                            double minF = JSci.maths.ArrayMath.min(dataset.rawData[f]);
//                            double maxF = JSci.maths.ArrayMath.max(dataset.rawData[f]);
//                            double minG = JSci.maths.ArrayMath.min(dataset.rawData[g]);
//                            double maxG = JSci.maths.ArrayMath.max(dataset.rawData[g]);
//                            for (int s=0; s1) {
//                System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + count[f]);
//            }
//        }
//
//        System.exit(0);
//    }
//
//    public void comparePCScoresWithGPL570TCs() {
//
//        String sortedEigenvectorFile         = "/Users/lude/Documents/DMG/Data/HumanUCSC/PCAOverAllProbesGPL570/eigenvectors.txt.binary";
//        String probeAnnotationFile = "/Users/lude/Documents/DMG/Data/AffymetrixAnnotation2010-08-23/HG-U133_Plus_2/HG-U133_Plus_2.na31.annot.csv";
//
//        HashMap hashProbeToChr = new HashMap();
//        HashMap hashProbeToHGNC = new HashMap();
//        HashMap hashHGNCToProbe = new HashMap();
//        try {
//            java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(probeAnnotationFile)));
//            String str = "";
//            while ((str = in.readLine()) != null) {
//                if (str.startsWith("\"Probe Set ID")) break;
//            }
//            while ((str = in.readLine()) != null) {
//                String[] data = str.trim().split("\",\"");
//                String mapping = data[12].replace("//", "\t");
//                String[] mappings = mapping.split("\t");
//                if (mappings.length==3) {
//                    String[] mappingsSplit = mappings[0].split(":");
//                    String chr = mappingsSplit[0];
//                    int chrInt = -1;
//                    try {
//                        chrInt = Integer.parseInt(chr.replace("chr", ""));
//                    } catch (Exception e) {
//                        if (chr.equals("chrX")) chrInt = 23;
//                        if (chr.equals("chrY")) chrInt = 24;
//                    }
//                    if (chrInt!=-1) {
//                        hashProbeToChr.put(data[0].substring(1), chrInt);
//                    }
//                }
//                String hgnc = data[14].trim(); if (hgnc.equals("---")) hgnc = "";
//                if (hgnc.length()>0) {
//                    hashProbeToHGNC.put(data[0].substring(1), hgnc.trim().replace(" ", ""));
//                    if (!hashHGNCToProbe.containsKey(hgnc)) {
//                        hashHGNCToProbe.put(hgnc, data[0].substring(1));
//                    } else {
//                        hashHGNCToProbe.put(hgnc, (String) hashHGNCToProbe.get(hgnc) + "," + data[0].substring(1));
//                    }
//                }
//            }
//        } catch (Exception e) {
//            System.out.println("Error:\t" + e.getMessage());
//            e.printStackTrace();
//        }
//
//        int nrTCs = 100;
//        HashMap hashTCsToIncludeHuman = new HashMap();
//        for (int tc=0; tc 3) {
//                    annotation = data[3];
//                }
//                hashAnnotation.put(data[0], annotation);
//            }
//        } catch (IOException e) {
//            e.printStackTrace();
//            System.out.println(e.getMessage());
//            System.exit(-1);
//        }
//
//
//        HashMap hash27KToAffy = new HashMap();
//        HashMap hashAffyTo27K = new HashMap();
//        HashMap hash27KChr = new HashMap();
//        for (int p=0; p 0.01) {
//                    //System.out.println(tc + "\t" + tc2 + "\t" + correlation + "\t" + r2);
//                }
//                r2Sum+=r2;
//            }
//            System.out.println(tc2 + "\t" + r2Sum);
//        }
//
//        System.out.println("");
//
//        dataset.transposeDataset();
//        datasetEVs.transposeDataset();
//
//        dataset.standardNormalizeData();
//        datasetEVs.standardNormalizeData();
//
//
//        int width = 400 + 200 + 500;
//        int height = 400 + 200;
//        int marginLeft = 100; int marginRight = 600; int marginTop = 100; int marginBottom = 100;
//        double innerWidth = width - marginLeft - marginRight;
//        double innerHeight = height - marginBottom - marginTop;
//        double x0 = marginLeft;
//        double x1 = x0 + innerWidth;
//        double y0 = marginTop;
//        double y1 = y0 + innerHeight;
//        double centerX = (x1 + x0) / 2;
//        double centerY = (y1 + y0) / 2;
//
//        BufferedImage bimage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
//        Graphics2D g2d = bimage.createGraphics();
//        g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
//        g2d.setColor(new Color(255, 255, 255));
//        g2d.fillRect(0,0, width, height);
//
//        g2d.setColor(new Color(0, 0, 0));
//        g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
//
//        long[][] corrDist = new long[201][201];
//        int nrSamples = dataset.nrSamples;
//        int nrSamplesGPL570 = datasetEVs.nrSamples;
//        double sampleCountMinusOne = dataset.nrSamples - 1;
//        double sampleCountMinusOneGPL570 = datasetEVs.nrSamples - 1;
//
//        for (int p=0; p200) correlationGPL570Int = 200;
//                    corrDist[correlationInt][correlationGPL570Int]++;
//                }
//            }
//            if (p%100==0) System.out.println(p);
//        }
//
//        long[] dist = new long[201];
//        long[] distGPL570 = new long[201];
//        long sum = 0;
//        for (int d=0; d<201; d++) {
//            for (int e=0; e<201; e++) {
//                long value = corrDist[d][e];
//                dist[d]+=value;
//                distGPL570[e]+=value;
//                sum+=value;
//                if (value > 0) {
//                    double scale = Math.log10((double) value + 1d);
//                    if (scale > 5) {
//                        scale = 5;
//                    }
//                    int colorInt = 255 - (int) Math.round(scale * 51d);
//                    //System.out.println(value + "\t" + scale + "\t" + colorInt);
//                    g2d.setColor(new Color(colorInt, colorInt, colorInt));
//                    g2d.fillRect((int) x0 + d * 2, (int) y1 - e * 2, 2, 2);
//                }
//            }
//        }
//        
//        long[][] corrDistExp = new long[201][201];
//        boolean[][] corrDistSign = new boolean[201][201];
//        for (int d=0; d<201; d++) {
//            for (int e=0; e<201; e++) {
//                corrDistExp[d][e] = (long) ((double) dist[d] * (double) (distGPL570[e] / (double) sum));
//                long value = corrDist[d][e];
//
//                if (value > 0) {
//                    double scale = Math.log10((double) value);
//                    if (scale > 5) {
//                        scale = 5;
//                    }
//                    int colorInt = 255 - (int) Math.round(scale * 51d);
//                    g2d.setColor(new Color(colorInt, colorInt, colorInt));
//                    g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
//                    g2d.fillRect((int) x0 + d * 2 + 500, (int) y1 - e * 2, 2, 2);
//                }
//
//                double obsDivExp = (double) corrDist[d][e] / (double) corrDistExp[d][e];
//                if (Double.isInfinite(obsDivExp)) obsDivExp = 15;
//
//                double colorRange = 0;
//                if (obsDivExp > 1) {
//                    colorRange = Math.log(obsDivExp) / 2;
//                    if (colorRange > 1d) colorRange = 1d;
//                    if (obsDivExp > 2) {
//                        corrDistSign[d][e] = true;
//                        System.out.println(d + "\t" + e + "\t" + corrDist[d][e] + "\t" + corrDistExp[d][e] + "\t" + obsDivExp);
//                    }
//                    float hue = (float) colorRange;
//                    g2d.setColor(g2d.getColor().getHSBColor(hue, 1.0f, 1.0f));
//                    g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, (float) colorRange));
//                    g2d.fillRect((int) x0 + d * 2 + 500, (int) y1 - e * 2, 2, 2);
//                }
//
//                
//            }
//        }
//
//        g2d.setColor(new Color(0, 0, 0));
//        g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.25f));
//        g2d.drawLine((int) x0, (int) centerY, (int) x1, (int) centerY);
//        g2d.drawLine((int) centerX, (int) y0, (int) centerX, (int) y1);
//        g2d.drawLine((int) x0 + 500, (int) centerY, (int) x1 + 500, (int) centerY);
//        g2d.drawLine((int) centerX + 500, (int) y0, (int) centerX + 500, (int) y1);
//
//        try {
//            //javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Documents/DMG/Data/GPL8490/PCAOverSamplesProbesCenteredScaled/Plots/CorrelationGPL8490-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
//            //javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Documents/DMG/Data/GPL8490/PCAOver4432SamplesProbesForcedNormalDistribution/Plots/CorrelationGPL8490-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
//            //javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Downloads/PCATCGABeta/CorrelationTCGA-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
//            javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Downloads/PCATCGABeta/PCAOverTCGA+GPL8490Combined/CorrelationTCGA+GPL8490-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
//        } catch (IOException e) {
//            System.out.println(e.getMessage());
//            e.printStackTrace();
//        }
//        
//
//        for (int p=0; p 0.25) {
//                                System.out.println(p + "\t" + dataset.probeNames[p] + "\t" + datasetEVs.probeNames[p] + "\t" + (String) hashProbeToHGNC.get(datasetEVs.probeNames[p]) + "\t" + q + "\t" + dataset.probeNames[q] + "\t" + datasetEVs.probeNames[q] + "\t" + (String) hashProbeToHGNC.get(datasetEVs.probeNames[q]) + "\t" + correlation + "\t" + correlationGPL570);
//                            //}
//                        }
//                    }
//                }
//            }
//            if (p%100==0) System.out.println(p);
//        }
//
//
//
//        System.exit(0);
//    }
//
//    /**
//     * Wraps the given 2-D array in a {@code Jama.Matrix} and returns the result of
//     * calling {@code eig()} on it.
//     *
//     * @param data matrix values to decompose
//     *             (NOTE(review): presumably a square, symmetric matrix such as a
//     *             correlation matrix - confirm against callers)
//     * @return the {@code Jama.EigenvalueDecomposition} produced by {@code Matrix.eig()}
//     */
//    private Jama.EigenvalueDecomposition eigenValueDecomposition(double[][] data) {
//        Jama.Matrix m = new Jama.Matrix(data);
//        Jama.EigenvalueDecomposition eig = m.eig();
//        return eig;
//    }
//
//    /**
//     * Copies one column of {@code eig.getV()} into a new array.
//     *
//     * The column index is {@code n - 1 - pca}, where {@code n} is the number of rows of
//     * the matrix returned by {@code getV()}; {@code pca = 0} therefore selects column
//     * {@code n - 1}. NOTE(review): this presumably relies on the eigenvalues being in
//     * ascending order so that the last column is the first principal component -
//     * confirm against the Jama documentation.
//     *
//     * @param eig         decomposition whose {@code getV()} matrix is read
//     * @param eigenValues not read by this method: the scaling that used it is commented out
//     * @param pca         zero-based component index, counted back from the last column
//     * @return a new array holding the selected column, one entry per row of {@code getV()}
//     */
//    private double[] getEigenVector(Jama.EigenvalueDecomposition eig, double[] eigenValues, int pca) {
//        // Despite the "eigenValue" names, these locals hold the matrix returned by getV().
//        Jama.Matrix eigenValueMatrix = eig.getV();
//        double[][] eigenValueMat = eigenValueMatrix.getArray();
//        double[] eigenVector = new double[eigenValueMat.length];
//        for (int i = 0; i < eigenValueMat.length; i++) {
//            // The row count is used as the column count here, i.e. the matrix is treated as square.
//            eigenVector[i] = eigenValueMat[i][eigenValueMat.length - 1 - pca]; // * Math.sqrt(eigenValues[eigenValues.length - 1 - pca]);
//        }
//        return eigenVector;
//    }
//
//    /**
//     * Copies one column of {@code eig.getV()} into a new array.
//     *
//     * The column index is {@code n - 1 - pca}, where {@code n} is the number of rows of
//     * the matrix returned by {@code getV()}; {@code pca = 0} therefore selects column
//     * {@code n - 1}. The values are returned unscaled (the multiplication by the square
//     * root of an eigenvalue is commented out).
//     *
//     * @param eig decomposition whose {@code getV()} matrix is read
//     * @param pca zero-based component index, counted back from the last column
//     * @return a new array holding the selected column, one entry per row of {@code getV()}
//     */
//    private double[] getEigenVector(Jama.EigenvalueDecomposition eig, int pca) {
//        // Despite the "eigenValue" names, these locals hold the matrix returned by getV().
//        Jama.Matrix eigenValueMatrix = eig.getV();
//        double[][] eigenValueMat = eigenValueMatrix.getArray();
//        double[] eigenVector = new double[eigenValueMat.length];
//        for (int i = 0; i < eigenValueMat.length; i++) {
//            // The row count is used as the column count here, i.e. the matrix is treated as square.
//            eigenVector[i] = eigenValueMat[i][eigenValueMat.length - 1 - pca]; // * Math.sqrt(eigenValues[eigenValues.length - 1 - pca]);
//        }
//        return eigenVector;
//    }
//
//    /**
//     * Returns the entry at index {@code eigenValues.length - 1 - pca} divided by the sum
//     * of the absolute values of all entries.
//     *
//     * The numerator is taken as-is (not as an absolute value), so the result is negative
//     * when the selected entry is negative.
//     *
//     * @param eigenValues eigenvalues to summarise
//     *                    (NOTE(review): presumably in ascending order, so that
//     *                    {@code pca = 0} picks the largest - confirm)
//     * @param pca         zero-based component index, counted back from the last entry
//     * @return the selected entry divided by the sum of absolute values
//     */
//    private double getEigenValueVar(double[] eigenValues, int pca) {
//        double sumEigenvalues = 0.0;
//        // NOTE(review): the loop variable is a boxed Double, so each element is autoboxed.
//        for (Double d : eigenValues) {
//            sumEigenvalues += Math.abs(d);
//        }
//        double result = eigenValues[eigenValues.length - 1 - pca] / sumEigenvalues;
//        return result;
//    }
//
//    /**
//     * Copies column {@code pca} of {@code svd.getV()} into a new array, multiplying every
//     * element by {@code Math.sqrt(singularValues[pca])}.
//     *
//     * The column is indexed directly by {@code pca}, from the first column.
//     * NOTE(review): this presumably relies on the singular values being in descending
//     * order - confirm against the Jama documentation.
//     *
//     * @param svd            decomposition whose {@code getV()} matrix is read
//     * @param singularValues values used for scaling; only index {@code pca} is read
//     * @param pca            zero-based column index into {@code getV()}
//     * @return a new array holding the scaled column, one entry per row of {@code getV()}
//     */
//    private double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, double[] singularValues, int pca) {
//        // Despite the "eigenValue" names, these locals hold the matrix returned by getV().
//        Jama.Matrix eigenValueMatrix = svd.getV();
//        double[][] eigenValueMat = eigenValueMatrix.getArray();
//        double[] eigenVector = new double[eigenValueMat.length];
//        for (int i = 0; i < eigenValueMat.length; i++) {
//            eigenVector[i] = eigenValueMat[i][pca] * Math.sqrt(singularValues[pca]);
//        }
//        return eigenVector;
//    }
//
//	//    public void compareSexPhenotype() {
////        
////        
////        int nrTCs = 100;
////        HashMap hashTCsToIncludeHuman = new HashMap();
////        for (int tc=0; tc




© 2015 - 2025 Weber Informatics LLC | Privacy Policy