
// umcg.genetica.legacy.ParseSOFTFile Maven / Gradle / Ivy
///*
// * To change this template, choose Tools | Templates
// * and open the template in the editor.
// */
//
//package umcg.genetica.legacy;
//
//import java.io.*;
//import java.util.*;
//import java.awt.image.*;
//import java.awt.*;
//import java.rmi.Remote;
//import java.util.regex.Pattern;
//import umcg.genetica.math.stats.*;
//import umcg.genetica.methylation.DeepCopy;
//
///**
// *
// * @author lude
// */
//public class ParseSOFTFile_lude {
//
//
// private static Pattern SPLIT_ON_TAB = Pattern.compile("\\t");
// private static Pattern SPLIT_ON_EQUALS = Pattern.compile(" = ");
//
// public ParseSOFTFile_lude(){
//
// }
//
// public ParseSOFTFile_lude(String fileLocation, int numberOfSamples, int numberOfProbes) throws Exception {
//
// importSOFTFile(fileLocation, numberOfSamples, numberOfProbes);
//
// //performQC();
//
// //performPCA();
//
// //convertPCsToGeneLevel();
//
// //parseDataPeterBram();
//
// /*
// performCoexpressionOnPCScoresFromPCAOverSamples();
//
// if (1==2) {
// ExpressionDataset dataset = new ExpressionDataset("/Users/lude/Downloads/PCATCGABeta/PrincipalcomponentsTCGA+GPL8490Combined.txt");
// dataset.transposeDataset();
// for (int p=0; p<100; p++) {
// for (int q=100; q<200; q++) {
// double corr = JSci.maths.ArrayMath.correlation(dataset.rawData[p], dataset.rawData[q]);
// double r2 = corr * corr;
// if (r2 > 0.35) {
// System.out.println(p + "\t" + q + "\t" + corr + "\t" + r2);
// for (int s=0; s hashUniqueProbes = new HashMap();
// ArrayList vecUniqueProbes = new ArrayList();
// ArrayList vecUniqueSamples = new ArrayList();
//
// int sampleID = 0;
//
// try {
// java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(fileLocation)));
// String str = "";
// while ((str = in.readLine()) != null) {
// if (str.startsWith("^")) {
//
// if(debug) System.out.println(str);
//
// if (str.startsWith("^SAMPLE")) {
// String sampleName = SPLIT_ON_EQUALS.split(str)[1];
// vecUniqueSamples.add(str);
// while ((str = in.readLine()) != null) {
// //System.out.println(str);
// int nrProbesThisSample = 0;
// if (str.startsWith("!Sample_supplementary_file") && debug) {
// System.out.println(str);
// }
// if (str.startsWith("!Sample_characteristics_ch1") && debug) {
// if (str.toLowerCase().contains("male") || str.toLowerCase().contains("female") ) {
// System.out.println(sampleName + "\t" + str);
// }
// }
// if (str.startsWith("!sample_table_begin")) {
// str = in.readLine();
//
// if(debug) System.out.println(str);
//
// String[] data = SPLIT_ON_TAB.split(str);
//
// int probeHeaderColumn = -1;
// int valueHeaderColumn = -1;
// int intensityHeaderColumn = -1;
//
// double[] valsValue = new double[numberOfProbes];
//
// for (int d=0; d SelectionBasedOnSeriesId = new HashMap();
//
// ExpressionDataset_lude dataset = new ExpressionDataset_lude(numberOfProbes, numberOfSamples);
//
// HashMap hashUniqueProbes = new HashMap();
// ArrayList uniqueProbes = new ArrayList();
// ArrayList uniqueSamples = new ArrayList();
//
// int sampleID = 0;
//
// try {
// java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(fileLocation)));
// String str = "";
// while ((str = in.readLine()) != null) {
// if (str.startsWith("^")) {
//
// if(debug) System.out.println(str);
//
// if (str.startsWith("^SAMPLE")) {
// String sampleName = SPLIT_ON_EQUALS.split(str)[1];
// String sampleSeriesId = "";
//
// while ((str = in.readLine()) != null) {
// //System.out.println(str);
// int nrProbesThisSample = 0;
//
// if (str.startsWith("!Sample_series_id")) {
// if(debug)System.out.println(str);
// sampleSeriesId = SPLIT_ON_EQUALS.split(str)[1];
// }
//
// if (str.startsWith("!Sample_supplementary_file") && debug) {
// System.out.println(str);
// }
//
// if (str.startsWith("!Sample_characteristics_ch1") && debug) {
// if (str.toLowerCase().contains("male") || str.toLowerCase().contains("female") ) {
// System.out.println(sampleName + "\t" + str);
// }
// }
//
// if (str.startsWith("!sample_table_begin")) {
// str = in.readLine();
//
// if(debug) System.out.println(str);
//
// String[] data = SPLIT_ON_TAB.split(str);
//
// int probeHeaderColumn = -1;
// int valueHeaderColumn = -1;
// int intensityHeaderColumn = -1;
//
// double[] valsValue = new double[numberOfProbes];
//
// for (int d=0; d5)) {
// if(debug) System.out.println(sampleSeriesId+"\t"+SelectionBasedOnSeriesId.get(sampleSeriesId));
//
// uniqueSamples.add(sampleName);
//
// for (int p=0; p0){
// for (int s=0; s hashSamplesPassingQC = new HashMap();
// HashMap hashProbesPassingQC = new HashMap();
//
// for (int s=0; s=maxMissingProbesMissingPerSample) {
// if(debug) System.out.println(s + "\t" + dataset.sampleNames[s] + "\t" + nrMissing);
// } else {
// if (!hashSamplesToExplicitlyExlcude.containsKey(dataset.sampleNames[s])) {
// hashSamplesPassingQC.put(dataset.sampleNames[s], true);
// }
// }
// }
//
// dataset = dataset.RemoveProbesOrSamples(dataset, hashProbesPassingQC, hashSamplesPassingQC);
// System.out.println("Number of samples removed: "+(originalNumberSamples-dataset.rawData[1].length));
//
// for (int p=0; p=maxMissingSamplesMissingAProbe) {
// if(debug) System.out.println("Excluding probe:\t" + p + "\t" + dataset.probeNames[p] + "\t" + nrMissing);
// } else {
// hashProbesPassingQC.put(dataset.probeNames[p], null);
// }
// }
//
// dataset = dataset.RemoveProbesOrSamples(dataset, hashProbesPassingQC, hashSamplesPassingQC);
// System.out.println("Number of probes removed: "+(originalNumberProbes-dataset.rawData.length));
//
// QuantileNormalization.QuantileNormAdressingNaValuesBeforeQN(dataset.rawData, true, false);
//
//
// //OLD:
// int[] nanPerProbe = new int[dataset.nrProbes];
// int[] nanPerSample = new int[dataset.nrSamples];
// for (int p=0; p vec1 = new ArrayList();
// for (int p=0; p 0.01) {
// System.out.println(tc1 + "\t" + tc2 + "\t" + r2);
// }
// r2Sum+=r2;
// }
// System.out.println(tc1 + "\t" + r2Sum);
// }
//
// System.exit(0);
// }
//
// public void parseTCGAData() {
//
// String fileDirTCGAFiles = "/Users/lude/Downloads/774bde41-d002-447f-805b-0da8d94b30fa/DNA_Methylation/JHU_USC__HumanMethylation27/Level_1/";
// File file = new File(fileDirTCGAFiles);
// File[] files = file.listFiles();
// Vector vecFiles = new Vector();
// for (int f=0; f10) {
// hashProbesToInclude.put("M-" + data[0], null);
// }
// }
// in.close();
// } catch (IOException e) {
// e.printStackTrace();
// System.out.println(e.getMessage());
// System.exit(-1);
// }
//
// //ExpressionDataset dataset = new ExpressionDataset("/Users/lude/Downloads/PCATCGADataUv2/principalcomponents.txt");
// ExpressionDataset_lude dataset = new ExpressionDataset_lude("/Users/lude/Downloads/PCATCGADataM+U/principalcomponents.txt", "\t", hashProbesToInclude, null);
//
//
// double[] gcContent1 = new double[dataset.nrProbes];
// double[] gcContent2 = new double[dataset.nrProbes];
// double[] gcContent3 = new double[dataset.nrProbes];
// double[] gcContent4 = new double[dataset.nrProbes];
// try {
// java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(fileProbeAnnotation)));
// String str = "";
// while ((str = in.readLine()) != null) {
// String[] data = str.split(",");
// if (data.length>10) {
// int probeIndex = ((Integer) dataset.hashProbes.get("M-" + data[0])).intValue();
// if (1==1) {
// byte[] bytes = data[4].getBytes();
// for (int b=0; b 100) {
// System.out.println(p + "\t" + dataset.probeNames[p] + "\t" + mean);
// }
// }
//
// dataset.standardNormalizeData();
// dataset.transposeDataset();
// dataset.standardNormalizeData();
// //dataset.transposeDataset();
//
// File fileDatasetEVs = new File(dataset.fileName + ".PCAOverSamplesEigenvectorsAfterQNProbesCenteredScaled.txt");
// if (!fileDatasetEVs.exists()) {
// int sampleCountMinusOne = dataset.nrSamples - 1;
// double[][] corr = new double[dataset.nrProbes][dataset.nrProbes];
// for (int f = 0; f < dataset.nrProbes; f++) {
// for (int g = f + 1; g < dataset.nrProbes; g++) {
// double covarianceInterim = 0;
// for (int s = 0; s < dataset.nrSamples; s++) {
// covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
// }
// corr[f][g] = covarianceInterim / (double) (sampleCountMinusOne);
// corr[g][f] = corr[f][g];
// }
// corr[f][f] = 1;
// System.out.println(f);
// }
//
// System.out.println("EVD decomposition ongoing:");
// Jama.EigenvalueDecomposition eig = eigenValueDecomposition(corr);
// double[] eigenValues = eig.getRealEigenvalues();
//
// double[][] eigenVectors = new double[corr.length][corr.length];
// ExpressionDataset_lude datasetEVs = new ExpressionDataset_lude(corr.length, corr.length);
// for (int f = 0; f < dataset.nrProbes; f++) {
// datasetEVs.sampleNames[f] = dataset.probeNames[f];
// }
// for (int pca=0; pca 3) {
// annotation = data[3];
// if (!hashUniqueGenes.containsKey(annotation)) {
// vecUniqueGenes.add(annotation);
// hashUniqueGenes.put(annotation, data[0]);
// } else {
// hashUniqueGenes.put(annotation, (String) hashUniqueGenes.get(annotation) + "," + data[0]);
// }
// }
// hashAnnotation.put(data[0], annotation);
// }
// }
// } catch (IOException e) {
// e.printStackTrace();
// System.out.println(e.getMessage());
// System.exit(-1);
// }
// System.out.println("Number of unique genes:\t" + vecUniqueGenes.size());
//
//
// /*
// ExpressionDataset dataset = new ExpressionDataset("/Users/lude/Documents/DMG/Data/GPL8490/GPL8490_family.soft.intensities.txt.binary.qced.binary");
// dataset.transposeDataset();
// dataset.standardNormalizeData();
//
//
// File fileDatasetEVs = new File(dataset.fileName + ".PCAOverSamplesEigenvectors.txt");
// if (!fileDatasetEVs.exists()) {
// int sampleCountMinusOne = dataset.nrSamples - 1;
// double[][] corr = new double[dataset.nrProbes][dataset.nrProbes];
// for (int f = 0; f < dataset.nrProbes; f++) {
// for (int g = f + 1; g < dataset.nrProbes; g++) {
// double covarianceInterim = 0;
// for (int s = 0; s < dataset.nrSamples; s++) {
// covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
// }
// corr[f][g] = covarianceInterim / (double) (sampleCountMinusOne);
// corr[g][f] = corr[f][g];
// }
// corr[f][f] = 1;
// System.out.println(f);
// }
//
// System.out.println("EVD decomposition ongoing:");
// Jama.EigenvalueDecomposition eig = eigenValueDecomposition(corr);
// double[] eigenValues = eig.getRealEigenvalues();
//
// double[][] eigenVectors = new double[corr.length][corr.length];
// ExpressionDataset datasetEVs = new ExpressionDataset(corr.length, corr.length);
// for (int f = 0; f < dataset.nrProbes; f++) {
// datasetEVs.sampleNames[f] = dataset.probeNames[f];
// }
// for (int pca=0; pca 3) {
// annotation = data[3];
// }
// hashAnnotation.put(data[0], annotation);
// }
// } catch (IOException e) {
// e.printStackTrace();
// System.out.println(e.getMessage());
// System.exit(-1);
// }
//
//
// String outputFile = "/Users/lude/Documents/DMG/Data/GPL8490/Plots/";
// dataset.standardNormalizeData();
// int sampleCountMinusOne = dataset.nrSamples - 1;
// for (int f = 0; f < dataset.nrProbes; f++) {
//
// String annotation = (String) hashAnnotation.get(dataset.probeNames[f]);
// //if (annotation.contains("F13A1")) {
// for (int g = f + 1; g < dataset.nrProbes; g++) {
// //for (int g = 0; g < dataset.nrProbes; g++) {
// //Calculate correlation:
// double covarianceInterim = 0;
// for (int s = 0; s < dataset.nrSamples; s++) {
// covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
// }
// double covariance = covarianceInterim / (double) (sampleCountMinusOne);
// double correlation = covariance;
// if (correlation > 0.90) {
// System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
// }
// if (correlation > 0.90) {
// //System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
//
// int width = 500 + 200;
// int height = 500 + 200;
// int marginLeft = 100; int marginRight = 100; int marginTop = 100; int marginBottom = 100;
// double innerWidth = width - marginLeft - marginRight;
// double innerHeight = height - marginBottom - marginTop;
// double x0 = marginLeft;
// double x1 = x0 + innerWidth;
// double y0 = marginTop;
// double y1 = y0 + innerHeight;
// double centerX = (x1 + x0) / 2;
// double centerY = (y1 + y0) / 2;
//
// BufferedImage bimage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
// Graphics2D g2d = bimage.createGraphics();
// g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
// g2d.setColor(new Color(255, 255, 255));
// g2d.fillRect(0,0, width, height);
//
// g2d.setColor(new Color(0, 0, 0));
// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
// double minF = JSci.maths.ArrayMath.min(dataset.rawData[f]);
// double maxF = JSci.maths.ArrayMath.max(dataset.rawData[f]);
// double minG = JSci.maths.ArrayMath.min(dataset.rawData[g]);
// double maxG = JSci.maths.ArrayMath.max(dataset.rawData[g]);
// for (int s=0; s 3) {
// annotation = data[3];
// }
// hashAnnotation.put(data[0], annotation);
// }
// } catch (IOException e) {
// e.printStackTrace();
// System.out.println(e.getMessage());
// System.exit(-1);
// }
//
//
// String outputFile = "/Users/lude/Documents/DMG/Data/GPL8490/PCAOver4432SamplesProbesCenteredScaled/Plots/";
// dataset.transposeDataset();
// dataset.standardNormalizeData();
// dataset.transposeDataset();
// dataset.standardNormalizeData();
// int sampleCountMinusOne = dataset.nrSamples - 1;
//
// int[] count = new int[dataset.nrProbes];
//
//
// if (1==2) {
// int[] degree = new int[dataset.nrProbes];
//
// for (int f = 0; f < dataset.nrProbes; f++) {
// for (int g = f + 1; g < dataset.nrProbes; g++) {
// if (f!=g) {
// //Calculate correlation:
// double covarianceInterim = 0;
// for (int s = 0; s < dataset.nrSamples; s++) {
// covarianceInterim += dataset.rawData[f][s] * dataset.rawData[g][s];
// }
// double covariance = covarianceInterim / (double) (sampleCountMinusOne);
// double correlation = covariance;
// if (correlation > 0.4) {
// degree[f]++;
// degree[g]++;
// }
// }
// }
// if (f%100==0) System.out.println(f);
// }
//
// int[] degreeDist = new int[dataset.nrProbes];
// for (int f = 0; f < dataset.nrProbes; f++) {
// degreeDist[degree[f]]++;
// }
// System.out.println("Degree distribution:");
// for (int f = 0; f < dataset.nrProbes; f++) {
// if (degreeDist[f]>0) {
// System.out.println(f + "\t" + degreeDist[f]);
// }
// }
// System.out.println("");
// }
//
// //String[] alsGenes = {"SOD1","ALS2","SETX","FUS","VAPB","ANG","TARDBP","OPTN","VCP","APEX1","ATXN2","CHMP2B","NEFH","SMN1","SMN2","PRPH","VEGFA","UNC13A","C9orf72","NIPA1","PFN1"};
// //String[] alsGenes = {"RGS1","REL","AHSA2","IL18RAP","IL18R1","IL1RL1","IL1RL2","ITGA4","UBE2E3","CTLA4","ICOS","CD28","CCR1","CCR2","CCRL2","CCR3","CCR5","CCR9","IL12A","LPP","IL2","IL21","HLA-DQA1","HLA-DQB1","TNFAIP3","TAGAP","SH2B3","PTPN2","TNFRSF14","MMEL1","RUNX3","PLEK","CCR4","CD80","KTELC1","BACH2","MAP3K7","PTPRK","THEMIS","ZMIZ1","ETS1","CIITA","SOCS1","CLEC16A","ICOSLG","PARK7","TNFRSF9","NFIA","CD247","FASLG","TNFSF18","TNFSF4","FRMD4B","IRF4","ELMO1","ZFP36L1","UBE2L3","YDJC","TLR7","TLR8"};
// //String[] alsGenes = {"RGS1","REL","AHSA2","IL18RAP","IL18R1","IL1RL1","IL1RL2","ITGA4","UBE2E3","CTLA4","ICOS","CD28","CCR1","CCR2","CCRL2","CCR3","CCR5","CCR9","IL12A","LPP","IL2","IL21","HLA-DQA1","HLA-DQB1","TNFAIP3","TAGAP","SH2B3","PTPN2","TNFRSF14","MMEL1","RUNX3","PLEK","CCR4","CD80","KTELC1","BACH2","MAP3K7","PTPRK","THEMIS","ZMIZ1","ETS1","CIITA","SOCS1","CLEC16A","ICOSLG","PARK7","TNFRSF9","NFIA","CD247","FASLG","TNFSF18","TNFSF4","FRMD4B","IRF4","ELMO1","ZFP36L1","UBE2L3","YDJC","TLR7","TLR8"};
// //String[] alsGenes = {"FTO", "TMEM18", "MC4R", "GNPDA2", "BDNF", "NEGR1", "SH2B1", "ETV5", "MTCH2", "KCTD15", "identified", "SEC16B", "TFAP2B", "FAIM2", "NRXN3", "identified", "RBJ", "GPRC5B", "MAP2K5", "QPCTL", "TNNI3K", "SLC39A8", "FLJ35779", "LRRN6C", "TMEM160", "FANCL", "CADM2", "PRKD1", "LRP1B", "PTBP2", "MTIF3", "ZNF608", "RPL27A", "NUDT3", "APOB48R", "SULT1A2", "AC138894.2", "ATXN2L", "TUFM", "NDUFS3", "CUGBP1", "SEC16B", "TFAP2B", "FAIM2", "NRXN3", "RBJ", "GPRC5B", "MAP2K5", "QPCTL", "TNNI3K", "SLC39A8", "FLJ35779", "LRRN6C", "TMEM160", "FANCL", "CADM2", "PRKD1", "LRP1B", "PTBP2", "MTIF3", "ZNF608", "RPL27A", "NUDT3", "ADCY3", "POMC", "IQCK", "LBXCOR1", "GIPR", "HMGCR", "ZC3H4", "GTF3A", "TUB", "HMGA1"};
// String[] alsGenes = {"LACTB", "TP53"};
// //String[] alsGenes = {"OAS1", "TAGAP"};
//
//
// for (int f = 0; f < dataset.nrProbes; f++) {
//
// String annotation = (String) hashAnnotation.get(dataset.probeNames[f]);
// boolean include = false;
// //if (annotation.toUpperCase().startsWith("TARDBP".toUpperCase())) {
// // include = true;
// //}
// for (int a=0; a 0.3) {
// System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
// count[g]++;
// }
// if (Math.abs(correlation) > 10.4) {
// //System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + dataset.probeNames[g] + "\t" + (String) hashAnnotation.get(dataset.probeNames[g]) + "\t" + correlation);
//
// int width = 500 + 200;
// int height = 500 + 200;
// int marginLeft = 100; int marginRight = 100; int marginTop = 100; int marginBottom = 100;
// double innerWidth = width - marginLeft - marginRight;
// double innerHeight = height - marginBottom - marginTop;
// double x0 = marginLeft;
// double x1 = x0 + innerWidth;
// double y0 = marginTop;
// double y1 = y0 + innerHeight;
// double centerX = (x1 + x0) / 2;
// double centerY = (y1 + y0) / 2;
//
// BufferedImage bimage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
// Graphics2D g2d = bimage.createGraphics();
// g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
// g2d.setColor(new Color(255, 255, 255));
// g2d.fillRect(0,0, width, height);
//
// g2d.setColor(new Color(0, 0, 0));
// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
// double minF = JSci.maths.ArrayMath.min(dataset.rawData[f]);
// double maxF = JSci.maths.ArrayMath.max(dataset.rawData[f]);
// double minG = JSci.maths.ArrayMath.min(dataset.rawData[g]);
// double maxG = JSci.maths.ArrayMath.max(dataset.rawData[g]);
// for (int s=0; s1) {
// System.out.println(dataset.probeNames[f] + "\t" + (String) hashAnnotation.get(dataset.probeNames[f]) + "\t" + count[f]);
// }
// }
//
// System.exit(0);
// }
//
// public void comparePCScoresWithGPL570TCs() {
//
// String sortedEigenvectorFile = "/Users/lude/Documents/DMG/Data/HumanUCSC/PCAOverAllProbesGPL570/eigenvectors.txt.binary";
// String probeAnnotationFile = "/Users/lude/Documents/DMG/Data/AffymetrixAnnotation2010-08-23/HG-U133_Plus_2/HG-U133_Plus_2.na31.annot.csv";
//
// HashMap hashProbeToChr = new HashMap();
// HashMap hashProbeToHGNC = new HashMap();
// HashMap hashHGNCToProbe = new HashMap();
// try {
// java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(probeAnnotationFile)));
// String str = "";
// while ((str = in.readLine()) != null) {
// if (str.startsWith("\"Probe Set ID")) break;
// }
// while ((str = in.readLine()) != null) {
// String[] data = str.trim().split("\",\"");
// String mapping = data[12].replace("//", "\t");
// String[] mappings = mapping.split("\t");
// if (mappings.length==3) {
// String[] mappingsSplit = mappings[0].split(":");
// String chr = mappingsSplit[0];
// int chrInt = -1;
// try {
// chrInt = Integer.parseInt(chr.replace("chr", ""));
// } catch (Exception e) {
// if (chr.equals("chrX")) chrInt = 23;
// if (chr.equals("chrY")) chrInt = 24;
// }
// if (chrInt!=-1) {
// hashProbeToChr.put(data[0].substring(1), chrInt);
// }
// }
// String hgnc = data[14].trim(); if (hgnc.equals("---")) hgnc = "";
// if (hgnc.length()>0) {
// hashProbeToHGNC.put(data[0].substring(1), hgnc.trim().replace(" ", ""));
// if (!hashHGNCToProbe.containsKey(hgnc)) {
// hashHGNCToProbe.put(hgnc, data[0].substring(1));
// } else {
// hashHGNCToProbe.put(hgnc, (String) hashHGNCToProbe.get(hgnc) + "," + data[0].substring(1));
// }
// }
// }
// } catch (Exception e) {
// System.out.println("Error:\t" + e.getMessage());
// e.printStackTrace();
// }
//
// int nrTCs = 100;
// HashMap hashTCsToIncludeHuman = new HashMap();
// for (int tc=0; tc 3) {
// annotation = data[3];
// }
// hashAnnotation.put(data[0], annotation);
// }
// } catch (IOException e) {
// e.printStackTrace();
// System.out.println(e.getMessage());
// System.exit(-1);
// }
//
//
// HashMap hash27KToAffy = new HashMap();
// HashMap hashAffyTo27K = new HashMap();
// HashMap hash27KChr = new HashMap();
// for (int p=0; p 0.01) {
// //System.out.println(tc + "\t" + tc2 + "\t" + correlation + "\t" + r2);
// }
// r2Sum+=r2;
// }
// System.out.println(tc2 + "\t" + r2Sum);
// }
//
// System.out.println("");
//
// dataset.transposeDataset();
// datasetEVs.transposeDataset();
//
// dataset.standardNormalizeData();
// datasetEVs.standardNormalizeData();
//
//
// int width = 400 + 200 + 500;
// int height = 400 + 200;
// int marginLeft = 100; int marginRight = 600; int marginTop = 100; int marginBottom = 100;
// double innerWidth = width - marginLeft - marginRight;
// double innerHeight = height - marginBottom - marginTop;
// double x0 = marginLeft;
// double x1 = x0 + innerWidth;
// double y0 = marginTop;
// double y1 = y0 + innerHeight;
// double centerX = (x1 + x0) / 2;
// double centerY = (y1 + y0) / 2;
//
// BufferedImage bimage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
// Graphics2D g2d = bimage.createGraphics();
// g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
// g2d.setColor(new Color(255, 255, 255));
// g2d.fillRect(0,0, width, height);
//
// g2d.setColor(new Color(0, 0, 0));
// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
//
// long[][] corrDist = new long[201][201];
// int nrSamples = dataset.nrSamples;
// int nrSamplesGPL570 = datasetEVs.nrSamples;
// double sampleCountMinusOne = dataset.nrSamples - 1;
// double sampleCountMinusOneGPL570 = datasetEVs.nrSamples - 1;
//
// for (int p=0; p200) correlationGPL570Int = 200;
// corrDist[correlationInt][correlationGPL570Int]++;
// }
// }
// if (p%100==0) System.out.println(p);
// }
//
// long[] dist = new long[201];
// long[] distGPL570 = new long[201];
// long sum = 0;
// for (int d=0; d<201; d++) {
// for (int e=0; e<201; e++) {
// long value = corrDist[d][e];
// dist[d]+=value;
// distGPL570[e]+=value;
// sum+=value;
// if (value > 0) {
// double scale = Math.log10((double) value + 1d);
// if (scale > 5) {
// scale = 5;
// }
// int colorInt = 255 - (int) Math.round(scale * 51d);
// //System.out.println(value + "\t" + scale + "\t" + colorInt);
// g2d.setColor(new Color(colorInt, colorInt, colorInt));
// g2d.fillRect((int) x0 + d * 2, (int) y1 - e * 2, 2, 2);
// }
// }
// }
//
// long[][] corrDistExp = new long[201][201];
// boolean[][] corrDistSign = new boolean[201][201];
// for (int d=0; d<201; d++) {
// for (int e=0; e<201; e++) {
// corrDistExp[d][e] = (long) ((double) dist[d] * (double) (distGPL570[e] / (double) sum));
// long value = corrDist[d][e];
//
// if (value > 0) {
// double scale = Math.log10((double) value);
// if (scale > 5) {
// scale = 5;
// }
// int colorInt = 255 - (int) Math.round(scale * 51d);
// g2d.setColor(new Color(colorInt, colorInt, colorInt));
// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 1.0f));
// g2d.fillRect((int) x0 + d * 2 + 500, (int) y1 - e * 2, 2, 2);
// }
//
// double obsDivExp = (double) corrDist[d][e] / (double) corrDistExp[d][e];
// if (Double.isInfinite(obsDivExp)) obsDivExp = 15;
//
// double colorRange = 0;
// if (obsDivExp > 1) {
// colorRange = Math.log(obsDivExp) / 2;
// if (colorRange > 1d) colorRange = 1d;
// if (obsDivExp > 2) {
// corrDistSign[d][e] = true;
// System.out.println(d + "\t" + e + "\t" + corrDist[d][e] + "\t" + corrDistExp[d][e] + "\t" + obsDivExp);
// }
// float hue = (float) colorRange;
// g2d.setColor(g2d.getColor().getHSBColor(hue, 1.0f, 1.0f));
// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, (float) colorRange));
// g2d.fillRect((int) x0 + d * 2 + 500, (int) y1 - e * 2, 2, 2);
// }
//
//
// }
// }
//
// g2d.setColor(new Color(0, 0, 0));
// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.25f));
// g2d.drawLine((int) x0, (int) centerY, (int) x1, (int) centerY);
// g2d.drawLine((int) centerX, (int) y0, (int) centerX, (int) y1);
// g2d.drawLine((int) x0 + 500, (int) centerY, (int) x1 + 500, (int) centerY);
// g2d.drawLine((int) centerX + 500, (int) y0, (int) centerX + 500, (int) y1);
//
// try {
// //javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Documents/DMG/Data/GPL8490/PCAOverSamplesProbesCenteredScaled/Plots/CorrelationGPL8490-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
// //javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Documents/DMG/Data/GPL8490/PCAOver4432SamplesProbesForcedNormalDistribution/Plots/CorrelationGPL8490-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
// //javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Downloads/PCATCGABeta/CorrelationTCGA-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
// javax.imageio.ImageIO.write(bimage, "png", new File("/Users/lude/Downloads/PCATCGABeta/PCAOverTCGA+GPL8490Combined/CorrelationTCGA+GPL8490-" + nrTCs + "TCs-GPL570-" + nrTCsGPL570 + "TCs.png"));
// } catch (IOException e) {
// System.out.println(e.getMessage());
// e.printStackTrace();
// }
//
//
// for (int p=0; p 0.25) {
// System.out.println(p + "\t" + dataset.probeNames[p] + "\t" + datasetEVs.probeNames[p] + "\t" + (String) hashProbeToHGNC.get(datasetEVs.probeNames[p]) + "\t" + q + "\t" + dataset.probeNames[q] + "\t" + datasetEVs.probeNames[q] + "\t" + (String) hashProbeToHGNC.get(datasetEVs.probeNames[q]) + "\t" + correlation + "\t" + correlationGPL570);
// //}
// }
// }
// }
// }
// if (p%100==0) System.out.println(p);
// }
//
//
//
// System.exit(0);
// }
//
// /**
//  * Wraps the given 2-D array in a Jama.Matrix and returns the result of
//  * calling eig() on it.
//  *
//  * The visible callers in this file pass a square matrix that they fill
//  * symmetrically (corr[g][f] = corr[f][g]) with a diagonal of 1.
//  *
//  * @param data the matrix to decompose
//  * @return the Jama.EigenvalueDecomposition produced by Matrix.eig()
//  */
// private Jama.EigenvalueDecomposition eigenValueDecomposition(double[][] data) {
// Jama.Matrix m = new Jama.Matrix(data);
// Jama.EigenvalueDecomposition eig = m.eig();
// return eig;
// }
//
// /**
//  * Copies one column of the eigenvector matrix eig.getV() into a new array.
//  *
//  * The column is counted from the end: index (n - 1 - pca), where n is the
//  * number of rows of V. NOTE(review): presumably this makes pca = 0 the
//  * component with the largest eigenvalue, assuming Jama stores eigenvalues
//  * in ascending order - confirm against the Jama documentation.
//  *
//  * @param eig the decomposition whose V matrix is read
//  * @param eigenValues not used by the current body; the scaling by the
//  *        square root of the eigenvalue that would read it is commented out
//  * @param pca zero-based component index, counted from the last column
//  * @return a new array of length n holding the selected column, unscaled
//  */
// private double[] getEigenVector(Jama.EigenvalueDecomposition eig, double[] eigenValues, int pca) {
// Jama.Matrix eigenValueMatrix = eig.getV();
// double[][] eigenValueMat = eigenValueMatrix.getArray();
// double[] eigenVector = new double[eigenValueMat.length];
// for (int i = 0; i < eigenValueMat.length; i++) {
// eigenVector[i] = eigenValueMat[i][eigenValueMat.length - 1 - pca]; // * Math.sqrt(eigenValues[eigenValues.length - 1 - pca]);
// }
// return eigenVector;
// }
//
// /**
//  * Copies one column of the eigenvector matrix eig.getV() into a new array.
//  *
//  * The column is counted from the end: index (n - 1 - pca), where n is the
//  * number of rows of V. The body is the same as the three-argument overload
//  * of this method; no scaling is applied (the scaling expression is
//  * commented out).
//  *
//  * @param eig the decomposition whose V matrix is read
//  * @param pca zero-based component index, counted from the last column
//  * @return a new array of length n holding the selected column
//  */
// private double[] getEigenVector(Jama.EigenvalueDecomposition eig, int pca) {
// Jama.Matrix eigenValueMatrix = eig.getV();
// double[][] eigenValueMat = eigenValueMatrix.getArray();
// double[] eigenVector = new double[eigenValueMat.length];
// for (int i = 0; i < eigenValueMat.length; i++) {
// eigenVector[i] = eigenValueMat[i][eigenValueMat.length - 1 - pca]; // * Math.sqrt(eigenValues[eigenValues.length - 1 - pca]);
// }
// return eigenVector;
// }
//
// /**
//  * Returns one eigenvalue divided by the sum of the absolute values of all
//  * eigenvalues.
//  *
//  * The eigenvalue is read from index (length - 1 - pca), i.e. counted from
//  * the end of the array. Only the denominator uses absolute values; the
//  * selected eigenvalue itself is used as-is, so a negative eigenvalue gives
//  * a negative result. If every eigenvalue is zero the division is 0.0 / 0.0.
//  *
//  * @param eigenValues the eigenvalues to normalise against
//  * @param pca zero-based component index, counted from the last element
//  * @return eigenValues[length - 1 - pca] divided by the sum of |eigenValues[i]|
//  */
// private double getEigenValueVar(double[] eigenValues, int pca) {
// double sumEigenvalues = 0.0;
// // The loop variable is declared as Double, so each element is boxed before Math.abs unboxes it.
// for (Double d : eigenValues) {
// sumEigenvalues += Math.abs(d);
// }
// double result = eigenValues[eigenValues.length - 1 - pca] / sumEigenvalues;
// return result;
// }
//
// /**
//  * Copies column pca of svd.getV() into a new array, multiplying every
//  * element by Math.sqrt(singularValues[pca]).
//  *
//  * Unlike the getEigenVector overloads, the column index here is pca itself
//  * (counted from the start), and the scaling is active.
//  *
//  * @param svd the decomposition whose V matrix is read
//  * @param singularValues values indexed by pca; the square root of the
//  *        selected one is the scale factor
//  * @param pca zero-based column index into V and into singularValues
//  * @return a new array with one scaled entry per row of V
//  */
// private double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, double[] singularValues, int pca) {
// Jama.Matrix eigenValueMatrix = svd.getV();
// double[][] eigenValueMat = eigenValueMatrix.getArray();
// double[] eigenVector = new double[eigenValueMat.length];
// for (int i = 0; i < eigenValueMat.length; i++) {
// eigenVector[i] = eigenValueMat[i][pca] * Math.sqrt(singularValues[pca]);
// }
// return eigenVector;
// }
//
// // public void compareSexPhenotype() {
////
////
//// int nrTCs = 100;
//// HashMap hashTCsToIncludeHuman = new HashMap();
//// for (int tc=0; tc
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy