/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.survival.cox;

import org.biojava.nbio.survival.cox.matrix.Matrix;
import org.biojava.nbio.survival.cox.stats.ChiSq;
import org.biojava.nbio.survival.cox.stats.Cholesky2;
import org.biojava.nbio.survival.data.WorkSheet;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;

/**
 *   This is a port of the R survival code used for doing Cox regression. The algorithm was a fairly easy port from C code to Java, where the challenge was
 *   making the code a little more object friendly. In the R code everything is passed around as an array, and a large portion of the code is spent extracting
 *   data from the array for use in different calculations. Organizing the data in a class for each data point made it possible to simplify much of the code.
 *   Not all variants of the methods that you can select for doing various statistical calculations are implemented. It wouldn't be difficult to go back
 *   and add them if they are important.
 *
 *

 *   In R you can pass in different parameters to override defaults, which requires parsing of the parameters. The Java code tries to be a little more explicit
 *   about parameters, where strata, weighting, robust and cluster are advanced options. Additionally, code from Bob Gray is implemented
 *   to do variance correction when using weighted parameters in a data set.
 *   /Users/Scooter/NetBeansProjects/biojava3-survival/docs/wtexamples.docx
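 *
 *   A minimal usage sketch (modeled on the example in {@code main} below; the column names
 *   "TIME", "CENSOR", "AGE" and "TREAT" and the values are illustrative only):
 * <pre>{@code
 * ArrayList<SurvivalInfo> data = new ArrayList<>();
 * SurvivalInfo si = new SurvivalInfo(30.0, 1);   // time, status (1=dead, 0=censored)
 * si.addContinuousVariable("AGE", 52.0);
 * si.addContinuousVariable("TREAT", 1.0);
 * data.add(si);
 * // ... add the remaining subjects ...
 *
 * ArrayList<String> variables = new ArrayList<>();
 * variables.add("AGE");
 * variables.add("TREAT");
 *
 * CoxR cox = new CoxR();
 * // useStrata, useWeighted, robust, cluster
 * CoxInfo ci = cox.process(variables, data, false, false, false, false);
 * System.out.println(ci);
 * }</pre>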

 *   The CoxHelper class is meant to hide some of the implementation details.

 *   Issues:
 *   <ul>
 *   <li>sign in CoxMart?</li>
 *   <li>double toler_chol = 1.818989e-12; Different value for some reason</li>
 *   <li>In robust linear_predictor set to 0 which implies score = 1 but previous score value doesn't get reset</li>
 *   </ul>
 *   Cox regression fit, replacement for coxfit2 in order
 *   to be more frugal about memory: specifically that we
 *   don't make copies of the input data.
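 *
 *   How the two tie-handling options enter the log partial likelihood (as implemented by the
 *   accumulation loops below): at a death time with d tied deaths, with weighted risk scores
 *   r = w*exp(z'beta), denom = sum of r over the risk set, efronwt = sum of r over the deaths,
 *   and deadwt = sum of the case weights w over the deaths,
 * <pre>
 *   Breslow:  loglik += (sum over deaths of w*z'beta) - deadwt*log(denom)
 *   Efron:    loglik += (sum over deaths of w*z'beta) - (deadwt/d) * sum_{k=0..d-1} log(denom - (k/d)*efronwt)
 * </pre>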

 * the input parameters are (a sketch of the corresponding Java call follows this list):

 *      maxiter      :number of iterations
 *      time(n)      :time of status or censoring for person i
 *      status(n)    :status for the ith person    1=dead , 0=censored
 *      covar(nv,n)  :covariates for person i.
 *                       Note that S sends this in column major order.
 *      strata(n)    :marks the strata.  Will be 1 if this person is the
 *                      last one in a strata.  If there are no strata, the
 *                      vector can be identically zero, since the nth person's
 *                      value is always assumed to be = to 1.
 *      offset(n)    :offset for the linear predictor
 *      weights(n)   :case weights
 *      init         :initial estimate for the coefficients
 *      eps          :tolerance for convergence.  Iteration continues until
 *                      the percent change in loglikelihood is <= eps.
 *      chol_tol     : tolerance for the Cholesky decomposition
 *      method       : 0=Breslow, 1=Efron
 *      doscale      : 0=don't scale the X matrix, 1=scale the X matrix
 * 
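 *   As a rough sketch of how the defaults from coxph.control map onto the full Java
 *   {@code process} call (this mirrors the shorter overload below; {@code variables} and
 *   {@code data} are the lists built as in the earlier example):
 * <pre>{@code
 * int maxiter = 20;                              // from coxph.control.S
 * double eps = 1e-9;                             // convergence tolerance
 * double toler = Math.pow(eps, 0.75);            // Cholesky tolerance
 * int doscale = 1;                               // scale the X matrix
 * double[] beta = new double[variables.size()];  // initial estimates, all zero
 * CoxInfo ci = new CoxR().process(variables, data, maxiter, CoxMethod.Efron,
 *         eps, toler, beta, doscale, false, false, false, false);
 * }</pre>
 *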
 * returned parameters (a sketch of reading them back follows this list):
 *      means(nv)    : vector of column means of X
 *      beta(nv)     :the vector of answers (at start contains initial est)
 *      u(nv)        :score vector
 *      imat(nv,nv)  :the variance matrix at beta=final
 *                     (returned as a vector)
 *      loglik(2)    :loglik at beta=initial values, at beta=final
 *      sctest       :the score test at beta=initial
 *      flag         :success flag  1000  did not converge
 *                                  1 to nvar: rank of the solution
 *      iterations         :actual number of iterations used
 * 
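 *   On the Java side these are carried by {@link CoxInfo} and its {@link CoxCoefficient} entries;
 *   a rough sketch of reading them back, using only accessors that already appear in this class:
 * <pre>{@code
 * for (CoxCoefficient coe : ci.getCoefficientsList().values()) {
 *     System.out.println(coe.getName() + " beta=" + coe.getCoeff() + " se=" + coe.getStdError());
 * }
 * double[][] variance = ci.getVariance();   // imat at the final beta
 * System.out.println(ci);                   // or simply print the full summary
 * }</pre>
 *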
 * work arrays:
 *      mark(n)
 *      wtave(n)
 *      a(nvar), a2(nvar)
 *      cmat(nvar,nvar)       ragged array
 *      cmat2(nvar,nvar)
 *      newbeta(nvar)         always contains the "next iteration"
 * 
 * calls functions: cholesky2, chsolve2, chinv2

 * the data must be sorted by ascending time within strata
 *
 * @author Scooter Willis
 */
public class CoxR {

	/**
	 *
	 * @param variables
	 * @param DataT
	 * @param useStrata
	 * @param useWeighted
	 * @param robust
	 * @param cluster
	 * @return
	 * @throws Exception
	 */
	public CoxInfo process(ArrayList<String> variables, ArrayList<SurvivalInfo> DataT, boolean useStrata,
			boolean useWeighted, boolean robust, boolean cluster) throws Exception {
		//from coxph.control.S
		int maxiter2 = 20;
		double eps2 = 1e-9;
		double toler2 = Math.pow(eps2, .75);
		int doscale2 = 1;
		//int method2 = 0;
		double[] beta = new double[variables.size()];

		return process(variables, DataT, maxiter2, CoxMethod.Efron, eps2, toler2, beta, doscale2, useStrata,
				useWeighted, robust, cluster);
	}

	/**
	 *
	 * @param variables
	 * @param data
	 * @param maxiter
	 * @param method
	 * @param eps
	 * @param toler
	 * @param beta
	 * @param doscale
	 * @param useStrata
	 * @param useWeighted
	 * @param robust
	 * @param cluster
	 * @return
	 * @throws Exception
	 */
	public CoxInfo process(ArrayList<String> variables, ArrayList<SurvivalInfo> data, int maxiter, CoxMethod method,
			double eps, double toler, double[] beta, int doscale, boolean useStrata, boolean useWeighted,
			boolean robust, boolean cluster) throws Exception {

		//make sure data is converted to numbers if labels are used
		SurvivalInfoHelper.categorizeData(data);

		//create variables if testing for interaction
		for (String variable : variables) {
			if (variable.indexOf(":") != -1) {
				String[] d = variable.split(":");
				SurvivalInfoHelper.addInteraction(d[0], d[1], data);
			}
		}

		Collections.sort(data);
		// Collections.reverse(data);

		CoxInfo coxInfo = new CoxInfo();
		coxInfo.setSurvivalInfoList(data);

		int i, j, k, person;
		boolean gotofinish = false;

		double[][] cmat, imat;  /* ragged arrays covar[][], */
		double wtave;
		double[] a, newbeta;
		double[] a2;
		double[][] cmat2;
		double[] scale;
		double denom = 0, zbeta, risk;
		double temp, temp2;
		int ndead;              /* actually, the sum of their weights */
		double newlk = 0;
		double dtime, d2;
		double deadwt;          /* sum of case weights for the deaths */
		double efronwt;         /* sum of weighted risk scores for the deaths */
		int halving;            /* are we doing step halving at the moment? */
		@SuppressWarnings("unused")
		int nrisk = 0;          /* number of subjects in the current risk set */

		/* copies of scalar input arguments */
		int nused, nvar;

		/* vector inputs */
		// double *time, *weights, *offset;
		// int *status, *strata;

		/* returned objects */
		// double imat2[][];
		double[] u, loglik, means;
		double sctest;
		int flag = 0;
		int iter = 0;
		//SEXP rlist, rlistnames;
		// int nprotect;  /* number of protect calls I have issued */

		/* get local copies of some input args */
		nused = data.size();      // LENGTH(offset2);
		nvar = variables.size();  // ncols(covar2);

		// imat2 = new double[nvar][nvar];
		// nprotect++;
		imat = new double[nvar][nvar];    //dmatrix(REAL(imat2), nvar, nvar);
		a = new double[nvar];             //(double *) R_alloc(2*nvar*nvar + 4*nvar, sizeof(double));
		newbeta = new double[nvar];       //a + nvar;
		a2 = new double[nvar];            //newbeta + nvar;
		scale = new double[nvar];         //a2 + nvar;
		cmat = new double[nvar][nvar];    //dmatrix(scale + nvar, nvar, nvar);
		cmat2 = new double[nvar][nvar];   //dmatrix(scale + nvar +nvar*nvar, nvar, nvar);

		/*
		** create output variables
		*/
		// PROTECT(beta2 = duplicate(ibeta));
		// beta = REAL(beta2);
		// beta = new double[nvar];
		// beta = beta2;
		// PROTECT(means2 = allocVector(REALSXP, nvar));
		// means = REAL(means2);
		means = new double[nvar];
		double[] sd = new double[nvar];
		//double[] se = new double[nvar];
		// means = means2;
		// PROTECT(u2 = allocVector(REALSXP, nvar));
		// u = REAL(u2);
		u = new double[nvar];
		// u = u2;
		// PROTECT(loglik2 = allocVector(REALSXP, 2));
		// loglik = REAL(loglik2);
		loglik = new double[2];
		// loglik = loglik2;
		// PROTECT(sctest2 = allocVector(REALSXP, 1));
		// sctest = REAL(sctest2);
		// sctest = new double[1];
		// sctest = sctest2;
		// PROTECT(flag2 = allocVector(INTSXP, 1));
		// flag = INTEGER(flag2);
		// flag = new int[1];
		// flag = flag2;
		// PROTECT(iter2 = allocVector(INTSXP, 1));
		// iterations = INTEGER(iter2);
		// iterations = new int[1];
		// iterations = iter2;
		// nprotect += 7;

		/*
		** Subtract the mean from each covar, as this makes the regression
		** much more stable.
		*/
		double[] time = new double[nused];
		int[] status = new int[nused];
		double[] offset = new double[nused];
		double[] weights = new double[nused];
		int[] strata = new int[nused];
		double[][] covar = new double[nvar][nused];
		ArrayList<String> clusterList = null;
		if (cluster) {
			clusterList = new ArrayList<>();
		}

		//copy data over to local arrays to minimize changing code
		for (person = 0; person < nused; person++) {
			SurvivalInfo si = data.get(person);
			time[person] = si.getTime();
			status[person] = si.getStatus();
			offset[person] = si.getOffset();
			if (cluster) {
				if (si.getClusterValue() == null || si.getClusterValue().length() == 0) {
					throw new Exception("Cluster value is not valid for " + si.toString());
				}
				clusterList.add(si.getClusterValue());
			}
			if (useWeighted) {
				weights[person] = si.getWeight();
			} else {
				weights[person] = 1.0;
			}
			if (useStrata) {
				strata[person] = si.getStrata();
			} else {
				strata[person] = 0;
			}
			for (i = 0; i < variables.size(); i++) {
				String variable = variables.get(i);
				covar[i][person] = si.getVariable(variable);
			}
		}

		double tempsd = 0;
		i = 0;
		for (i = 0; i < nvar; i++) {
			temp = 0;
			tempsd = 0;
			//calculate the mean and sd
			for (person = 0; person < nused; person++) {
				temp += covar[i][person];                           // * weights[person];
				tempsd += (covar[i][person]) * (covar[i][person]);  //*weights[person] * weights[person]
			}
			temp /= nused;    // temp /= weightCount;
			means[i] = temp;
			tempsd /= nused;  // tempsd /= weightCount;
			tempsd = Math.sqrt(tempsd - temp * temp);
			sd[i] = tempsd;   //standard deviation

			//subtract the mean
			for (person = 0; person < nused; person++) {
				covar[i][person] -= temp;
			}
			if (doscale == 1) { /* and also scale it */
				temp = 0;
				for (person = 0; person < nused; person++) {
					temp += Math.abs(covar[i][person]); //fabs
				}
				if (temp > 0) {
					temp = nused / temp;  /* scaling */
				} else {
					temp = 1.0;           /* rare case of a constant covariate */
				}
				scale[i] = temp;
				for (person = 0; person < nused; person++) {
					covar[i][person] *= temp;
				}
			}
		}

		if (doscale == 1) {
			for (i = 0; i < nvar; i++) {
				beta[i] /= scale[i];  /* rescale initial betas */
			}
		} else {
			for (i = 0; i < nvar; i++) {
				scale[i] = 1.0;
			}
		}

		/*
		** do the initial iteration step
		*/
		strata[nused - 1] = 1;
		loglik[1] = 0;
		for (i = 0; i < nvar; i++) {
			u[i] = 0;   //u = s1
			a2[i] = 0;  //a2 = a
			for (j = 0; j < nvar; j++) {
				imat[i][j] = 0;   //s2
				cmat2[i][j] = 0;  //a
			}
		}
		for (person = nused - 1; person >= 0;) {
			if (strata[person] == 1) {
				nrisk = 0;
				denom = 0;
				for (i = 0; i < nvar; i++) {
					a[i] = 0;
					for (j = 0; j < nvar; j++) {
						cmat[i][j] = 0;
					}
				}
			}

			dtime = time[person];
			ndead = 0;    /* number of deaths at this time point */
			deadwt = 0;   /* sum of weights for the deaths */
			efronwt = 0;  /* sum of weighted risks for the deaths */
			while (person >= 0 && time[person] == dtime) {
				/* walk through this set of tied times */
				nrisk++;
				zbeta = offset[person];  /* form the term beta*z (vector mult) */
				for (i = 0; i < nvar; i++) {
					zbeta += beta[i] * covar[i][person];  //x
				}
				zbeta = coxsafe(zbeta);
				risk = Math.exp(zbeta) * weights[person];  //risk = v
				denom += risk;

				/* a is the vector of weighted sums of x, cmat sums of squares */
				for (i = 0; i < nvar; i++) {
					a[i] += risk * covar[i][person];  //a = s1
					for (j = 0; j <= i; j++) {
						cmat[i][j] += risk * covar[i][person] * covar[j][person];  //cmat = s2;
					}
				}

				if (status[person] == 1) {
					ndead++;
					deadwt += weights[person];
					efronwt += risk;
					loglik[1] += weights[person] * zbeta;

					for (i = 0; i < nvar; i++) {
						u[i] += weights[person] * covar[i][person];
					}
					if (method == CoxMethod.Efron) { /* Efron */
						for (i = 0; i < nvar; i++) {
							a2[i] += risk * covar[i][person];
							for (j = 0; j <= i; j++) {
								cmat2[i][j] += risk * covar[i][person] * covar[j][person];
							}
						}
					}
				}

				person--;
				if (person >= 0 && strata[person] == 1) { //added catch of person = 0 and person-- = -1
					break;  /* ties don't cross strata */
				}
			}

			if (ndead > 0) { /* we need to add to the main terms */
				if (method == CoxMethod.Breslow) { /* Breslow */
					loglik[1] -= deadwt * Math.log(denom);

					for (i = 0; i < nvar; i++) {
						temp2 = a[i] / denom;  /* mean */
						u[i] -= deadwt * temp2;
						for (j = 0; j <= i; j++) {
							imat[j][i] += deadwt * (cmat[i][j] - temp2 * a[j]) / denom;
						}
					}
				} else { /* Efron */
					/*
					** If there are 3 deaths we have 3 terms: in the first the
					** three deaths are all in, in the second they are 2/3
					** in the sums, and in the last 1/3 in the sum.  Let k go
					** from 0 to (ndead -1), then we will sequentially use
					**     denom - (k/ndead)*efronwt as the denominator
					**     a - (k/ndead)*a2 as the "a" term
					**     cmat - (k/ndead)*cmat2 as the "cmat" term
					** and reprise the equations just above.
					*/
					for (k = 0; k < ndead; k++) {
						temp = (double) k / ndead;
						wtave = deadwt / ndead;
						d2 = denom - temp * efronwt;
						loglik[1] -= wtave * Math.log(d2);
						for (i = 0; i < nvar; i++) {
							temp2 = (a[i] - temp * a2[i]) / d2;
							u[i] -= wtave * temp2;
							for (j = 0; j <= i; j++) {
								imat[j][i] += (wtave / d2) * ((cmat[i][j] - temp * cmat2[i][j]) - temp2 * (a[j] - temp * a2[j]));
							}
						}
					}

					for (i = 0; i < nvar; i++) {
						a2[i] = 0;
						for (j = 0; j < nvar; j++) {
							cmat2[i][j] = 0;
						}
					}
				}
			}
		} /* end of accumulation loop */

		loglik[0] = loglik[1];  /* save the loglik for iteration 0 */

		/* am I done?
		** update the betas and test for convergence
		*/
		for (i = 0; i < nvar; i++) { /* use 'a' as a temp to save u0, for the score test */
			a[i] = u[i];
		}

		flag = Cholesky2.process(imat, nvar, toler);
		chsolve2(imat, nvar, a);  /* a replaced by a *inverse(i) */

		temp = 0;
		for (i = 0; i < nvar; i++) {
			temp += u[i] * a[i];
		}
		sctest = temp;  /* score test */

		/*
		** Never, never complain about convergence on the first step.  That way,
		** if someone HAS to they can force one iteration at a time.
		*/
		for (i = 0; i < nvar; i++) {
			newbeta[i] = beta[i] + a[i];
		}
		if (maxiter == 0) {
			chinv2(imat, nvar);
			for (i = 0; i < nvar; i++) {
				beta[i] *= scale[i];  /* return to original scale */
				u[i] /= scale[i];
				imat[i][i] *= scale[i] * scale[i];
				for (j = 0; j < i; j++) {
					imat[j][i] *= scale[i] * scale[j];
					imat[i][j] = imat[j][i];
				}
			}
			// goto finish;
			gotofinish = true;
		}

		/*
		** here is the main loop
		*/
		if (!gotofinish) {
			halving = 0;  /* =1 when in the midst of "step halving" */
			for (iter = 1; iter <= maxiter; iter++) {
				newlk = 0;
				for (i = 0; i < nvar; i++) {
					u[i] = 0;
					for (j = 0; j < nvar; j++) {
						imat[i][j] = 0;
					}
				}

				/*
				** The data is sorted from smallest time to largest
				** Start at the largest time, accumulating the risk set 1 by 1
				*/
				for (person = nused - 1; person >= 0;) {
					if (strata[person] == 1) { /* rezero temps for each strata */
						denom = 0;
						nrisk = 0;
						for (i = 0; i < nvar; i++) {
							a[i] = 0;
							for (j = 0; j < nvar; j++) {
								cmat[i][j] = 0;
							}
						}
					}

					dtime = time[person];
					deadwt = 0;
					ndead = 0;
					efronwt = 0;
					while (person >= 0 && time[person] == dtime) {
						nrisk++;
						zbeta = offset[person];
						for (i = 0; i < nvar; i++) {
							zbeta += newbeta[i] * covar[i][person];
						}
						zbeta = coxsafe(zbeta);
						risk = Math.exp(zbeta) * weights[person];
						denom += risk;

						for (i = 0; i < nvar; i++) {
							a[i] += risk * covar[i][person];
							for (j = 0; j <= i; j++) {
								cmat[i][j] += risk * covar[i][person] * covar[j][person];
							}
						}

						if (status[person] == 1) {
							ndead++;
							deadwt += weights[person];
							newlk += weights[person] * zbeta;
							for (i = 0; i < nvar; i++) {
								u[i] += weights[person] * covar[i][person];
							}
							if (method == CoxMethod.Efron) { /* Efron */
								efronwt += risk;
								for (i = 0; i < nvar; i++) {
									a2[i] += risk * covar[i][person];
									for (j = 0; j <= i; j++) {
										cmat2[i][j] += risk * covar[i][person] * covar[j][person];
									}
								}
							}
						}

						person--;
						if (person >= 0 && strata[person] == 1) { //added catch of person = 0 and person-- = -1
							break;  /* ties don't cross strata */
						}
					}

					if (ndead > 0) { /* add up terms */
						if (method == CoxMethod.Breslow) { /* Breslow */
							newlk -= deadwt * Math.log(denom);
							for (i = 0; i < nvar; i++) {
								temp2 = a[i] / denom;  /* mean */
								u[i] -= deadwt * temp2;
								for (j = 0; j <= i; j++) {
									imat[j][i] += (deadwt / denom) * (cmat[i][j] - temp2 * a[j]);
								}
							}
						} else { /* Efron */
							for (k = 0; k < ndead; k++) {
								temp = (double) k / ndead;
								wtave = deadwt / ndead;
								d2 = denom - temp * efronwt;
								newlk -= wtave * Math.log(d2);
								for (i = 0; i < nvar; i++) {
									temp2 = (a[i] - temp * a2[i]) / d2;
									u[i] -= wtave * temp2;
									for (j = 0; j <= i; j++) {
										imat[j][i] += (wtave / d2) * ((cmat[i][j] - temp * cmat2[i][j]) - temp2 * (a[j] - temp * a2[j]));
									}
								}
							}

							for (i = 0; i < nvar; i++) { /* in anticipation */
								a2[i] = 0;
								for (j = 0; j < nvar; j++) {
									cmat2[i][j] = 0;
								}
							}
						}
					}
				} /* end of accumulation loop */

				/* am I done?
				** update the betas and test for convergence
				*/
				flag = Cholesky2.process(imat, nvar, toler);

				if (Math.abs(1 - (loglik[1] / newlk)) <= eps && halving == 0) { /* all done */
					loglik[1] = newlk;
					chinv2(imat, nvar);  /* invert the information matrix */
					for (i = 0; i < nvar; i++) {
						beta[i] = newbeta[i] * scale[i];
						u[i] /= scale[i];
						imat[i][i] *= scale[i] * scale[i];
						for (j = 0; j < i; j++) {
							imat[j][i] *= scale[i] * scale[j];
							imat[i][j] = imat[j][i];
						}
					}
					// goto finish;
					gotofinish = true;
					break;
				}

				if (iter == maxiter) {
					break;  /* skip the step halving calc */
				}

				if (newlk < loglik[1]) { /* it is not converging ! */
					halving = 1;
					for (i = 0; i < nvar; i++) {
						newbeta[i] = (newbeta[i] + beta[i]) / 2;  /* half of old increment */
					}
				} else {
					halving = 0;
					loglik[1] = newlk;
					chsolve2(imat, nvar, u);
					j = 0;
					for (i = 0; i < nvar; i++) {
						beta[i] = newbeta[i];
						newbeta[i] = newbeta[i] + u[i];
					}
				}
			} /* return for another iteration */
		}

		if (!gotofinish) {
			/*
			** We end up here only if we ran out of iterations
			*/
			loglik[1] = newlk;
			chinv2(imat, nvar);
			for (i = 0; i < nvar; i++) {
				beta[i] = newbeta[i] * scale[i];
				u[i] /= scale[i];
				imat[i][i] *= scale[i] * scale[i];
				for (j = 0; j < i; j++) {
					imat[j][i] *= scale[i] * scale[j];
					imat[i][j] = imat[j][i];
				}
			}
			flag = 1000;
		}

		//finish:
		/*
		for (j = 0; j < numCovariates; j++) {
			b[j] = b[j] / SD[j]; *
			ix = j * (numCovariates + 1) + j
			SE[j] = Math.sqrt(a[ix(j, j, numCovariates + 1)]) / SD[j];
			// o = o + (" " + variables.get(j) + " " + Fmt(b[j]) + Fmt(SE[j]) + Fmt(Math.exp(b[j])) + Fmt(Norm(Math.abs(b[j] / SE[j]))) + Fmt(Math.exp(b[j] - 1.95 * SE[j])) + Fmt(Math.exp(b[j] + 1.95 * SE[j])) + NL);
			CoxCoefficient coe = coxInfo.getCoefficient(variables.get(j));
			coe.coeff = b[j];
			coe.stdError = SE[j];
			coe.hazardRatio = Math.exp(b[j]);
			coe.pvalue = Norm(Math.abs(b[j] / SE[j]));
			coe.hazardRatioLoCI = Math.exp(b[j] - 1.95 * SE[j]);
			coe.hazardRatioHiCI = Math.exp(b[j] + 1.95 * SE[j]);
		}
		*/

		coxInfo.setScoreLogrankTest(sctest);
		coxInfo.setDegreeFreedom(beta.length);
		coxInfo.setScoreLogrankTestpvalue(ChiSq.chiSq(coxInfo.getScoreLogrankTest(), beta.length));

		coxInfo.setVariance(imat);
		coxInfo.u = u;

		// for (int n = 0; n < beta.length; n++) {
		//     se[n] = Math.sqrt(imat[n][n]); // / sd[n];
		// }

		// System.out.println("coef,se, means,u");
		for (int n = 0; n < beta.length; n++) {
			CoxCoefficient coe = new CoxCoefficient();
			coe.name = variables.get(n);
			coe.mean = means[n];
			coe.standardDeviation = sd[n];
			coe.coeff = beta[n];
			coe.stdError = Math.sqrt(imat[n][n]);
			coe.hazardRatio = Math.exp(coe.getCoeff());
			coe.z = coe.getCoeff() / coe.getStdError();
			coe.pvalue = ChiSq.norm(Math.abs(coe.getCoeff() / coe.getStdError()));
			double z = 1.959964;
			coe.hazardRatioLoCI = Math.exp(coe.getCoeff() - z * coe.getStdError());
			coe.hazardRatioHiCI = Math.exp(coe.getCoeff() + z * coe.getStdError());

			coxInfo.setCoefficient(coe.getName(), coe);
			// System.out.println(beta[n] + "," + se[n] + "," + means[n] + "," + sd[n] + "," + u[n]); //+ "," + imat[n] "," + loglik[n] + "," + sctest[n] + "," + iterations[n] + "," + flag[n]
		}

		coxInfo.maxIterations = maxiter;
		coxInfo.eps = eps;
		coxInfo.toler = toler;
		coxInfo.iterations = iter;
		coxInfo.flag = flag;
		coxInfo.loglikInit = loglik[0];
		coxInfo.loglikFinal = loglik[1];
		coxInfo.method = method;

		// System.out.println("loglik[0]=" + loglik[0]);
		// System.out.println("loglik[1]=" + loglik[1]);
		// System.out.println("chisq? sctest[0]=" + sctest[0]);
		// System.out.println("?overall model p-value=" + chiSq(sctest[0], beta.length));
		// System.out.println();

		// for (int n = 0; n < covar[0].length; n++) {
		//     System.out.print(n);
		//     for (int variable = 0; variable < covar.length; variable++) {
		//         System.out.print("\t" + covar[variable][n]);
		//     }
		//     System.out.println();
		// }

		// for (SurvivalInfo si : data) {
		//     System.out.println(si.order + " " + si.getScore());
		// }

		// coxInfo.dump();
		coxphfitSCleanup(coxInfo, useWeighted, robust, clusterList);
		return coxInfo;
	}

	/**
	 *
	 * @param ci
	 * @param useWeighted
	 * @param robust
	 * @param cluster
	 * @throws Exception
	 */
	public void coxphfitSCleanup(CoxInfo ci, boolean useWeighted, boolean robust, ArrayList<String> cluster) throws Exception {
		//Do cleanup found after coxfit6 is called in coxph.fit.S

		//infs <- abs(coxfit$u %*% var)
		//[ a1 b1] * [a1 b1]
		//           [a2 b2]
		double[][] du = new double[1][ci.u.length];
		du[0] = ci.u;
		double[] infs = Matrix.abs(Matrix.multiply(ci.u, ci.getVariance()));
		// StdArrayIO.print(infs);

		ArrayList<CoxCoefficient> coxCoefficients = new ArrayList<>(ci.getCoefficientsList().values());
		for (int i = 0; i < infs.length; i++) {
			double inf = infs[i];
			double coe = coxCoefficients.get(i).getCoeff();
			if (inf > ci.eps && inf > (ci.toler * Math.abs(coe))) {
				ci.message = "Loglik converged before variable ";
			}
		}

		//sum(coef*coxfit$means)
		double sumcoefmeans = 0;
		for (CoxCoefficient cc : coxCoefficients) {
			sumcoefmeans = sumcoefmeans + cc.getCoeff() * cc.getMean();
		}

		// coxph.fit.S line 107
		//lp <- c(x %*% coef) + offset - sum(coef*coxfit$means)
		for (SurvivalInfo si : ci.survivalInfoList) {
			double offset = si.getOffset();
			double lp = 0;
			for (CoxCoefficient cc : coxCoefficients) {
				String name = cc.getName();
				double coef = cc.getCoeff();
				double value = si.getVariable(name);
				lp = lp + value * coef;
			}
			lp = lp + offset - sumcoefmeans;
			si.setLinearPredictor(lp);
			si.setScore(Math.exp(lp));
			// System.out.println("lp score " + si.order + " " + si.time + " " + si.getWeight() + " " + si.getClusterValue() + " " + lp + " " + Math.exp(lp));
		}
		// ci.dump();

		//begin code after call to coxfit6 in coxph.fit.S

		//Compute the martingale residual for a Cox model
		// appears to be C syntax error for = - vs -=
		//(if (nullmodel) in coxph.fit
		double[] res = CoxMart.process(ci.method, ci.survivalInfoList, false);

		for (int i = 0; i < ci.survivalInfoList.size(); i++) {
			SurvivalInfo si = ci.survivalInfoList.get(i);
			si.setResidual(res[i]);
		}

		//this represents the end of coxph.fit.S code and we pickup
		//after call to fit <- fitter(X, Y, strats ....) in coxph.R
		if (robust) {
			ci.setNaiveVariance(ci.getVariance());
			double[][] temp;
			double[][] temp0;
			if (cluster != null) {
				temp = ResidualsCoxph.process(ci, ResidualsCoxph.Type.dfbeta, useWeighted, cluster);
				//# get score for null model
				// if (is.null(init))
				//     fit2$linear.predictors <- 0*fit$linear.predictors
				// else
				//     fit2$linear.predictors <- c(X %*% init)
				//Set score to 1
				double[] templp = new double[ci.survivalInfoList.size()];
				double[] tempscore = new double[ci.survivalInfoList.size()];
				int i = 0;
				for (SurvivalInfo si : ci.survivalInfoList) {
					templp[i] = si.getLinearPredictor();
					tempscore[i] = si.getScore();
					si.setLinearPredictor(0);
					si.setScore(1.0); //this erases stored value which isn't how the R code does it
					i++;
				}

				temp0 = ResidualsCoxph.process(ci, ResidualsCoxph.Type.score, useWeighted, cluster);

				i = 0;
				for (SurvivalInfo si : ci.survivalInfoList) {
					si.setLinearPredictor(templp[i]);
					si.setScore(tempscore[i]); //this erases stored value which isn't how the R code does it
					i++;
				}
			} else {
				temp = ResidualsCoxph.process(ci, ResidualsCoxph.Type.dfbeta, useWeighted, null);
				// fit2$linear.predictors <- 0*fit$linear.predictors
				double[] templp = new double[ci.survivalInfoList.size()];
				double[] tempscore = new double[ci.survivalInfoList.size()];
				int i = 0;
				for (SurvivalInfo si : ci.survivalInfoList) {
					templp[i] = si.getLinearPredictor();
					tempscore[i] = si.getScore();
					si.setLinearPredictor(0);
					si.setScore(1.0);
					i++;
				}

				temp0 = ResidualsCoxph.process(ci, ResidualsCoxph.Type.score, useWeighted, null);

				i = 0;
				for (SurvivalInfo si : ci.survivalInfoList) {
					si.setLinearPredictor(templp[i]);
					si.setScore(tempscore[i]); //this erases stored value which isn't how the R code does it
					i++;
				}
			}

			//fit$var <- t(temp) %*% temp
			double[][] ttemp = Matrix.transpose(temp);
			double[][] var = Matrix.multiply(ttemp, temp);
			ci.setVariance(var);

			//u <- apply(as.matrix(temp0), 2, sum)
			double[] u = new double[temp0[0].length];
			for (int i = 0; i < temp0[0].length; i++) {
				for (int j = 0; j < temp0.length; j++) {
					u[i] = u[i] + temp0[j][i];
				}
			}

			//fit$rscore <- coxph.wtest(t(temp0)%*%temp0, u, control$toler.chol)$test
			double[][] wtemp = Matrix.multiply(Matrix.transpose(temp0), temp0);
			double toler_chol = 1.818989e-12;
			// toler_chol = ci.toler;
			WaldTestInfo wti = WaldTest.process(wtemp, u, toler_chol); //not giving the correct value
			ci.setRscore(wti.getTest());
		}
		calculateWaldTestInfo(ci);
	}

	static public void calculateWaldTestInfo(CoxInfo ci) {
		if (ci.getNumberCoefficients() > 0) {
			double toler_chol = 1.818989e-12;
			// toler_chol = ci.toler;
			double[][] b = new double[1][ci.getNumberCoefficients()];
			int i = 0;
			for (CoxCoefficient coe : ci.getCoefficientsList().values()) {
				b[0][i] = coe.getCoeff();
				i++;
			}
			ci.setWaldTestInfo(WaldTest.process(ci.getVariance(), b, toler_chol));
		}
	}

	/**
	 * @param args the command line arguments
	 */
	public static void main(String[] args) {
		// TODO code application logic here
		CoxR coxr = new CoxR();

		if (true) {
			try {
				InputStream is = coxr.getClass().getClassLoader().getResourceAsStream("uis-complete.txt");
				WorkSheet worksheet = WorkSheet.readCSV(is, '\t');
				ArrayList<SurvivalInfo> survivalInfoList = new ArrayList<>();
				int i = 0;
				for (String row : worksheet.getRows()) {
					double time = worksheet.getCellDouble(row, "TIME");
					double age = worksheet.getCellDouble(row, "AGE");
					double treat = worksheet.getCellDouble(row, "TREAT");
					double c = worksheet.getCellDouble(row, "CENSOR");
					int censor = (int) c;

					SurvivalInfo si = new SurvivalInfo(time, censor);
					si.setOrder(i);
					si.addContinuousVariable("AGE", age);
					si.addContinuousVariable("TREAT", treat);

					survivalInfoList.add(si);
					i++;
				}

				CoxR cox = new CoxR();
				ArrayList<String> variables = new ArrayList<>();
				// variables.add("AGE");
				variables.add("AGE");
				variables.add("TREAT");
				// variables.add("TREAT:AGE");
				// ArrayList cluster = new ArrayList();
				CoxInfo ci = cox.process(variables, survivalInfoList, false, true, false, false);
				System.out.println(ci);

			} catch (Exception e) {
				e.printStackTrace();
			}
		}

		// if (false) {
		//     try {
		//         WorkSheet worksheet = WorkSheet.readCSV("/Users/Scooter/NetBeansProjects/AssayWorkbench/src/edu/scripps/assayworkbench/cox/uis-complete.txt", '\t');
		//         ArrayList rows = worksheet.getRows();
		//         ArrayList variables = new ArrayList();
		//         variables.add("AGE");
		//         variables.add("TREAT");
		//         double[] time2 = new double[rows.size()];
		//         int[] status2 = new int[rows.size()];
		//         double[][] covar2 = new double[variables.size()][rows.size()];
		//         double[] offset2 = new double[rows.size()];
		//         double[] weights2 = new double[rows.size()];
		//         int[] strata2 = new int[rows.size()];
		//
		//         for (int i = 0; i < rows.size(); i++) {
		//             String row = rows.get(i);
		//             double time = worksheet.getCellDouble(row, "TIME");
		//             // double age = worksheet.getCellDouble(row, "AGE");
		//             // double treat = worksheet.getCellDouble(row, "TREAT");
		//             double c = worksheet.getCellDouble(row, "CENSOR");
		//             int censor = (int) c;
		//
		//             time2[i] = time;
		//             status2[i] = censor;
		//             offset2[i] = 0;
		//             weights2[i] = 1;
		//             strata2[i] = 0;
		//
		//             for (int j = 0; j < variables.size(); j++) {
		//                 String variable = variables.get(j);
		//                 double v = worksheet.getCellDouble(row, variable);
		//                 covar2[j][i] = v;
		//             }
		//         }
		//
		//         //from coxph.control.S
		//         int maxiter2 = 20;
		//         double eps2 = 1e-9;
		//         double toler2 = Math.pow(eps2, .75);
		//         int doscale2 = 1;
		//         int method2 = 0;
		//         //toler.chol = eps ^ .75
		//         //toler.inf=sqrt(eps)
		//         //outer.max=10
		//
		//         CoxR cox = new CoxR();
		//         cox.coxfit6(maxiter2, time2, status2, covar2, offset2, weights2, strata2, method2, eps2, toler2, time2, doscale2);
		//
		//     } catch (Exception e) {
		//         e.printStackTrace();
		//     }
		// }
	}

	/*  $Id: chinv2.c 11357 2009-09-04 15:22:46Z therneau $
	**
	** matrix inversion, given the FDF' cholesky decomposition
	**
	** input  **matrix, which contains the chol decomp of an n by n
	**   matrix in its lower triangle.
	**
	** returned: the upper triangle + diagonal contain (FDF')^{-1}
	**            below the diagonal will be F inverse
	**
	**  Terry Therneau
	*/
	void chinv2(double[][] matrix, int n) {
		double temp;
		int i, j, k;

		/*
		** invert the cholesky in the lower triangle
		**   take full advantage of the cholesky's diagonal of 1's
		*/
		for (i = 0; i < n; i++) {
			if (matrix[i][i] > 0) {
				matrix[i][i] = 1 / matrix[i][i];  /* this line inverts D */
				for (j = (i + 1); j < n; j++) {
					matrix[j][i] = -matrix[j][i];
					for (k = 0; k < i; k++) { /* sweep operator */
						matrix[j][k] += matrix[j][i] * matrix[i][k];
					}
				}
			}
		}

		/*
		** lower triangle now contains inverse of cholesky
		** calculate F'DF (inverse of cholesky decomp process) to get inverse
		**   of original matrix
		*/
		for (i = 0; i < n; i++) {
			if (matrix[i][i] == 0) { /* singular row */
				for (j = 0; j < i; j++) {
					matrix[j][i] = 0;
				}
				for (j = i; j < n; j++) {
					matrix[i][j] = 0;
				}
			} else {
				for (j = (i + 1); j < n; j++) {
					temp = matrix[j][i] * matrix[j][j];
					if (j != i) {
						matrix[i][j] = temp;
					}
					for (k = i; k < j; k++) {
						matrix[i][k] += temp * matrix[j][k];
					}
				}
			}
		}
	}

	/*  $Id: chsolve2.c 11376 2009-12-14 22:53:57Z therneau $
	**
	** Solve the equation Ab = y, where the cholesky decomposition of A and y
	**   are the inputs.
	**
	** Input  **matrix, which contains the chol decomp of an n by n
	**   matrix in its lower triangle.
	**        y[n] contains the right hand side
	**
	**  y is overwritten with b
	**
	**  Terry Therneau
	*/
	void chsolve2(double[][] matrix, int n, double[] y) {
		int i, j;
		double temp;

		/*
		** solve Fb = y
		*/
		for (i = 0; i < n; i++) {
			temp = y[i];
			for (j = 0; j < i; j++) {
				temp -= y[j] * matrix[i][j];
			}
			y[i] = temp;
		}

		/*
		** solve DF'z = b
		*/
		for (i = (n - 1); i >= 0; i--) {
			if (matrix[i][i] == 0) {
				y[i] = 0;
			} else {
				temp = y[i] / matrix[i][i];
				for (j = i + 1; j < n; j++) {
					temp -= y[j] * matrix[j][i];
				}
				y[i] = temp;
			}
		}
	}

	/**
	 *
	 * @param x
	 * @return
	 */
	public double coxsafe(double x) {
		if (x < -200) {
			return -200;
		}
		if (x > 22) {
			return 22;
		}
		return x;
	}
}




