net.maizegenetics.analysis.numericaltransform.TransformDataPlugin Maven / Gradle / Ivy
package net.maizegenetics.analysis.numericaltransform;
import java.awt.Frame;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.swing.ImageIcon;
import org.apache.log4j.Logger;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import net.maizegenetics.phenotype.CategoricalAttribute;
import net.maizegenetics.phenotype.NumericAttribute;
import net.maizegenetics.phenotype.Phenotype;
import net.maizegenetics.phenotype.PhenotypeAttribute;
import net.maizegenetics.phenotype.Phenotype.ATTRIBUTE_TYPE;
import net.maizegenetics.phenotype.PhenotypeBuilder;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.Datum;
import net.maizegenetics.util.OpenBitSet;
/**
 * Plugin that applies a power or log transformation and/or a standardization
 * to numeric attributes of a single {@link Phenotype}.
 *
 * <p>At most one of the power and log transformations is applied to a trait
 * (power takes precedence when both flags are set). Standardization, when
 * requested, is applied after that transformation, either over all
 * observations or separately within each combination of factor levels.
 */
public class TransformDataPlugin extends AbstractPlugin {
    private static final Logger myLogger = Logger.getLogger(TransformDataPlugin.class);

    /** Logarithm bases supported by the log transformation. */
    public enum BASE {natural, base_2, base_10}

    // Natural logs of the supported bases, used to convert ln(x) into log_b(x).
    private static final double log2 = Math.log(2);
    private static final double log10 = Math.log(10);

    private List<NumericAttribute> traitsToTransform = new ArrayList<>();
    private List<CategoricalAttribute> byFactor = new ArrayList<>();
    private boolean logTransform = false;
    private boolean powerTransform = false;
    private boolean standardize = false;
    private BASE myBase = BASE.natural;
    private double power = 1;
    private String traitnames = "";
    private String factornames = "";

    public TransformDataPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    /**
     * Transforms the single Phenotype contained in {@code input}.
     *
     * <p>In interactive mode the traits, factors and transformation settings
     * are read from a {@code TransformDataDialog}; otherwise they come from
     * the trait/factor name lists and the flags already set on this plugin.
     *
     * @param input a data set that must contain exactly one Phenotype
     * @return a data set holding the transformed phenotype, or {@code null}
     *         when no transformation (log, power or standardize) is selected
     * @throws IllegalArgumentException if {@code input} does not contain
     *         exactly one Phenotype
     */
    public DataSet processData(DataSet input) {
        List<Datum> myData = input.getDataOfType(Phenotype.class);
        if (myData.size() != 1) {
            throw new IllegalArgumentException("TransformDataPlugin: Please select one Phenotype data set.");
        }

        Datum phenotypeDatum = myData.get(0);
        Phenotype myPhenotype = (Phenotype) phenotypeDatum.getData();
        if (isInteractive()) {
            configureFromDialog(myPhenotype);
        } else {
            configureFromNames(myPhenotype);
        }

        if (logTransform || powerTransform || standardize) {
            return transformTraits(myPhenotype, phenotypeDatum);
        }
        return null;
    }

    /** Shows the transform dialog and copies the user's choices into this plugin's settings. */
    private void configureFromDialog(Phenotype myPhenotype) {
        // Both data and covariate attributes are offered for transformation.
        List<NumericAttribute> numericAttributes = Stream.concat(
                        myPhenotype.attributeListOfType(ATTRIBUTE_TYPE.data).stream(),
                        myPhenotype.attributeListOfType(ATTRIBUTE_TYPE.covariate).stream())
                .map(pa -> (NumericAttribute) pa)
                .collect(Collectors.toList());
        List<CategoricalAttribute> catAttributes = myPhenotype.attributeListOfType(ATTRIBUTE_TYPE.factor).stream()
                .map(pa -> (CategoricalAttribute) pa)
                .collect(Collectors.toList());

        TransformDataDialog tdd = new TransformDataDialog(getParentFrame(), numericAttributes, catAttributes);
        // NOTE(review): the values are read right after setVisible(true), so the
        // dialog is presumably modal - confirm against TransformDataDialog.
        tdd.setVisible(true);

        traitsToTransform = tdd.traitsToTransform();
        byFactor = tdd.factorsForStandardizing();
        logTransform = tdd.logTransformation();
        powerTransform = tdd.powerTransformation();
        standardize = tdd.standardize();
        myBase = tdd.base();
        power = tdd.exponent();
    }

    /** Resolves the comma-separated trait and factor name lists against the phenotype's attributes. */
    private void configureFromNames(Phenotype myPhenotype) {
        // An empty trait list means "transform every data attribute".
        if (traitnames.length() == 0) {
            traitsToTransform = myPhenotype.attributeListOfType(ATTRIBUTE_TYPE.data).stream()
                    .map(a -> (NumericAttribute) a)
                    .collect(Collectors.toList());
        } else {
            String[] attributeNames = traitnames.split(",");
            traitsToTransform = myPhenotype.attributeListOfType(ATTRIBUTE_TYPE.data).stream()
                    .filter(a -> contains(a.name(), attributeNames))
                    .map(a -> (NumericAttribute) a)
                    .collect(Collectors.toList());
        }

        // An empty factor list means "standardize over all observations".
        if (factornames.length() == 0) {
            byFactor = new ArrayList<>();
        } else {
            String[] attributeNames = factornames.split(",");
            byFactor = myPhenotype.attributeListOfType(ATTRIBUTE_TYPE.factor).stream()
                    .filter(a -> contains(a.name(), attributeNames))
                    .map(a -> (CategoricalAttribute) a)
                    .collect(Collectors.toList());
        }
    }

    /** Returns true if {@code name} is exactly equal to one of the entries of {@code array}. */
    private boolean contains(String name, String[] array) {
        return Arrays.stream(array).anyMatch(str -> str.equals(name));
    }

    /**
     * Builds a new Phenotype in which every selected trait has been transformed
     * and wraps it in a DataSet.
     *
     * @param myPhenotype the phenotype whose attributes are transformed
     * @param myData the Datum the phenotype came from; its name is used in the
     *        name and comment of the result
     * @return a data set with one Datum named {@code "transformed_" + myData.getName()}
     */
    public DataSet transformTraits(Phenotype myPhenotype, Datum myData) {
        // Sequential stream: the order of the attributes must stay aligned with typeListCopy().
        List<PhenotypeAttribute> myNewAttributes = myPhenotype.attributeListCopy().stream()
                .map(this::transformAttribute)
                .collect(Collectors.toList());
        Phenotype transformedPhenotype = new PhenotypeBuilder()
                .fromAttributeList(myNewAttributes, myPhenotype.typeListCopy())
                .build()
                .get(0);

        String name = "transformed_" + myData.getName();

        StringBuilder commentBuilder = new StringBuilder();
        commentBuilder.append("Phenotypes transformed from ");
        commentBuilder.append(myData.getName()).append("\n");
        commentBuilder.append("The following traits were transformed by ");
        if (powerTransform) {
            commentBuilder.append("using a power ").append(power).append(" transformation:\n");
        } else if (logTransform) {
            commentBuilder.append("using a ").append(myBase.name()).append(" log transformation:\n");
        }
        if (standardize) {
            commentBuilder.append("standardizing.\n");
        }
        for (NumericAttribute na : traitsToTransform) {
            commentBuilder.append(na.name()).append("\n");
        }

        return new DataSet(new Datum(name, transformedPhenotype, commentBuilder.toString()), this);
    }

    /**
     * Applies the configured transformation(s) to one attribute.
     *
     * @param myAttribute the attribute to transform
     * @return {@code myAttribute} itself when it is not numeric or is not in the
     *         list of traits to transform; otherwise a transformed attribute
     */
    public PhenotypeAttribute transformAttribute(PhenotypeAttribute myAttribute) {
        if (!(myAttribute instanceof NumericAttribute)) {
            return myAttribute;
        }
        NumericAttribute myNumericAttribute = (NumericAttribute) myAttribute;
        if (!traitsToTransform.contains(myNumericAttribute)) {
            return myAttribute;
        }

        // Power and log are mutually exclusive; power wins if both flags are set.
        if (powerTransform) {
            myNumericAttribute = powerTransform(myNumericAttribute);
        } else if (logTransform) {
            myNumericAttribute = logTransform(myNumericAttribute);
        }

        if (standardize) {
            if (byFactor.size() > 0) {
                return standardize(myNumericAttribute, byFactor);
            }
            return standardize(myNumericAttribute);
        }
        return myNumericAttribute;
    }

    @Override
    public ImageIcon getIcon() {
        URL imageURL = TransformDataPlugin.class.getResource("/net/maizegenetics/analysis/images/Transform.gif");
        return (imageURL == null) ? null : new ImageIcon(imageURL);
    }

    @Override
    public String getButtonName() {
        return "Transform Phenotype";
    }

    @Override
    public String getToolTipText() {
        return "Transform or standardize phenotypes";
    }

    /**
     * Raises every value of {@code original} to the configured exponent.
     *
     * @param original the attribute to transform
     * @return a new attribute with the same name and missing set as {@code original}
     */
    public NumericAttribute powerTransform(NumericAttribute original) {
        float[] originalValues = original.floatValues();
        int n = originalValues.length;
        float[] transValues = new float[n];
        for (int i = 0; i < n; i++) {
            transValues[i] = (float) Math.pow(originalValues[i], power);
        }
        return new NumericAttribute(original.name(), transValues, original.missing());
    }

    /**
     * Takes the logarithm of every value of {@code original} in the configured base.
     *
     * <p>Values for which {@link Math#log(double)} is not finite (zero or
     * negative input) end up as -Infinity or NaN in the result; the missing set
     * passed to the new attribute is the original's, unchanged.
     *
     * @param original the attribute to transform
     * @return a new attribute with the same name and missing set as {@code original}
     */
    public NumericAttribute logTransform(NumericAttribute original) {
        float[] originalValues = original.floatValues();
        int n = originalValues.length;
        float[] transValues = new float[n];

        // log_b(x) = ln(x) / ln(b); dividing by 1 leaves the natural log unchanged.
        final double divisor;
        switch (myBase) {
            case base_10:
                divisor = log10;
                break;
            case base_2:
                divisor = log2;
                break;
            default:
                divisor = 1;
        }

        for (int i = 0; i < n; i++) {
            transValues[i] = (float) (Math.log(originalValues[i]) / divisor);
        }
        return new NumericAttribute(original.name(), transValues, original.missing());
    }

    /**
     * Standardizes {@code original} by subtracting the mean and dividing by the
     * standard deviation computed over all non-NaN values.
     *
     * @param original the attribute to standardize
     * @return a new attribute with the same name and missing set as {@code original}
     */
    public NumericAttribute standardize(NumericAttribute original) {
        float[] originalValues = original.floatValues();
        int n = originalValues.length;
        float[] meanSD = meanStdDev(originalValues);
        float[] transValues = new float[n];
        for (int i = 0; i < n; i++) {
            transValues[i] = (originalValues[i] - meanSD[0]) / meanSD[1];
        }
        return new NumericAttribute(original.name(), transValues, original.missing());
    }

    /**
     * Computes the mean and the sample standard deviation (n - 1 denominator)
     * of the non-NaN values in {@code data}.
     *
     * <p>A two-pass algorithm is used so that the variance is a sum of squares
     * and can never come out negative through cancellation.
     *
     * @param data the values; NaN entries are skipped
     * @return {@code {mean, standardDeviation}}; the mean is NaN when there are
     *         no usable values and the standard deviation is NaN when there are
     *         fewer than two
     */
    public float[] meanStdDev(float[] data) {
        double sum = 0;
        int notMissingCount = 0;
        for (float val : data) {
            if (!Float.isNaN(val)) {
                sum += val;
                notMissingCount++;
            }
        }
        if (notMissingCount == 0) {
            return new float[]{Float.NaN, Float.NaN};
        }

        // Divide in double precision and narrow to float only at the end.
        double mean = sum / notMissingCount;
        if (notMissingCount == 1) {
            return new float[]{(float) mean, Float.NaN};
        }

        double sumSquaredDeviations = 0;
        for (float val : data) {
            if (!Float.isNaN(val)) {
                double deviation = val - mean;
                sumSquaredDeviations += deviation * deviation;
            }
        }
        float sdev = (float) Math.sqrt(sumSquaredDeviations / (notMissingCount - 1));
        return new float[]{(float) mean, sdev};
    }

    /**
     * Standardizes {@code original} separately within each combination of
     * levels of the given factors.
     *
     * <p>Observations that are missing for any factor belong to no subset and
     * keep their original (unstandardized) value.
     * NOTE(review): confirm that leaving those values untouched is intended.
     *
     * @param original the attribute to standardize
     * @param byFactors the factors defining the subsets; when null or empty the
     *        attribute is standardized over all observations
     * @return a new attribute with the same name and missing set as {@code original}
     */
    public NumericAttribute standardize(NumericAttribute original, List<CategoricalAttribute> byFactors) {
        if (byFactors == null || byFactors.isEmpty()) {
            return standardize(original);
        }

        List<int[]> subsetList = subsets(byFactors);
        float[] stdData = Arrays.copyOf(original.floatValues(), original.size());
        for (int[] subset : subsetList) {
            int n = subset.length;
            float[] subsetData = new float[n];
            for (int i = 0; i < n; i++) {
                subsetData[i] = stdData[subset[i]];
            }
            float[] meanSD = meanStdDev(subsetData);
            for (int i = 0; i < n; i++) {
                stdData[subset[i]] = (stdData[subset[i]] - meanSD[0]) / meanSD[1];
            }
        }
        return new NumericAttribute(original.name(), stdData, original.missing());
    }

    /**
     * Groups observation indices by their combination of factor levels.
     *
     * @param byFactors the factors defining the groups; must contain at least
     *        one factor, because the number of observations is taken from the first
     * @return one array of observation indices per distinct level combination,
     *         in unspecified order; observations missing for any factor are
     *         not included in any array
     */
    public List<int[]> subsets(List<CategoricalAttribute> byFactors) {
        // Map key wrapping an int[] so that equality and hashing are by content.
        class Subset {
            final int[] levels;

            Subset(int[] levels) {
                this.levels = levels;
            }

            @Override
            public boolean equals(Object other) {
                return other instanceof Subset && Arrays.equals(levels, ((Subset) other).levels);
            }

            @Override
            public int hashCode() {
                return Arrays.hashCode(levels);
            }
        }

        int nobs = byFactors.get(0).size();

        // An observation is excluded if it is missing for any one of the factors.
        OpenBitSet missing = new OpenBitSet(nobs);
        for (PhenotypeAttribute pa : byFactors) {
            missing.or(pa.missing());
        }

        int nfactors = byFactors.size();
        Multimap<Subset, Integer> subsetMap = HashMultimap.create();
        for (int obs = 0; obs < nobs; obs++) {
            if (missing.fastGet(obs)) {
                continue;
            }
            int[] levels = new int[nfactors];
            int count = 0;
            for (CategoricalAttribute ca : byFactors) {
                levels[count++] = ca.intValue(obs);
            }
            subsetMap.put(new Subset(levels), obs);
        }

        List<int[]> subsetList = new ArrayList<>();
        for (Subset sub : subsetMap.keySet()) {
            subsetList.add(subsetMap.get(sub)
                    .stream()
                    .mapToInt(Integer::intValue)
                    .toArray());
        }
        return subsetList;
    }

    /**
     * Configures the plugin from command line style arguments. Every flag
     * takes exactly one value; see {@link #getUsage()} for the list of flags.
     *
     * @param args alternating flags and values
     * @throws IllegalArgumentException if a flag is not recognized, has no
     *         value, or has a value that cannot be interpreted
     */
    @Override
    public void setParameters(String[] args) {
        traitsToTransform = new ArrayList<>();
        byFactor = new ArrayList<>();

        int argPtr = 0;
        while (argPtr < args.length) {
            String flag = args[argPtr];
            if (startsWithIgnoreCase(flag, "-trait")) {
                setTraits(parameterValue(args, argPtr));
            } else if (startsWithIgnoreCase(flag, "-factor")) {
                setFactors(parameterValue(args, argPtr));
            } else if (flag.equals("-log")) {
                String baseName = parameterValue(args, argPtr);
                BASE parsedBase;
                try {
                    parsedBase = BASE.valueOf(baseName);
                } catch (IllegalArgumentException iae) {
                    throw new IllegalArgumentException("-log parameter value must be one of natural, base_2, base_10.", iae);
                }
                // Change state only after the value is known to be valid.
                myBase = parsedBase;
                logTransform = true;
                powerTransform = false;
            } else if (flag.equals("-power")) {
                String exponent = parameterValue(args, argPtr);
                double parsedPower;
                try {
                    parsedPower = Double.parseDouble(exponent);
                } catch (NumberFormatException nfe) {
                    String msg = "-power parameter value must be a floating point number.";
                    myLogger.error(msg, nfe);
                    // Fail instead of silently running a power transform with the default exponent.
                    throw new IllegalArgumentException(msg, nfe);
                }
                power = parsedPower;
                powerTransform = true;
                logTransform = false;
            } else if (flag.equals("-standardize")) {
                // Any value starting with 't' or 'T' turns standardization on.
                standardize = startsWithIgnoreCase(parameterValue(args, argPtr), "t");
            } else {
                String msg = String.format("unrecognized command line parameter for TransformDataPlugin: %s", flag);
                myLogger.error(msg);
                throw new IllegalArgumentException(msg);
            }
            argPtr += 2; // skip the flag and its value
        }
    }

    /** Returns the value following the flag at {@code flagIndex}, or throws if the flag is the last argument. */
    private static String parameterValue(String[] args, int flagIndex) {
        if (flagIndex + 1 >= args.length) {
            String msg = String.format("missing value for TransformDataPlugin parameter: %s", args[flagIndex]);
            myLogger.error(msg);
            throw new IllegalArgumentException(msg);
        }
        return args[flagIndex + 1];
    }

    /** Locale-independent, case-insensitive version of {@link String#startsWith(String)}. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    @Override
    public String getUsage() {
        StringBuilder usageString = new StringBuilder();
        usageString.append("The TransformDataPlugin can take the following parameters. Cannot use both log and power parameters.:\n");
        usageString.append("-traits: A comma delimited list of trait names with no embedded space. If this parameter is not specified then all traits will be transformed.\n");
        usageString.append("-factor: The factor name or a comma-delimited list of factor names with no embedded spaces within which values are to be standardized. ");
        usageString.append("The default is to ignore factors and use the mean and standard deviation of all observations.\n");
        usageString.append("-log: perform a log transformation. Can take one of natural, base_2, or base_10. Default = no transformation.\n");
        usageString.append("-power: perform a power transformation. The parameter value is the exponent to which each value should be raised. Default = no transformation.\n");
        usageString.append("-standardize: standardize values by subtracting the mean and dividing by the standard deviation. true or false. Default = false.\n");
        return usageString.toString();
    }

    /** Sets the comma-separated list of trait names used in non-interactive mode. */
    public void setTraits(String namelist) {
        traitnames = namelist;
    }

    /** Sets the comma-separated list of factor names used in non-interactive mode. */
    public void setFactors(String namelist) {
        factornames = namelist;
    }

    public void setTraitsToTransform(List<NumericAttribute> traitsToTransform) {
        this.traitsToTransform = traitsToTransform;
    }

    public void setByFactor(List<CategoricalAttribute> byFactor) {
        this.byFactor = byFactor;
    }

    public void setLogTransform(boolean logTransform) {
        this.logTransform = logTransform;
    }

    public void setPowerTransform(boolean powerTransform) {
        this.powerTransform = powerTransform;
    }

    public void setStandardize(boolean standardize) {
        this.standardize = standardize;
    }

    public void setMyBase(BASE myBase) {
        this.myBase = myBase;
    }

    public void setPower(double power) {
        this.power = power;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy