repicea.stats.data.GenericStatisticalDataStructure Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of repicea-mathstats Show documentation
Show all versions of repicea-mathstats Show documentation
Mathematical and statistical methods
/*
* This file is part of the repicea-statistics library.
*
* Copyright (C) 2009-2012 Mathieu Fortin for Rouge-Epicea
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This library is distributed with the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* Please see the license at http://www.gnu.org/copyleft/lesser.html.
*/
package repicea.stats.data;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Vector;
import repicea.math.Matrix;
import repicea.math.formula.MathFormula;
import repicea.math.formula.MathOperator;
import repicea.math.utility.MatrixUtility;
import repicea.util.ObjectUtility;
/**
* The GenericStatisticalDataStructure class is a concrete class that implements the features required
* to fit a statistical model. The structure includes a vector of dependent variables (vectorY), a matrix
* of covariates (matrixX) and a possible hierarchical data structure (hierarchicalStructure).
* @author Mathieu Fortin - June 2011
*/
public class GenericStatisticalDataStructure implements StatisticalDataStructure {
/** The DataSet instance the response vector and the design matrix are extracted from. */
protected final DataSet dataSet;
/** Whether the design matrix starts with an intercept column. */
protected boolean isInterceptModel;
/**
 * The effects of the model, in insertion order. The key is the effect name as parsed from the
 * model definition; the value is the MathFormula behind the effect, or null when the effect is
 * a plain field or an interaction.
 */
protected final LinkedHashMap<String, MathFormula> effects;
/** The name of the response field, i.e. the left-hand side of the model definition. */
protected String yName;
/**
 * Creates a data structure backed by the given DataSet instance. A new instance is an
 * intercept model and has no effect registered yet.
 * @param dataSet the DataSet instance from which the structure is going to be extracted
 */
public GenericStatisticalDataStructure(DataSet dataSet) {
	this.effects = new LinkedHashMap();
	this.isInterceptModel = true;
	this.dataSet = dataSet;
}
/**
 * Produces the dummy variables of a class field by handing the retained categories and the
 * index of the field over to the DataSet instance.
 * @param fieldName the name of the class field
 * @param refClass the category used as reference, or null if no reference was specified
 * @return the Matrix instance returned by DataSet.getDummyMatrix
 * @throws StatisticalDataException declared by the signature; this method does not throw it directly
 */
@SuppressWarnings({ "rawtypes"})
protected Matrix computeDummyVariables(String fieldName, String refClass) throws StatisticalDataException {
	final List retainedCategories = getPossibleValueForDummyVariable(fieldName, refClass);
	return dataSet.getDummyMatrix(retainedCategories, getDataSet().getIndexOfThisField(fieldName));
}
/**
 * Returns the sorted categories of a class field that are represented by a dummy variable.
 * In an intercept model one category is dropped: the reference class if one is given,
 * otherwise the first category once sorted. Without intercept all categories are kept.
 * @param fieldName the name of the class field
 * @param refClass the category to be used as reference, or null
 * @return the list of retained categories (the list obtained from the DataSet instance, sorted in place)
 * @throws InvalidParameterException if the field is not part of the DataSet instance or if
 * the reference class is not one of its categories
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public List getPossibleValueForDummyVariable(String fieldName, String refClass) {
	final int indexOfField = getDataSet().getIndexOfThisField(fieldName);
	if (indexOfField == -1) {
		throw new InvalidParameterException("Field " + fieldName + " is not part of the DataSet instance!");
	}

	final List categories = dataSet.getPossibleValuesInThisField(indexOfField);
	Collections.sort(categories);

	final boolean hasReferenceClass = refClass != null;
	if (hasReferenceClass && !categories.contains(refClass)) {
		throw new InvalidParameterException("Reference class category " + refClass + " does not belong to this class variable!");
	}

	if (!isInterceptModel()) {
		return categories;		// no intercept: every category gets its own dummy variable
	}

	if (hasReferenceClass) {
		categories.remove(refClass);	// remove(Object): drops the first occurrence of the reference class
	} else {
		categories.remove(0);			// remove(int): the first sorted category is the default reference
	}
	return categories;
}
/**
 * Tells whether the model includes an intercept.
 * @return the current value of the intercept flag
 */
@Override
public boolean isInterceptModel() {
	return this.isInterceptModel;
}
/**
 * Enables or disables the intercept of the model.
 * @param isInterceptModel true to include an intercept or false otherwise
 */
@Override
public void setInterceptModel(boolean isInterceptModel) {
	this.isInterceptModel = isInterceptModel;
}
/**
 * Retrieves the values of a field from the DataSet instance.
 * @param fName the name of the field
 * @return the Matrix instance returned by DataSet.getVectorOfThisField
 */
protected Matrix getVectorOfThisField(String fName) {
	final Matrix fieldValues = dataSet.getVectorOfThisField(fName);
	return fieldValues;
}
/**
 * Builds the design matrix of the model from the registered effects.
 * <p>
 * If the model has an intercept, the matrix starts with a single intercept column. Each
 * registered effect then contributes a block that is stacked to the matrix, in the order
 * the effects were registered. An effect name made of several parts separated by ":" is an
 * interaction: the matrices of its parts are merged through MatrixUtility.combineMatrices.
 * @return the design matrix, or null if the model has neither an intercept nor any effect
 */
@Override
public Matrix constructMatrixX() {
	Matrix matrixX = null;
	if (this.isInterceptModel) {
		// NOTE(review): presumably a column of ones (start value 1, increment 0) - confirm against Matrix
		matrixX = new Matrix(getNumberOfObservations(), 1, 1, 0);
	}

	for (String effectName : effects.keySet()) {
		// StringTokenizer skips empty tokens, so "a::b" is handled like "a:b"
		StringTokenizer tkzExclusiveInteraction = new StringTokenizer(effectName, ":");
		Matrix matXForThisEffect = null;
		while (tkzExclusiveInteraction.hasMoreTokens()) {
			Matrix matXtmp = createMatrixForSingleEffect(tkzExclusiveInteraction.nextToken());
			matXForThisEffect = matXForThisEffect == null ?
					matXtmp :
						MatrixUtility.combineMatrices(matXForThisEffect, matXtmp);
		}

		matrixX = matrixX == null ?
				matXForThisEffect :
					matrixX.matrixStack(matXForThisEffect, false);
	}
	return matrixX;
}

/**
 * Builds the matrix of a single (non interaction) effect. Three cases are handled:
 * an effect with a MathFormula behind it, a numeric field, and a class field.
 * A class field may carry its reference class after a "#" (e.g. "species#oak").
 * @param singleEffect the name of the effect, without any ":" separator
 * @return the Matrix instance for this effect
 */
private Matrix createMatrixForSingleEffect(String singleEffect) {
	MathFormula formula = effects.get(singleEffect);
	if (formula != null) {	// there is a Math Formula behind this effect
		// only the first variable of the formula is fed with the field values
		String fName = formula.getVariables().get(0);
		Matrix originalValues = getVectorOfThisField(fName);
		Matrix transformedValues = new Matrix(originalValues.m_iRows, 1);
		for (int i = 0; i < originalValues.m_iRows; i++) {
			formula.setVariable(fName, originalValues.getValueAt(i, 0));
			transformedValues.setValueAt(i, 0, formula.calculate());
		}
		return transformedValues;
	}

	String fieldName = singleEffect;
	String refClass = null;
	int indexOfReferenceClass = singleEffect.indexOf("#");
	if (indexOfReferenceClass != -1) {
		refClass = singleEffect.substring(indexOfReferenceClass + 1);
		fieldName = singleEffect.substring(0, indexOfReferenceClass);
	}

	Class<?> fieldType = dataSet.getFieldTypeOfThisField(fieldName);
	if (Number.class.isAssignableFrom(fieldType)) {		// it is either a double or an integer
		return getVectorOfThisField(fieldName);
	}
	return computeDummyVariables(fieldName, refClass);
}
/**
 * Retrieves the values of the response field from the DataSet instance.
 * @return the Matrix instance returned by DataSet.getVectorOfThisField for the response field
 */
@Override
public Matrix constructVectorY() {
	final Matrix vectorY = dataSet.getVectorOfThisField(yName);
	return vectorY;
}
/**
 * Collects the parts of the model specification that rely on a named operator,
 * i.e. the sequences that start with an operator name followed by "(" and end with ")".
 * @param modelEffects the right-hand side of the model definition
 * @return the sequences found for all the named operators, possibly an empty list
 */
private List<String> getLongNamedEffects(String modelEffects) {
	List<String> longNamedEffects = new ArrayList<String>();
	for (String longNamedOperator : MathOperator.NamedOperators.keySet()) {
		List<String> sequencesForThisOperator = ObjectUtility.extractSequences(modelEffects, longNamedOperator + "(", ")");
		// the first element returned by extractSequences is never retained, hence the loop starts at 1
		for (int i = 1; i < sequencesForThisOperator.size(); i++) {
			longNamedEffects.add(sequencesForThisOperator.get(i));
		}
	}
	return longNamedEffects;
}
/**
 * Parses the model definition and registers the response and the effects.
 * <p>
 * The definition must read "response ~ effects". Effects are separated by "+". Within one
 * term, "*" denotes an inclusive interaction (the main effects and all their interactions
 * are registered) and ":" denotes an exclusive interaction (only the interaction itself is
 * registered). The two symbols cannot be mixed in the same term. The parts of an interaction
 * are sorted, so that "b:a" and "a:b" lead to the same effect. Effects that were already
 * registered are not registered twice. Any effect registered by a previous call is discarded.
 * @param modelDefinition the model definition
 * @throws InvalidParameterException if the definition cannot be split into exactly two parts around "~"
 * @throws StatisticalDataException if the symbols "*" and ":" are used in the same term
 */
@Override
public void setModelDefinition(String modelDefinition) {
	List<String> responseAndFixedEffects = ObjectUtility.decomposeUsingToken(modelDefinition, "~");
	if (responseAndFixedEffects.size() != 2) {
		throw new InvalidParameterException("The model specification is incorrect!");
	}
	yName = responseAndFixedEffects.get(0);
	String modelEffects = responseAndFixedEffects.get(1);

	// Effects such as "log(x+1)" may contain the symbols +, * or :. They are replaced by
	// placeholders ("&0", "&1", ...) before the specification is split on these symbols.
	List<String> longNamedEffects = getLongNamedEffects(modelEffects);
	Map<String, String> longNamedEffectsMap = new HashMap<String, String>();
	int id = 0;
	for (String longNamedEffect : longNamedEffects) {
		String substitute = "&" + id;
		modelEffects = modelEffects.replace(longNamedEffect, substitute);
		longNamedEffectsMap.put(substitute, longNamedEffect);
		id++;
	}

	effects.clear();

	List<String> effectAndInteractionList = ObjectUtility.decomposeUsingToken(modelEffects, "+");
	for (String effectOrInteraction : effectAndInteractionList) {
		StringTokenizer tkzInclusiveInteraction = new StringTokenizer(effectOrInteraction, "*");
		StringTokenizer tkzExclusiveInteraction = new StringTokenizer(effectOrInteraction, ":");
		int numberOfInclusiveInteraction = tkzInclusiveInteraction.countTokens();
		int numberOfExclusiveInteraction = tkzExclusiveInteraction.countTokens();
		if (numberOfInclusiveInteraction > 1 && numberOfExclusiveInteraction > 1) {
			throw new StatisticalDataException("Error : symbols * and : are being used at the same time in the model specification!");
		}

		List<String> effectsInThisInteraction = new ArrayList<String>();
		boolean isAnInclusiveInteraction = numberOfInclusiveInteraction > 1;
		if (isAnInclusiveInteraction) {
			// main effects first, then all their interactions
			while (tkzInclusiveInteraction.hasMoreTokens()) {
				effectsInThisInteraction.add(tkzInclusiveInteraction.nextToken());
			}
			Collections.sort(effectsInThisInteraction);
			effectsInThisInteraction.addAll(getAllCombinations(effectsInThisInteraction));
		} else {
			// a single effect or an exclusive interaction: the sorted parts are joined with ":"
			List<String> tempColl = new ArrayList<String>();
			while (tkzExclusiveInteraction.hasMoreTokens()) {
				tempColl.add(tkzExclusiveInteraction.nextToken());
			}
			Collections.sort(tempColl);
			StringBuilder interaction = new StringBuilder();
			for (int i = 0; i < tempColl.size(); i++) {
				if (i > 0) {
					interaction.append(":");
				}
				interaction.append(tempColl.get(i));
			}
			effectsInThisInteraction.add(interaction.toString());
		}

		for (String effectName : effectsInThisInteraction) {
			if (!effects.containsKey(effectName)) {
				MathFormula formula = null;
				if (longNamedEffectsMap.containsKey(effectName)) {
					// the effect is a placeholder: the formula is built from the original text
					String originalEffectName = longNamedEffectsMap.get(effectName);
					formula = extractFormulaIfAny(originalEffectName);
				}
				effects.put(effectName, formula);
			}
		}
	}
}
/**
 * Produces all the interactions that can be formed with the distinct effects of a list.
 * <p>
 * Duplicated effects are considered only once. Two distinct effects yield their two-way
 * interaction. Three distinct effects yield the three two-way interactions followed by the
 * three-way interaction. Fewer than two distinct effects yield an empty list since there is
 * nothing to interact with (e.g. "a*a"). The parts of an interaction are separated by ":"
 * and appear in the order of the input list.
 * @param simpleEffects the main effects
 * @return a new list that contains the interactions, possibly empty
 * @throws InvalidParameterException if there are more than three distinct effects
 */
protected static final List<String> getAllCombinations(List<String> simpleEffects) {
	List<String> uniqueValues = new ArrayList<String>();
	for (String str : simpleEffects) {
		if (!uniqueValues.contains(str)) {
			uniqueValues.add(str);
		}
	}

	int nbEffects = uniqueValues.size();
	if (nbEffects > 3) {
		throw new InvalidParameterException("Interactions with more than 3 effects are not allowed!");
	}

	List<String> outputList = new ArrayList<String>();
	if (nbEffects < 2) {
		return outputList;		// a single effect cannot interact with itself
	}

	// two-way interactions
	for (int i = 0; i < nbEffects - 1; i++) {
		for (int j = i + 1; j < nbEffects; j++) {
			outputList.add(uniqueValues.get(i) + ":" + uniqueValues.get(j));
		}
	}
	// three-way interaction
	if (nbEffects == 3) {
		outputList.add(uniqueValues.get(0) + ":" + uniqueValues.get(1) + ":" + uniqueValues.get(2));
	}
	return outputList;
}
/**
 * Creates the MathFormula instance behind an effect if this effect starts with a named
 * operator followed by "(".
 * <p>
 * The variables of the formula are the fields of the DataSet instance whose name appears
 * in the effect as a whole name. A plain substring search is not sufficient: with fields "p"
 * and "x", the effect "exp(x)" would register "p" as a variable because of the operator
 * name, and with fields "x" and "x2" the effect "log(x2)" would register "x".
 * @param effectName the original text of the effect, e.g. "log(x)"
 * @return a MathFormula instance or null if the effect does not start with a named operator
 */
private MathFormula extractFormulaIfAny(String effectName) {
	boolean isLongNamedOperator = false;
	for (String longNamedOperator : MathOperator.NamedOperators.keySet()) {
		if (effectName.startsWith(longNamedOperator + "(")) {
			isLongNamedOperator = true;
			break;
		}
	}
	if (!isLongNamedOperator) {
		return null;
	}

	LinkedHashMap<String, Double> variables = new LinkedHashMap<String, Double>();
	for (String fieldName : dataSet.getFieldNames()) {
		if (containsAsWholeName(effectName, fieldName)) {
			variables.put(fieldName, 0d);
		}
	}
	return new MathFormula(effectName, null, variables);
}

/**
 * Checks if a name occurs in an expression without being part of a longer name, that is
 * without a letter, a digit or an underscore right before or right after it.
 * NOTE(review): a field that bears the name of the operator itself (e.g. a field "log" in
 * "log(x)") is still matched, as it was with the former substring search.
 * @param expression the expression to be searched
 * @param name the name to look for
 * @return true if at least one occurrence of the name stands on its own
 */
private static boolean containsAsWholeName(String expression, String name) {
	if (name == null || name.isEmpty()) {
		return false;
	}
	int index = expression.indexOf(name);
	while (index != -1) {
		int end = index + name.length();
		boolean freeOnTheLeft = index == 0 || !isNameCharacter(expression.charAt(index - 1));
		boolean freeOnTheRight = end == expression.length() || !isNameCharacter(expression.charAt(end));
		if (freeOnTheLeft && freeOnTheRight) {
			return true;
		}
		index = expression.indexOf(name, index + 1);
	}
	return false;
}

/** Tells whether a character can be part of a name, i.e. is a letter, a digit or an underscore. */
private static boolean isNameCharacter(char c) {
	return Character.isLetterOrDigit(c) || c == '_';
}
/**
 * Provides the number of observations as reported by the DataSet instance.
 * @return the value returned by DataSet.getNumberOfObservations
 */
@Override
public int getNumberOfObservations() {
	final int nbObservations = dataSet.getNumberOfObservations();
	return nbObservations;
}
/**
 * Provides the DataSet instance behind this structure.
 * @return the DataSet instance given to the constructor
 */
@Override
public DataSet getDataSet() {
	return this.dataSet;
}
/**
 * Provides the position of an effect in the model. The position in the effect list is
 * shifted by one if the model has an intercept.
 * @param effect the effect as it appears in the list returned by getEffectList
 * @return the position of the effect or -1 if the effect is not part of the model
 */
@Override
public int indexOfThisEffect(String effect) {
	final int position = getEffectList().indexOf(effect);
	if (position == -1) {
		return position;
	}
	return isInterceptModel() ? position + 1 : position;
}
/**
 * Provides the effects of the model in the order they were registered. An effect that has
 * a MathFormula behind it is represented by the string representation of this formula,
 * any other effect by its name.
 * @return a new list of effects
 */
@Override
public List<String> getEffectList() {
	List<String> effectList = new ArrayList<String>(effects.size());
	for (Map.Entry<String, MathFormula> entry : effects.entrySet()) {
		MathFormula formula = entry.getValue();
		effectList.add(formula != null ? formula.toString() : entry.getKey());
	}
	return effectList;
}
}