All downloads are free. Search and download functionality uses the official Maven repository.

org.bigml.binding.laminar.Preprocess Maven / Gradle / Ivy

Go to download

An open source Java client that gives you a simple binding to interact with BigML. You can use it to easily create, retrieve, list, update, and delete BigML resources.

There is a newer version: 2.1.1
Show newest version
package org.bigml.binding.laminar;

import java.util.ArrayList;
import java.util.List;

import org.bigml.binding.Constants;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

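/**
 * Auxiliary static functions for preprocessing input data: standardization
 * and binarization of numeric values, one-hot encoding of categorical
 * values, and tree-based embeddings.
 */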
public class Preprocess {
	
	// Keys looked up in a numeric preprocessing spec (see transform):
	// MEAN and STANDARD_DEVIATION are read together for standardization,
	// ZERO and ONE are read together for binarization.
	private static final String MEAN = "mean";
	private static final String STANDARD_DEVIATION = "stdev";
	private static final String ZERO = "zero_value";
	private static final String ONE = "one_value";
	
	
	/**
	 * oneHot and standardize.
	 *
	 * NOTE(review): this listing is damaged by text extraction. Everything
	 * between "i" in oneHot's loop header and "standardize(" is missing (the
	 * rest of oneHot and the start of standardize's declaration), and so is
	 * the head of standardize's loop. Generic type arguments also appear to
	 * have been stripped. Restore from the upstream source before relying on
	 * this code.
	 *
	 * oneHot is called by transform for categorical specs with the spec's
	 * "values" array as possibleValues; presumably it produces a one-hot
	 * encoding -- confirm against the upstream source.
	 */
	private static ArrayList oneHot(
			ArrayList vector, JSONArray possibleValues) {

		ArrayList outvec = new ArrayList();
		// NOTE(review): text lost here -- oneHot's body and return, and the
		// modifiers/return type of standardize.
		for (int i=0; i standardize(
			ArrayList vector, Double mean, Double stdev) {
		
		// standardize: builds a new list rather than modifying vector.
		ArrayList newvec = new ArrayList();
		// NOTE(review): text lost here -- the loop bound, the definition of
		// "standarized" and the condition of the enclosing if. The surviving
		// "0) {" presumably belongs to a check on stdev that guards the
		// division below -- confirm.
		for (int i=0; i 0) {
					standarized = standarized / stdev;
				}
				newvec.add(standarized);
			} else {
				// Fallback branch of the (lost) outer condition: emit 0.0.
				newvec.add(0.0);
			}
		}
		
		return newvec;
	}
	
	/**
	 * binarize and transform.
	 *
	 * NOTE(review): this listing is damaged by text extraction. Everything
	 * between "i" in binarize's loop header and "transform(" is missing: the
	 * rest of binarize (loop body and return) and the start of transform's
	 * declaration. Generic type arguments also appear to have been stripped.
	 * Restore from the upstream source before relying on this code.
	 *
	 * transform converts the values of one column according to its spec:
	 * numeric specs are standardized (when the spec has a "stdev" key) or
	 * binarized (when it has a "zero_value" key); categorical specs are
	 * passed to oneHot together with the spec's "values" array.
	 * It throws IllegalArgumentException when the spec type is neither
	 * numeric nor categorical, or when a numeric spec has neither key.
	 */
	private static ArrayList binarize(
			ArrayList vector, Double zero, Double one) {
		
		// NOTE(review): text lost on the next line -- see the note above.
		for (int i=0; i transform(
			ArrayList vector, JSONObject spec) {
		
		ArrayList output = new ArrayList();
		
		String type = (String) spec.get("type");
		
		// Reject any spec type other than numeric or categorical
		if ( !(Constants.OPTYPE_NUMERIC.equals(type) || 
				Constants.OPTYPE_CATEGORICAL.equals(type)) ) {
			throw new IllegalArgumentException(
					String.format("%s is not a valid spec type!", type));
		}
		
		if (Constants.OPTYPE_NUMERIC.equals(type)) {
			// Copy the values, casting each one to Double. The cast fails with
			// ClassCastException for any non-null value that is not a Double;
			// preprocess converts Number inputs to Double before calling.
			ArrayList vectorD = new ArrayList();
			for (Object value: vector) {
				vectorD.add((Double) value);
			}
			
			// A numeric spec must carry either the stdev key or the zero_value key
			if ( !spec.containsKey(STANDARD_DEVIATION) && 
				 !spec.containsKey(ZERO) ) {
					throw new IllegalArgumentException(
							String.format("%s is not a valid numeric spec!", spec));
			}
			
			// Standardization takes precedence when both keys are present.
			if (spec.containsKey(STANDARD_DEVIATION)) {
				// NOTE(review): a spec with "stdev" but no "mean" fails here
				// with a NullPointerException.
				Double mean = ((Number) spec.get(MEAN)).doubleValue();
				Double stdev = ((Number) spec.get(STANDARD_DEVIATION)).doubleValue();
				output = standardize(vectorD, mean, stdev);
			} else {
				// Always true at this point because of the format check above.
				if (spec.containsKey(ZERO)) {
					Double low = ((Number) spec.get(ZERO)).doubleValue();
					// NOTE(review): a spec with "zero_value" but no "one_value"
					// fails here with a NullPointerException.
					Double high = ((Number) spec.get(ONE)).doubleValue();
					output = binarize(vectorD, low, high);
				}
			}
		}
		
		if (Constants.OPTYPE_CATEGORICAL.equals(type)) {
			// Categorical values are passed through uncast, together with the
			// spec's "values" array.
			JSONArray values = (JSONArray) spec.get("values");
			output = oneHot(vector, values);
		}
		
		return output;
	}
	
	
	/**
	 * Walks a tree for a single input row and returns the output values of
	 * the leaf that is reached.
	 *
	 * <p>A node is a JSON array. While the last element of the current node
	 * is a non-empty array, the node is treated as a split of the form
	 * {@code [featureIndex, threshold, leftChild, rightChild]}: the left
	 * child is taken when the row's feature value is less than or equal to
	 * the threshold, the right child otherwise. When the last element is
	 * {@code null} or empty, the node is a leaf and its first element holds
	 * the output values.
	 *
	 * <p>The leaf values are copied through {@link Number#doubleValue()},
	 * because a JSON parser may deliver integral literals as {@code Long};
	 * the returned list therefore always contains {@code Double} elements.
	 *
	 * @param tree  root node of the tree
	 * @param point feature values of the row, indexed by feature position
	 * @return a new list with the leaf's output values
	 * @throws NullPointerException if the feature value used by a split is
	 *         {@code null}
	 */
	private static List<Double> treePredict(
			JSONArray tree, List<Double> point) {

		JSONArray node = tree;
		JSONArray last = (JSONArray) node.get(node.size() - 1);

		// Descend until a leaf is reached.
		while (last != null && last.size() > 0) {
			int featureIndex = ((Number) node.get(0)).intValue();
			double threshold = ((Number) node.get(1)).doubleValue();
			double featureValue = point.get(featureIndex);

			node = (JSONArray) (featureValue <= threshold
					? node.get(2) : node.get(3));
			last = (JSONArray) node.get(node.size() - 1);
		}

		// Normalize every leaf value to Double, whatever Number subtype the
		// parser produced.
		JSONArray leaf = (JSONArray) node.get(0);
		List<Double> outputs = new ArrayList<Double>(leaf.size());
		for (Object value: leaf) {
			outputs.add(((Number) value).doubleValue());
		}
		return outputs;
	}
	
	
	/**
	 * Combines the predictions of every tree in model into a single row.
	 *
	 * NOTE(review): this listing is damaged by text extraction. The loop that
	 * merges a tree's prediction into preds, the condition guarding the
	 * normalization, and the normalization loop itself are missing. Generic
	 * type arguments also appear to have been stripped ("ArrayList>").
	 * Restore from the upstream source before relying on this code.
	 *
	 * What the surviving code shows: each tree is evaluated on every input
	 * row with treePredict; the first tree's prediction for the first row
	 * seeds preds; the sum of preds is computed into norm; and preds is
	 * returned wrapped in a single-element list.
	 */
	private static ArrayList> getEmbedding(
			ArrayList> input, JSONArray model) {
		
		List preds = new ArrayList();
		for (Object tree: model) {
			// Predictions of this tree, one entry per input row.
			ArrayList> treePreds = new ArrayList>();	
			for (List row: input) {
				treePreds.add(treePredict((JSONArray) tree, row));
			}
			
			// Throws IndexOutOfBoundsException when input is empty.
			List firstPred = treePreds.get(0);
			
			if (preds.size() == 0) {
				// First tree: start from its prediction for the first row.
				preds.addAll(treePreds.get(0));
			} else {
				// NOTE(review): text lost on the next line -- the loop body
				// (presumably accumulating firstPred into preds), the end of
				// the loop over trees, and the start of a condition that
				// ends in "1)" and guards the normalization. Confirm against
				// the upstream source.
				for (int i=0; i 1) {
			double norm = 0.0;
			for (Double d: preds) {
				norm += d;
			}
			
			// NOTE(review): text lost on the next line -- the loop that uses
			// norm and the declaration of output.
			for (int i=0; i> output = new ArrayList>();
		output.add(preds);
		return output;
	}
	
	
	public static ArrayList> treeTransform(
			ArrayList> input, JSONArray trees) {
		
		ArrayList> outdata = new ArrayList>();
		for (Object tree: trees) {
			JSONArray featureRange = (JSONArray) ((JSONArray) tree).get(0);
			JSONArray model = (JSONArray) ((JSONArray) tree).get(1);
			
			int sidx = ((Number) featureRange.get(0)).intValue();
			int eidx = ((Number) featureRange.get(1)).intValue();
			
			ArrayList> rowData = new ArrayList>();
			
			for (int i=0; i row = input.get(i);
				rowData.add(row.subList(sidx, eidx));
			}
			
			ArrayList> outarray = getEmbedding(rowData, model) ;
			
			if (outdata.size() > 0) {
				outdata.get(0).addAll(outarray.get(0));
			} else {
				outdata = outarray;
			}
		}
		outdata.get(0).addAll(input.get(0));
		return outdata;
	}
	
	
	public static ArrayList> preprocess(
			List columns, JSONArray specs) {
		
		ArrayList> outdata = new ArrayList>();
		ArrayList output = new ArrayList();
		
		for (Object specObj: specs) {
			JSONObject spec = (JSONObject) specObj;
			
			int index = ((Number) spec.get("index")).intValue();
			
			Object value = null;
			if (columns.get(index) !=  null) {
				if (columns.get(index) instanceof Number) {
					value = ((Number) columns.get(index)).doubleValue();
				} else {
					value = columns.get(index);
				}
			}
			
			ArrayList column = new ArrayList();		
			column.add(value);
			
			output.addAll(transform(column, spec));
		}
		outdata.add(output);
		return outdata;
	}
	
}