All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.json.JSONInstances Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * JSONInstances.java
 * Copyright (C) 2009-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.core.json;

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;

/**
 * Class for transforming Instances objects into JSON
 * objects and vice versa. Missing values get stored as "?".
 * <p>
 * The JSON layout consists of a {@link #HEADER} object (relation name plus an
 * array of attribute descriptions) and a {@link #DATA} array (one object per
 * instance, holding a sparse flag, a weight and the array of values).
 *
 * @author  FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 9664 $
 * @see #MISSING_VALUE
 */
public class JSONInstances {

  /** the header section. */
  public static final String HEADER = "header";

  /** the data section. */
  public static final String DATA = "data";

  /** the relation name. */
  public static final String RELATION = "relation";

  /** the attributes object. */
  public static final String ATTRIBUTES = "attributes";

  /** the name attribute. */
  public static final String NAME = "name";

  /** the type attribute. */
  public static final String TYPE = "type";

  /** the class attribute indicator. */
  public static final String CLASS = "class";

  /** the labels attribute. */
  public static final String LABELS = "labels";

  /** the weight attribute. */
  public static final String WEIGHT = "weight";

  /** the dateformat attribute. */
  public static final String DATEFORMAT = "dateformat";

  /** the sparse attribute. */
  public static final String SPARSE = "sparse";

  /** the values attribute. */
  public static final String VALUES = "values";

  /** the separator for index/value in case of sparse instances. */
  public static final String SPARSE_SEPARATOR = ":";

  /** the missing value indicator. */
  public static final String MISSING_VALUE = "?";

  /**
   * Returns the value of the named child, or the default if the child is
   * not present (avoids a NullPointerException on incomplete JSON).
   *
   * @param parent	the node to look the child up in
   * @param name	the name of the child
   * @param defValue	the value to fall back on
   * @return		the child's value or the default
   */
  private static Object childValue(JSONNode parent, String name, Object defValue) {
    JSONNode child = parent.getChild(name);
    if (child == null) {
      return defValue;
    }
    return child.getValue(defValue);
  }

  /**
   * Returns the numeric value of the named child as double. Any
   * {@link Number} is accepted, so that integral JSON numbers (e.g. a
   * weight of <code>1</code>) work as well as floating point ones.
   *
   * @param parent	the node to look the child up in
   * @param name	the name of the child
   * @param defValue	the value to fall back on if the child is absent
   * @return		the numeric value
   */
  private static double childDouble(JSONNode parent, String name, double defValue) {
    return ((Number) childValue(parent, name, Double.valueOf(defValue))).doubleValue();
  }

  /**
   * Turns the JSON object into an Attribute, if possible.
   *
   * @param att		the JSON object to turn into an Attribute
   * @param classAtt	for storing whether the attribute is the class attribute;
   * 			the first element is overwritten on every call
   * @return		the Attribute, null in case of an error
   */
  protected static Attribute toAttribute(JSONNode att, boolean[] classAtt) {
    Attribute result;
    String name = (String) childValue(att, NAME, "noname");
    String type = (String) childValue(att, TYPE, "");
    double weight = childDouble(att, WEIGHT, 1.0);

    // Report the class flag back to the caller. It is always assigned (not
    // only when true), since callers reuse the same array across attributes.
    if ((classAtt != null) && (classAtt.length > 0)) {
      classAtt[0] = Boolean.TRUE.equals(childValue(att, CLASS, Boolean.FALSE));
    }

    if (type.equals(Attribute.typeToString(Attribute.NUMERIC))) {
      result = new Attribute(name);
    }
    else if (type.equals(Attribute.typeToString(Attribute.NOMINAL))) {
      JSONNode labels = att.getChild(LABELS);
      if (labels == null) {
        System.err.println("No '" + LABELS + "' array for nominal attribute '" + name + "'!");
        return null;
      }
      ArrayList<String> values = new ArrayList<String>(labels.getChildCount());
      for (int i = 0; i < labels.getChildCount(); i++) {
        // same string conversion as used for instance values in toInstance
        String label = String.valueOf(((JSONNode) labels.getChildAt(i)).getValue());
        // unescape '?' labels
        if (label.equals("'" + MISSING_VALUE + "'")) {
          values.add(MISSING_VALUE);
        }
        else {
          values.add(label);
        }
      }
      result = new Attribute(name, values);
    }
    else if (type.equals(Attribute.typeToString(Attribute.DATE))) {
      String dateformat = (String) childValue(att, DATEFORMAT, "yyyy-MM-dd'T'HH:mm:ss");
      result = new Attribute(name, dateformat);
    }
    else if (type.equals(Attribute.typeToString(Attribute.STRING))) {
      result = new Attribute(name, (ArrayList<String>) null);
    }
    else {
      System.err.println("Unhandled attribute type '" + type + "'!");
      return null;
    }
    result.setWeight(weight);

    return result;
  }

  /**
   * Turns the JSON Object into an Instance, if possible.
   * <p>
   * Sparse instances store each value as <code>index:value</code>, dense
   * instances store one value per attribute in attribute order.
   *
   * @param inst	the JSON object to turn into an Instance
   * @param data	the data so far (only used for header information)
   * @return		the Instance, null in case of an error
   */
  protected static Instance toInstance(JSONNode inst, Instances data) {
    boolean sparse = Boolean.TRUE.equals(childValue(inst, SPARSE, Boolean.FALSE));
    double weight = childDouble(inst, WEIGHT, 1.0);
    JSONNode values = inst.getChild(VALUES);
    if (values == null) {
      System.err.println("No '" + VALUES + "' array!");
      return null;
    }

    // attributes not listed by a sparse instance keep the default of 0
    double[] vals = new double[data.numAttributes()];
    for (int i = 0; i < values.getChildCount(); i++) {
      String value = "" + ((JSONNode) values.getChildAt(i)).getValue();
      int index = i;

      try {
        if (sparse) {
          // split "index:value"; done inside the try block so that a
          // malformed entry results in null rather than an exception
          int pos = value.indexOf(SPARSE_SEPARATOR);
          if (pos < 0) {
            System.err.println("Missing '" + SPARSE_SEPARATOR + "' in sparse value #" + (i+1) + ": " + value);
            return null;
          }
          index = Integer.parseInt(value.substring(0, pos));
          value = value.substring(pos + 1);
        }
        if ((index < 0) || (index >= vals.length)) {
          System.err.println("Attribute index " + index + " of value #" + (i+1) + " is out of range!");
          return null;
        }

        if (value.equals(MISSING_VALUE)) {
          vals[index] = Utils.missingValue();
          continue;
        }

        // unescape '?' labels
        if (value.equals("'" + MISSING_VALUE + "'")) {
          value = MISSING_VALUE;
        }

        // NOTE(review): only nominal values get unquoted below; string and
        // date values are used verbatim - confirm this round-trips values
        // that Instance.toString(int) renders in quoted form.
        Attribute att = data.attribute(index);
        if (att.isNumeric() && !att.isDate()) {
          vals[index] = Double.parseDouble(value);
        }
        else if (att.isNominal()) {
          int labelIndex = att.indexOfValue(value);
          if ((labelIndex == -1) && value.startsWith("'") && value.endsWith("'")) {
            String unquoted = Utils.unquote(value);
            labelIndex = att.indexOfValue(unquoted);
            // FIXME backslashes seem to get escaped twice when creating a JSON file?
            if (labelIndex == -1) {
              labelIndex = att.indexOfValue(Utils.unbackQuoteChars(unquoted));
            }
          }
          if (labelIndex == -1) {
            System.err.println("Unknown label '" + value + "' for attribute #" + (index+1) + "!");
            return null;
          }
          vals[index] = labelIndex;
        }
        else if (att.isDate()) {
          vals[index] = att.parseDate(value);
        }
        else if (att.isString()) {
          vals[index] = att.addStringValue(value);
        }
        else {
          System.err.println("Unhandled attribute type '" + Attribute.typeToString(att.type()) + "'!");
          return null;
        }
      }
      catch (Exception e) {
        System.err.println("Error parsing value #" + (index+1) + ": " + e.toString());
        return null;
      }
    }

    Instance result;
    if (sparse) {
      result = new SparseInstance(weight, vals);
    }
    else {
      result = new DenseInstance(weight, vals);
    }
    result.setDataset(data);

    return result;
  }

  /**
   * Turns a JSON object, if possible, into an Instances object.
   * <p>
   * Both the header and the data section must be present, even if only the
   * header is retrieved.
   *
   * @param json	the JSON object to convert
   * @param onlyHeader	whether to retrieve only the header
   * @return		the generated Instances object, null if not possible
   */
  protected static Instances toInstances(JSONNode json, boolean onlyHeader) {
    JSONNode header = json.getChild(HEADER);
    if (header == null) {
      System.err.println("No '" + HEADER + "' section!");
      return null;
    }
    JSONNode data = json.getChild(DATA);
    if (data == null) {
      System.err.println("No '" + DATA + "' section!");
      return null;
    }

    // attributes
    JSONNode attributes = header.getChild(ATTRIBUTES);
    if (attributes == null) {
      System.err.println("No '" + ATTRIBUTES + "' array!");
      return null;
    }
    ArrayList<Attribute> atts = new ArrayList<Attribute>(attributes.getChildCount());
    boolean[] classAtt = new boolean[1];
    int classIndex = -1;
    for (int i = 0; i < attributes.getChildCount(); i++) {
      Attribute att = toAttribute((JSONNode) attributes.getChildAt(i), classAtt);
      if (att == null) {
        System.err.println("Could not convert attribute #" + (i+1) + "!");
        return null;
      }
      // if several attributes are flagged, the last one wins
      if (classAtt[0]) {
        classIndex = i;
      }
      atts.add(att);
    }
    Instances result = new Instances(
        childValue(header, RELATION, "unknown").toString(),
        atts,
        (onlyHeader ? 0 : data.getChildCount()));
    result.setClassIndex(classIndex);

    // data
    if (!onlyHeader) {
      for (int i = 0; i < data.getChildCount(); i++) {
        Instance inst = toInstance((JSONNode) data.getChildAt(i), result);
        if (inst == null) {
          System.err.println("Could not convert instance #" + (i+1) + "!");
          return null;
        }
        result.add(inst);
      }
    }

    return result;
  }

  /**
   * Turns a JSON object, if possible, into an Instances object.
   *
   * @param json	the JSON object to convert
   * @return		the generated Instances object, null if not possible
   */
  public static Instances toInstances(JSONNode json) {
    return toInstances(json, false);
  }

  /**
   * Turns a JSON object, if possible, into an Instances object (only header).
   *
   * @param json	the JSON object to convert
   * @return		the generated Instances header object, null if not possible
   */
  public static Instances toHeader(JSONNode json) {
    return toInstances(json, true);
  }

  /**
   * Turns the Attribute into a JSON object.
   * <p>
   * Stores name, type, class flag and weight; nominal attributes
   * additionally get their labels, date attributes their date format.
   *
   * @param inst	the corresponding dataset
   * @param att		the attribute to convert
   * @return		the JSON object
   */
  protected static JSONNode toJSON(Instances inst, Attribute att) {
    JSONNode result = new JSONNode();

    result.addPrimitive(NAME, att.name());
    result.addPrimitive(TYPE, Attribute.typeToString(att));
    result.addPrimitive(CLASS, (att.index() == inst.classIndex()));
    result.addPrimitive(WEIGHT, att.weight());
    if (att.isNominal()) {
      JSONNode labels = result.addArray(LABELS);
      for (int i = 0; i < att.numValues(); i++) {
        String label = att.value(i);
        // escape '?' labels, to tell them apart from the missing value indicator
        if (label.equals(MISSING_VALUE)) {
          labels.addArrayElement("'" + label + "'");
        }
        else {
          labels.addArrayElement(label);
        }
      }
    }
    if (att.isDate()) {
      result.addPrimitive(DATEFORMAT, att.getDateFormat());
    }

    return result;
  }

  /**
   * Turns the Instance into a JSON object.
   * <p>
   * Missing values are written as {@link #MISSING_VALUE}; all other values
   * are written exactly as {@link Instance#toString(int)} renders them. A
   * value rendered as <code>'?'</code> (with quotes) therefore stays
   * distinguishable from the missing value indicator.
   *
   * @param inst	the Instance to convert
   * @return		the JSON object
   */
  protected static JSONNode toJSON(Instance inst) {
    JSONNode result = new JSONNode();

    boolean sparse = (inst instanceof SparseInstance);
    result.addPrimitive(SPARSE, sparse);
    result.addPrimitive(WEIGHT, inst.weight());
    JSONNode values = result.addArray(VALUES);
    if (sparse) {
      // only the explicitly stored values, each prefixed with its attribute index
      for (int i = 0; i < inst.numValues(); i++) {
        int attIndex = inst.index(i);
        String text = inst.isMissing(attIndex) ? MISSING_VALUE : inst.toString(attIndex);
        values.addArrayElement(attIndex + SPARSE_SEPARATOR + text);
      }
    }
    else {
      for (int i = 0; i < inst.numAttributes(); i++) {
        values.addArrayElement(inst.isMissing(i) ? MISSING_VALUE : inst.toString(i));
      }
    }

    return result;
  }

  /**
   * Turns the Instances object into a JSON object.
   *
   * @param inst	the Instances to turn into a JSON object
   * @return		the JSON object
   */
  public static JSONNode toJSON(Instances inst) {
    JSONNode result = new JSONNode();

    // header
    JSONNode header = result.addObject(HEADER);
    header.addPrimitive(RELATION, inst.relationName());
    JSONNode atts = header.addArray(ATTRIBUTES);
    for (int i = 0; i < inst.numAttributes(); i++) {
      atts.add(toJSON(inst, inst.attribute(i)));
    }

    // data
    JSONNode data = result.addArray(DATA);
    for (int i = 0; i < inst.numInstances(); i++) {
      data.add(toJSON(inst.instance(i)));
    }

    return result;
  }

  /**
   * For testing only. Loads the dataset, prints its JSON representation and
   * then prints the dataset obtained from converting the JSON back.
   *
   * @param args	expects a dataset as first parameter
   * @throws Exception	if something goes wrong
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 1) {
      System.err.println("No dataset supplied!");
      System.exit(1);
    }

    // load dataset
    Instances data = DataSource.read(args[0]);

    // turn Instances into JSON object and output it
    JSONNode json = toJSON(data);
    StringBuffer buffer = new StringBuffer();
    json.toString(buffer);
    System.out.println(buffer.toString());

    // turn JSON object back into Instances and output it
    Instances inst = toInstances(json);
    System.out.println(inst);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy