All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.bigml.binding.Fields Maven / Gradle / Ivy

Go to download

An open source Java client that gives you a simple binding to interact with BigML. You can use it to easily create, retrieve, list, update, and delete BigML resources.

The newest version!
package org.bigml.binding;

import org.bigml.binding.resources.AbstractResource;
import org.bigml.binding.resources.Dataset;
import org.bigml.binding.resources.Model;
import org.bigml.binding.resources.Source;
import org.bigml.binding.utils.Utils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

/**
 * A class to deal with the fields of a resource.
 *
 * This module helps to map between ids, names, and column_numbers in the fields
 * of source, dataset, or model. Also to validate your input data for
 * predictions or to list all the fields from a resource.
 *
 * Usage example, written in the syntax of the BigML Python bindings:
 *
 * <pre>
 * from bigml.api import BigML
 * from bigml.fields import Fields
 *
 * api = BigML()
 *
 * source = api.get_source("source/50a6bb94eabcb404d3000174")
 * fields = Fields(source['object']['fields'])
 *
 * dataset = api.get_dataset("dataset/50a6bb96eabcb404cd000342")
 * fields = Fields(dataset['object']['fields'])
 *
 * # Note that the fields in a model come one level deeper
 * model = api.get_model("model/50a6bbac035d0706db0008f8")
 * fields = Fields(model['object']['model']['fields'])
 *
 * prediction = api.get_prediction("prediction/50a69688035d0706dd00044d")
 * fields = Fields(prediction['object']['fields'])
 * </pre>
 */
public class Fields {

	// Logging. Shared by every instance instead of being re-created per object.
	static final Logger LOGGER = LoggerFactory.getLogger(Fields.class);

	// Resource types that getFieldsStructure probes a JSON resource against.
	protected static Class<?>[] RESOURCES_WITH_FIELDS = new Class<?>[] { Source.class, Dataset.class, Model.class };

	// Tokens treated as "missing value" when the caller and the resource give none.
	public static String[] DEFAULT_MISSING_TOKENS = { "", "N/A", "n/a", "NULL", "null", "-", "#DIV/0", "#REF!",
			"#NAME?", "NIL", "nil", "NA", "na", "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?" };

	// Locale resolved from the data locale in the constructor.
	private Locale locale;
	// Field descriptions keyed by field id.
	private JSONObject fields;
	// Field descriptions keyed by field name.
	private JSONObject fieldsByName;
	// Field id for every column number.
	private Map<Long, String> fieldsByColumnNumber;
	// Tokens that normalize() turns into null.
	private List<String> missingTokens;
	// Column numbers of all fields, sorted ascending.
	private List<Long> fieldsColumns;
	// Ids of the fields selected for the input data.
	private List<String> filteredFields;

	// The members below are (re)computed by updateObjectiveField.

	// Field id expected at each position of a row.
	private List<String> rowIds;
	// Key used for each row position when building the input data.
	private List<String> headers;
	// Column number of the objective field.
	private Object objectiveField;
	// Whether rows are expected to carry the objective field column.
	private Boolean objectiveFieldPresent;
	// Row positions of the fields listed in filteredFields.
	private List<Long> filteredIndexes;

	/**
	 * Returns the field structure for a resource, its locale and missing_tokens.
	 *
	 * The resource is probed against every type listed in RESOURCES_WITH_FIELDS.
	 * When none of them recognises it, the returned structure is left empty, i.e.
	 * all of its getters return null.
	 *
	 * @param resource the BigML resource object
	 *
	 * @return the field structure for a resource
	 */
	public static FieldsStructure getFieldsStructure(JSONObject resource) {
		AbstractResource resourceInstance = null;
		for (Class<?> resourceClass : RESOURCES_WITH_FIELDS) {
			try {
				// Class.newInstance() is deprecated; go through the no-arg constructor
				AbstractResource candidate = (AbstractResource) resourceClass.getDeclaredConstructor().newInstance();
				if (candidate.isInstance(resource)) {
					resourceInstance = candidate;
				}
			} catch (Exception ignored) {
				// Deliberately best effort: a type that cannot be instantiated or
				// probed simply does not match the resource
			}
		}

		FieldsStructure fieldsStructure = new FieldsStructure();
		if (resourceInstance == null) {
			return fieldsStructure;
		}

		// Sources are read from "object.source_parser", every other type from "object"
		String settingsPath = (resourceInstance instanceof Source) ? "object.source_parser" : "object";
		fieldsStructure.setLocale(
				Utils.findLocale((String) Utils.getJSONObject(resource, settingsPath + ".locale"), true));
		fieldsStructure.setMissingTokens((JSONArray) Utils.getJSONObject(resource, settingsPath + ".missing_tokens"));

		// Models are read from one level deeper than the other resources
		String fieldsPath = (resourceInstance instanceof Model) ? "object.model.fields" : "object.fields";
		fieldsStructure.setFields((JSONObject) Utils.getJSONObject(resource, fieldsPath));

		return fieldsStructure;
	}

	/**
	 * Builds the fields helper from a resource, or from a bare fields structure,
	 * using default settings.
	 *
	 * Delegates to the full constructor with no explicit missing tokens, data
	 * locale, objective field or field subset, and with the objective field
	 * flagged as not present.
	 *
	 * @param resourceOrField the resource that hold the fields or the fields itself
	 */
	public Fields(JSONObject resourceOrField) {
		this(resourceOrField, null, null, null, Boolean.FALSE, null);
	}

	/**
	 * The constructor can be instantiated with resources or a fields structure. The
	 * structure is checked and fields structure is returned if a resource type is
	 * matched.
	 *
	 * An argument holding a "resource" key is treated as a resource and its
	 * fields, locale and missing tokens are extracted with getFieldsStructure;
	 * anything else is assumed to be the fields structure itself.
	 *
	 * @param resourceOrField       the resource that hold the fields or the fields
	 *                              itself
	 * @param missingTokens         the list of missing tokens to use. When null,
	 *                              the tokens of the resource are used, falling
	 *                              back to DEFAULT_MISSING_TOKENS
	 * @param dataLocale            the locale of the data. When null, the locale of
	 *                              the resource is used, falling back to the client
	 *                              default locale
	 * @param objectiveField        the name of the objective field
	 * @param objectiveFieldPresent if the objective field is present in the fields
	 * @param includeFields         the fields to be included if we only want only a
	 *                              subset
	 *
	 * @throws IllegalStateException if no fields structure can be found in the
	 *                               given resource
	 */
	public Fields(JSONObject resourceOrField, List missingTokens, String dataLocale, Object objectiveField,
			Boolean objectiveFieldPresent, List includeFields) {

		// We first check if the argument is a resource instance
		if (resourceOrField.containsKey("resource")) {
			FieldsStructure fieldsStructure = getFieldsStructure(resourceOrField);
			this.fields = fieldsStructure.getFields();

			// Report an unsupported resource with the intended error before
			// touching its locale, which is missing as well in that case
			if (this.fields == null) {
				throw new IllegalStateException("No fields structure was found.");
			}

			if (dataLocale == null) {
				Locale resourceLocale = fieldsStructure.getLocale();
				dataLocale = (resourceLocale != null) ? resourceLocale.toString()
						: BigMLClient.DEFAUL_LOCALE.toString();
			}

			if (missingTokens == null) {
				// May still be null when the resource declares no tokens
				missingTokens = fieldsStructure.getMissingTokens();
			}
		} else {
			// If the resource structure is not in the expected set, fields
			// structure is assumed
			this.fields = resourceOrField;
			if (dataLocale == null) {
				dataLocale = BigMLClient.DEFAUL_LOCALE.toString();
			}
		}

		// Never leave the tokens null: normalize() relies on them
		if (missingTokens == null) {
			missingTokens = new ArrayList<String>(Arrays.asList(DEFAULT_MISSING_TOKENS));
		}

		// The guard stays because serialising the arguments is not free
		if (LOGGER.isDebugEnabled()) {
			LOGGER.debug("resource_or_fields: {}", JSONObject.toJSONString(resourceOrField));
			LOGGER.debug("missing_tokens: {}", Arrays.toString(missingTokens.toArray()));
			LOGGER.debug("data_locale: {}", dataLocale);
			LOGGER.debug("objective_field: {}", objectiveField);
			LOGGER.debug("objective_field_present: {}", objectiveFieldPresent);
		}

		this.fieldsByName = Utils.invertDictionary(this.fields, "name");

		// Re-key the inverted dictionary so that a column number leads straight
		// to its field id
		JSONObject fieldsKeyedByColumn = Utils.invertDictionary(this.fields, "column_number");
		Map<Long, String> idsByColumn = new HashMap<Long, String>();
		for (Object columnNumber : fieldsKeyedByColumn.keySet()) {
			JSONObject field = (JSONObject) fieldsKeyedByColumn.get(columnNumber);
			idsByColumn.put((Long) columnNumber, field.get("fieldID").toString());
		}
		this.fieldsByColumnNumber = idsByColumn;

		this.locale = Utils.findLocale(dataLocale, true);

		this.missingTokens = missingTokens;

		List<Long> sortedColumns = new ArrayList<Long>(idsByColumn.keySet());
		Collections.sort(sortedColumns);
		this.fieldsColumns = sortedColumns;

		// Ids of the fields to be included
		// NOTE(review): with includeFields == null nothing is selected, so pair()
		// and toInputData() produce empty maps - confirm whether null was meant
		// to select every field
		List<String> includedIds = new ArrayList<String>();
		if (includeFields != null) {
			for (Object fieldId : this.fields.keySet()) {
				if (includeFields.contains(fieldId.toString())) {
					includedIds.add(fieldId.toString());
				}
			}
		}
		this.filteredFields = includedIds;

		if (LOGGER.isDebugEnabled()) {
			LOGGER.debug("fields: {}", Arrays.toString(sortedColumns.toArray()));
			LOGGER.debug("fields_by_column_number: {}", JSONObject.toJSONString(idsByColumn));
			LOGGER.debug("missing_tokens: {}", Arrays.toString(missingTokens.toArray()));
			LOGGER.debug("data_locale: {}", dataLocale);
		}

		// To be updated in update_objective_field
		this.rowIds = null;
		this.headers = null;
		this.objectiveField = null;
		this.objectiveFieldPresent = null;
		this.filteredIndexes = null;

		// No headers are known at construction time (this.headers is null here)
		updateObjectiveField(objectiveField, objectiveFieldPresent, this.headers);

	}

	/**
	 * Updates objective_field and headers info
	 *
	 * Permits to update the objective_field, objective_field_present and headers
	 * info from the constructor and also in a per row basis.
	 *
	 * The objective field is dropped from the included fields. The id expected at
	 * every row position is then recomputed, and each included field is mapped to
	 * its position in the row. Included fields that do not appear in the row are
	 * skipped.
	 *
	 * @param objectiveField        the index of the objective field, or its name or
	 *                              id. When null the last column is used
	 * @param objectiveFieldPresent if the objective field is present in the fields
	 *                              list
	 * @param headers	list of strings with fields names
	 */
	protected void updateObjectiveField(Object objectiveField, Boolean objectiveFieldPresent, List headers) {

		// If no objective field, select the last column, else store its column
		if (objectiveField == null) {
			// An empty fields structure has no last column to fall back on
			this.objectiveField = fieldsColumns.isEmpty() ? null : fieldsColumns.get(fieldsColumns.size() - 1);
		} else if (objectiveField instanceof String) {
			this.objectiveField = getFieldColumnNumber(objectiveField.toString());
		} else if (objectiveField instanceof Number) {
			// Column numbers are keyed as Long, so an Integer would never match
			this.objectiveField = Long.valueOf(((Number) objectiveField).longValue());
		} else {
			this.objectiveField = objectiveField;
		}

		// The objective field is never part of the input data
		Object objectiveFieldId = fieldsByColumnNumber.get(this.objectiveField);
		filteredFields.remove(objectiveFieldId);

		this.objectiveFieldPresent = objectiveFieldPresent;
		boolean objectiveInRow = Boolean.TRUE.equals(objectiveFieldPresent);

		List<String> ids;
		List<String> names;
		if (headers == null) {
			// The row is supposed to contain the fields sorted by column number.
			// The objective column keeps its slot when the row carries it, so
			// that the positions of the following fields stay aligned
			ids = new ArrayList<String>(fieldsColumns.size());
			for (Object column : fieldsColumns) {
				if (objectiveInRow || !column.equals(this.objectiveField)) {
					ids.add((String) fieldsByColumnNumber.get(column));
				}
			}
			names = ids;
		} else {
			// The row is supposed to contain the fields as sorted in headers
			ids = new ArrayList<String>(headers.size());
			names = new ArrayList<String>(headers.size());
			for (Object header : headers) {
				String headerName = (String) header;
				names.add(headerName);
				ids.add(getFieldId(headerName));
			}
		}
		this.rowIds = ids;
		this.headers = names;

		// Map each included field to its position in the row. A field that is
		// not in the row has no position; storing -1 would break toInputData
		List<Long> positions = new ArrayList<Long>(filteredFields.size());
		for (Object filteredField : filteredFields) {
			int position = ids.indexOf(filteredField);
			if (position >= 0) {
				positions.add(Long.valueOf(position));
			}
		}
		this.filteredIndexes = positions;
	}

	/**
	 * Lists the ids of all the fields.
	 *
	 * The result is a fresh list, so changing it does not affect this object.
	 *
	 * @return list of fields columns ids
	 */
	public List getColumnsIds() {
		List<Object> columnIds = new ArrayList<Object>();
		columnIds.addAll(fields.keySet());
		return columnIds;
	}

	/**
	 * Lists the names of all the fields.
	 *
	 * The result is a fresh list, so changing it does not affect this object.
	 *
	 * @return list of columns names
	 */
	public List getColumnsNames() {
		List<Object> columnNames = new ArrayList<Object>();
		columnNames.addAll(fieldsByName.keySet());
		return columnNames;
	}

	/**
	 * Looks a field up by its id.
	 *
	 * @param fieldID	the id of the field
	 *
	 * @return the field object, or null when no field has that id
	 */
	public JSONObject getFieldById(String fieldID) {
		Object found = fields.get(fieldID);
		return (JSONObject) found;
	}

	/**
	 * Looks a field up by its name.
	 *
	 * @param fieldName	the name of the field
	 *
	 * @return the field object, or null when no field has that name
	 */
	public JSONObject getFieldByName(String fieldName) {
		Object found = fieldsByName.get(fieldName);
		return (JSONObject) found;
	}

	/**
	 * Resolves the id of a field.
	 *
	 * The argument is first looked up as a field name and, failing that, as a
	 * field id.
	 *
	 * @param fieldName the field name
	 *
	 * @return the field id, or null when the field is unknown
	 */
	public String getFieldId(String fieldName) {
		JSONObject match = (JSONObject) fieldsByName.get(fieldName);
		if (match == null) {
			match = (JSONObject) fields.get(fieldName);
		}
		if (match == null) {
			return null;
		}
		return match.get("fieldID").toString();
	}

	/**
	 * Resolves the name of a field from its id.
	 *
	 * @param fieldID the field ID
	 *
	 * @return the name of a field, or null when no field has that id
	 */
	public String getFieldName(String fieldID) {
		JSONObject match = (JSONObject) fields.get(fieldID);
		if (match == null) {
			return null;
		}
		return match.get("name").toString();
	}

	/**
	 * Resolves the column number of a field.
	 *
	 * The argument is first looked up as a field id and, failing that, as a field
	 * name.
	 *
	 * @param fieldID the field key
	 *
	 * @return the column number of a field, or null when the field is unknown
	 */
	public Long getFieldColumnNumber(String fieldID) {
		JSONObject match = (JSONObject) fields.get(fieldID);
		if (match == null) {
			match = (JSONObject) fieldsByName.get(fieldID);
		}
		if (match == null) {
			return null;
		}
		return (Long) match.get("column_number");
	}

	/**
	 * Counts the fields held by this object.
	 *
	 * @return the number of fields
	 */
	public int getLength() {
		int fieldCount = fields.size();
		return fieldCount;
	}

	/**
	 * Lists a description of the fields
	 *
	 * One line is written per field, in column order, with its name, optype and
	 * column number.
	 *
	 * @param out the string builder used to append the fields description to
	 *            already existent text. A new one is created when null
	 *
	 * @return a StringBuilder object with description of fields
	 */
	public StringBuilder listFields(StringBuilder out) {
		StringBuilder description = (out != null ? out : new StringBuilder());

		for (Object column : fieldsColumns) {
			Object fieldId = fieldsByColumnNumber.get(column);
			JSONObject field = (JSONObject) fields.get(fieldId);

			// A real line break: "\\n" used to emit a backslash followed by 'n'
			description
					.append(String.format("[%-32s: %-16s: %-8s]\n", field.get("name"), field.get("optype"), column));
		}

		return description;
	}

	/**
	 * Collects the preferred fields: those whose "preferred" attribute is true and
	 * those that do not carry the attribute at all.
	 *
	 * @return a map with fieldId as key and the field json as value
	 */
	public Map getPreferredFields() {
		Map<String, JSONObject> preferred = new HashMap<String, JSONObject>();

		for (Object entry : fields.values()) {
			JSONObject candidate = (JSONObject) entry;
			// Only an explicit false rules a field out
			if (candidate.containsKey("preferred") && !((Boolean) candidate.get("preferred"))) {
				continue;
			}
			preferred.put(candidate.get("fieldID").toString(), candidate);
		}

		return preferred;
	}

	/**
	 * Pairs a list of values with their respective field ids.
	 *
	 * The objective field, its presence in the row and the headers are settled
	 * first. When any of them differs from what this object currently holds, the
	 * internal mappings are refreshed. Every value is then normalized and the
	 * included ones are returned.
	 *
	 * @param row	the input row with values
	 * @param headers	list of strings with fields names
	 * @param objectiveField        is the column_number of the objective field. A
	 *                              field name or id is accepted too. When null, the
	 *                              current objective field or the last column is
	 *                              used
	 * @param objectiveFieldPresent must be true if the objective_field column is
	 *                              present in the row. When null it is guessed from
	 *                              the headers or from the row length
	 *
	 * @return map of pairs id,list
	 */
	public Map pair(JSONArray row, List headers, Object objectiveField,
			Boolean objectiveFieldPresent) {

		// Try to get objective field from Fields or use the last column
		if (objectiveField == null) {
			objectiveField = (this.objectiveField != null) ? this.objectiveField
					: fieldsColumns.get(fieldsColumns.size() - 1);
		}

		// If objective field is a name or an id, retrieve column number
		if (objectiveField instanceof String) {
			objectiveField = getFieldColumnNumber(objectiveField.toString());
		} else if (objectiveField instanceof Number) {
			// Column numbers are keyed as Long, so an Integer would never match
			objectiveField = Long.valueOf(((Number) objectiveField).longValue());
		}

		// Try to guess if objective field is in the data by using headers or
		// comparing the row length to the number of fields. A null flag must
		// reach this point untouched, otherwise the guess never runs
		if (objectiveFieldPresent == null) {
			if (headers != null) {
				Object objectiveId = fieldsByColumnNumber.get(objectiveField);
				String objectiveName = (objectiveId != null ? getFieldName(objectiveId.toString()) : null);
				objectiveFieldPresent = headers.contains(objectiveName);
			} else {
				objectiveFieldPresent = row.size() == getLength();
			}
		}

		// If objective field, its presence or headers have changed, update.
		// Compared by value and null-safe: both sides are boxed and may be null
		boolean changed = !Objects.equals(objectiveField, this.objectiveField)
				|| !Objects.equals(objectiveFieldPresent, this.objectiveFieldPresent)
				|| (headers != null && !headers.equals(this.headers));
		if (changed) {
			updateObjectiveField(objectiveField, objectiveFieldPresent, headers);
		}

		JSONArray normalizedRow = new JSONArray();
		for (Object rowValue : row) {
			normalizedRow.add(normalize(rowValue));
		}

		return toInputData(normalizedRow);
	}

	/**
	 * Builds dictionary with field, value info only for the included headers
	 *
	 * Each included row position contributes one entry. The key is the header
	 * stored for that position and the value is the row element found there.
	 *
	 * @param row the input row with values
	 *
	 * @return a map with field, value
	 */
	public Map toInputData(JSONArray row) {
		Map<Object, Object> inputData = new HashMap<Object, Object>();
		for (Object filteredIndex : filteredIndexes) {
			int position = ((Number) filteredIndex).intValue();
			// A negative position marks an included field that is not in the
			// row (an indexOf miss); there is no value to pair it with
			if (position < 0) {
				continue;
			}
			inputData.put(this.headers.get(position), row.get(position));
		}

		return inputData;
	}

	/**
	 * Validates whether types for input data match types in the fields definition.
	 *
	 * One line is written per entry of the input data. It reads OK or WRONG for a
	 * known field name, or states that the field does not exist.
	 *
	 * @param inputData an object with field's id/value pairs representing the
	 *                  instance you want to validate
	 * @param out       the string builder used to append the validation info to
	 *                  already existent text. A new one is created when null
	 *
	 * @return the string builder updated with validation info
	 */
	public StringBuilder validateInputData(JSONObject inputData, StringBuilder out) {
		// Same contract as listFields: a missing builder is not an error
		StringBuilder report = (out != null ? out : new StringBuilder());

		for (Object name : inputData.keySet()) {
			if (!fieldsByName.containsKey(name)) {
				report.append(String.format("Field '%s' does not exist\n", name));
				continue;
			}

			JSONObject field = (JSONObject) fieldsByName.get(name);
			String optType = (String) field.get("optype");
			Object value = inputData.get(name);

			// A null value has no class to report or to check
			String valueType = (value != null ? value.getClass().getName() : "null");
			report.append(String.format("[%-32s: %-16s: %-16s: ", name, valueType, optType));

			// NOTE(review): this passes only when the expected type is the
			// value's class or a subclass of it - confirm the direction
			boolean matches = value != null && value.getClass().isAssignableFrom(Utils.getJavaType(optType));
			report.append(matches ? "OK\n" : "WRONG\n");
		}

		return report;
	}

	/**
	 * Cleans missing tokens
	 *
	 * A string that is one of the missing tokens becomes null. Any other value,
	 * including non-string values such as numbers, is returned unchanged.
	 *
	 * @param value the value to normalize
	 *
	 * @return the value without missing tokens
	 */
	public Object normalize(Object value) {
		// Only strings can be missing tokens; everything else must survive
		// instead of being wiped out to null
		if (value instanceof String && missingTokens != null && missingTokens.contains(value)) {
			return null;
		}

		return value;
	}

	/**
	 * Returns the ids for the fields that contain missing values
	 *
	 * Only fields with a positive missing count are reported, so the result is
	 * empty when summaries are available and no field has missing values.
	 *
	 * @return a map with missing count per field id
	 *
	 * @throws IllegalStateException if no field carries summary information
	 */
	public Map getMissingCounts() {
		Map<String, Long> missingCounts = new HashMap<String, Long>();
		boolean summaryFound = false;

		for (Object fieldID : fields.keySet()) {
			JSONObject field = (JSONObject) fields.get(fieldID);
			JSONObject summary = (JSONObject) field.get("summary");
			if (summary == null) {
				continue;
			}
			summaryFound = true;

			Number missingCount = (Number) summary.get("missing_count");
			if (missingCount != null && missingCount.longValue() > 0) {
				missingCounts.put(fieldID.toString(), Long.valueOf(missingCount.longValue()));
			}
		}

		// Having no missing values is a valid answer; only the absence of
		// summaries means the information is not available
		if (!summaryFound) {
			throw new IllegalStateException(
					"The structure has not enough information " + "to extract the fields containing missing values."
							+ "Only datasets and models have such information. "
							+ "You could retry the get remote call " + " with 'limit=-1' as query string.");
		}

		return missingCounts;
	}

	/**
	 * Returns the summary information for the field
	 *
	 * @param fieldName the name of the field
	 *
	 * @return a JSONObject with the summary information for the field, or null
	 *         when the field is unknown or has no summary
	 */
	public JSONObject getStats(String fieldName) {
		JSONObject field = (JSONObject) fieldsByName.get(fieldName);
		// Unknown names yield null, as in getFieldId and getFieldName
		return (field != null ? (JSONObject) field.get("summary") : null);
	}

	/**
	 * Plain holder for what getFieldsStructure extracts from a resource: its
	 * fields, its locale and its missing tokens. Every value is null until set.
	 */
	public static final class FieldsStructure {
		private JSONObject fields;
		private Locale locale;
		private JSONArray missingTokens;

		/** @return the fields of the resource */
		public JSONObject getFields() {
			return this.fields;
		}

		/** @return the locale of the resource */
		public Locale getLocale() {
			return this.locale;
		}

		/** @return the missing tokens of the resource */
		public JSONArray getMissingTokens() {
			return this.missingTokens;
		}

		/** @param fields the fields of the resource */
		public void setFields(JSONObject fields) {
			this.fields = fields;
		}

		/** @param locale the locale of the resource */
		public void setLocale(Locale locale) {
			this.locale = locale;
		}

		/** @param missingTokens the missing tokens of the resource */
		public void setMissingTokens(JSONArray missingTokens) {
			this.missingTokens = missingTokens;
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy