Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
weka.core.converters.CSVSaver Maven / Gradle / Ivy
Go to download
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This is the stable version. Apart from bugfixes, this version
does not receive any other updates.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
/*
* CSVSaver.java
* Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core.converters;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.AbstractInstance;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
/**
* Writes to a destination that is in CSV
* (comma-separated values) format. The column separator can be chosen (default
* is ',') as well as the value representing missing values (default is '?').
*
*
*
* Valid options are:
*
*
*
* -F <separator>
* The field separator to be used.
* '\t' can be used as well.
* (default: ',')
*
*
*
* -M <str>
* The string representing a missing value.
* (default: ?)
*
*
*
* -N
* Don't write a header row.
*
*
*
* -decimal <num>
* The maximum number of digits to print after the decimal
* place for numeric values (default: 6)
*
*
*
* -i <the input file>
* The input file
*
*
*
* -o <the output file>
* The output file
*
*
*
*
* @author Stefan Mutter ([email protected] )
* @version $Revision: 10203 $
* @see Saver
*/
public class CSVSaver extends AbstractFileSaver implements BatchConverter,
IncrementalConverter, FileSourcedConverter {
/** for serialization. */
static final long serialVersionUID = 476636654410701807L;
/** the field separator. */
protected String m_FieldSeparator = ",";
/** The placeholder for missing values. */
protected String m_MissingValue = "?";
/** Max number of decimal places for numeric values */
protected int m_MaxDecimalPlaces = AbstractInstance.s_numericAfterDecimalPoint;
/** Set to true to not write the header row */
protected boolean m_noHeaderRow = false;
/**
* Constructor.
*/
public CSVSaver() {
resetOptions();
}
/**
* Returns a string describing this Saver.
*
* @return a description of the Saver suitable for displaying in the
* explorer/experimenter gui
*/
public String globalInfo() {
return "Writes to a destination that is in CSV (comma-separated values) format. "
+ "The column separator can be chosen (default is ',') as well as the value "
+ "representing missing values (default is '?').";
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration listOptions() {
Vector result = new Vector ();
result.addElement(new Option("\tThe field separator to be used.\n"
+ "\t'\\t' can be used as well.\n" + "\t(default: ',')", "F", 1,
"-F "));
result.addElement(new Option("\tThe string representing a missing value.\n"
+ "\t(default: ?)", "M", 1, "-M "));
result.addElement(new Option("\tDon't write a header row.", "N", 0, "-N"));
result.addElement(new Option(
"\tThe maximum number of digits to print after the decimal\n"
+ "\tplace for numeric values (default: 6)", "decimal", 1,
"-decimal "));
result.addAll(Collections.list(super.listOptions()));
return result.elements();
}
/**
* Valid options are:
*
*
*
* -F <separator>
* The field separator to be used.
* '\t' can be used as well.
* (default: ',')
*
*
*
* -M <str>
* The string representing a missing value.
* (default: ?)
*
*
*
* -N
* Don't write a header row.
*
*
*
* -decimal <num>
* The maximum number of digits to print after the decimal
* place for numeric values (default: 6)
*
*
*
* -i <the input file>
* The input file
*
*
*
* -o <the output file>
* The output file
*
*
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
@Override
public void setOptions(String[] options) throws Exception {
String tmpStr;
tmpStr = Utils.getOption('F', options);
if (tmpStr.length() != 0) {
setFieldSeparator(tmpStr);
} else {
setFieldSeparator(",");
}
tmpStr = Utils.getOption('M', options);
if (tmpStr.length() != 0) {
setMissingValue(tmpStr);
} else {
setMissingValue("?");
}
setNoHeaderRow(Utils.getFlag('N', options));
tmpStr = Utils.getOption("decimal", options);
if (tmpStr.length() > 0) {
setMaxDecimalPlaces(Integer.parseInt(tmpStr));
}
super.setOptions(options);
Utils.checkForRemainingOptions(options);
}
/**
* Gets the current settings of the Classifier.
*
* @return an array of strings suitable for passing to setOptions
*/
@Override
public String[] getOptions() {
Vector result = new Vector();
result.add("-F");
result.add(getFieldSeparator());
result.add("-M");
result.add(getMissingValue());
if (getNoHeaderRow()) {
result.add("-N");
}
result.add("-decimal");
result.add("" + getMaxDecimalPlaces());
Collections.addAll(result, super.getOptions());
return result.toArray(new String[result.size()]);
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String noHeaderRowTipText() {
return "If true then the header row is not written";
}
/**
* Set whether to not write the header row
*
* @param b true if no header row is to be written
*/
public void setNoHeaderRow(boolean b) {
m_noHeaderRow = b;
}
/**
* Get whether to not write the header row
*
* @return true if no header row is to be written
*/
public boolean getNoHeaderRow() {
return m_noHeaderRow;
}
/**
* Set the maximum number of decimal places to print
*
* @param maxDecimal the maximum number of decimal places to print
*/
public void setMaxDecimalPlaces(int maxDecimal) {
m_MaxDecimalPlaces = maxDecimal;
}
/**
* Get the maximum number of decimal places to print
*
* @return the maximum number of decimal places to print
*/
public int getMaxDecimalPlaces() {
return m_MaxDecimalPlaces;
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String maxDecimalPlacesTipText() {
return "The maximum number of digits to print after the decimal "
+ "point for numeric values";
}
/**
* Sets the character used as column separator.
*
* @param value the character to use
*/
public void setFieldSeparator(String value) {
m_FieldSeparator = Utils.unbackQuoteChars(value);
/*
* if (m_FieldSeparator.length() != 1) { m_FieldSeparator = ","; System.err
* .println(
* "Field separator can only be a single character (exception being '\t'), "
* + "defaulting back to '" + m_FieldSeparator + "'!"); }
*/
}
/**
* Returns the character used as column separator.
*
* @return the character to use
*/
public String getFieldSeparator() {
return Utils.backQuoteChars(m_FieldSeparator);
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String fieldSeparatorTipText() {
return "The character to use as separator for the columns/fields (use '\\t' for TAB).";
}
/**
* Sets the placeholder for missing values.
*
* @param value the placeholder
*/
public void setMissingValue(String value) {
m_MissingValue = value;
}
/**
* Returns the current placeholder for missing values.
*
* @return the placeholder
*/
public String getMissingValue() {
return m_MissingValue;
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String missingValueTipText() {
return "The placeholder for missing values, default is '?'.";
}
/**
* Returns a description of the file type.
*
* @return a short file description
*/
@Override
public String getFileDescription() {
return "CSV file: comma separated files";
}
/**
* Resets the Saver.
*/
@Override
public void resetOptions() {
super.resetOptions();
setFileExtension(".csv");
}
/**
* Returns the Capabilities of this saver.
*
* @return the capabilities of this object
* @see Capabilities
*/
@Override
public Capabilities getCapabilities() {
Capabilities result = super.getCapabilities();
// attributes
result.enable(Capability.NOMINAL_ATTRIBUTES);
result.enable(Capability.NUMERIC_ATTRIBUTES);
result.enable(Capability.DATE_ATTRIBUTES);
result.enable(Capability.STRING_ATTRIBUTES);
result.enable(Capability.MISSING_VALUES);
// class
result.enable(Capability.NOMINAL_CLASS);
result.enable(Capability.NUMERIC_CLASS);
result.enable(Capability.DATE_CLASS);
result.enable(Capability.STRING_CLASS);
result.enable(Capability.MISSING_CLASS_VALUES);
result.enable(Capability.NO_CLASS);
return result;
}
/**
* Saves an instances incrementally. Structure has to be set by using the
* setStructure() method or setInstances() method.
*
* @param inst the instance to save
* @throws IOException throws IOEXception if an instance cannot be saved
* incrementally.
*/
@Override
public void writeIncremental(Instance inst) throws IOException {
int writeMode = getWriteMode();
Instances structure = getInstances();
PrintWriter outW = null;
if (getRetrieval() == BATCH || getRetrieval() == NONE) {
throw new IOException("Batch and incremental saving cannot be mixed.");
}
if (getWriter() != null) {
outW = new PrintWriter(getWriter());
}
if (writeMode == WAIT) {
if (structure == null) {
setWriteMode(CANCEL);
if (inst != null) {
System.err
.println("Structure(Header Information) has to be set in advance");
}
} else {
setWriteMode(STRUCTURE_READY);
}
writeMode = getWriteMode();
}
if (writeMode == CANCEL) {
if (outW != null) {
outW.close();
}
cancel();
}
if (writeMode == STRUCTURE_READY) {
setWriteMode(WRITE);
// write header
if (!getNoHeaderRow()) {
if (retrieveFile() == null && outW == null) {
// print out attribute names as first row
for (int i = 0; i < structure.numAttributes(); i++) {
System.out.print(structure.attribute(i).name());
if (i < structure.numAttributes() - 1) {
System.out.print(m_FieldSeparator);
} else {
System.out.println();
}
}
} else {
for (int i = 0; i < structure.numAttributes(); i++) {
outW.print(structure.attribute(i).name());
if (i < structure.numAttributes() - 1) {
outW.print(m_FieldSeparator);
} else {
outW.println();
}
}
outW.flush();
}
}
writeMode = getWriteMode();
}
if (writeMode == WRITE) {
if (structure == null) {
throw new IOException("No instances information available.");
}
if (inst != null) {
// write instance
if (retrieveFile() == null && outW == null) {
System.out.println(inst);
} else {
outW.println(instanceToString(inst));
// flushes every 100 instances
m_incrementalCounter++;
if (m_incrementalCounter > 100) {
m_incrementalCounter = 0;
outW.flush();
}
}
} else {
// close
if (outW != null) {
outW.flush();
outW.close();
}
m_incrementalCounter = 0;
resetStructure();
outW = null;
resetWriter();
}
}
}
/**
* Writes a Batch of instances.
*
* @throws IOException throws IOException if saving in batch mode is not
* possible
*/
@Override
public void writeBatch() throws IOException {
if (getInstances() == null) {
throw new IOException("No instances to save");
}
if (getRetrieval() == INCREMENTAL) {
throw new IOException("Batch and incremental saving cannot be mixed.");
}
setRetrieval(BATCH);
setWriteMode(WRITE);
if (retrieveFile() == null && getWriter() == null) {
if (!getNoHeaderRow()) {
// print out attribute names as first row
for (int i = 0; i < getInstances().numAttributes(); i++) {
System.out.print(getInstances().attribute(i).name());
if (i < getInstances().numAttributes() - 1) {
System.out.print(m_FieldSeparator);
} else {
System.out.println();
}
}
}
for (int i = 0; i < getInstances().numInstances(); i++) {
System.out.println(instanceToString(getInstances().instance(i)));
}
setWriteMode(WAIT);
return;
}
PrintWriter outW = new PrintWriter(getWriter());
if (!getNoHeaderRow()) {
// print out attribute names as first row
for (int i = 0; i < getInstances().numAttributes(); i++) {
outW.print(Utils.quote(getInstances().attribute(i).name()));
if (i < getInstances().numAttributes() - 1) {
outW.print(m_FieldSeparator);
} else {
outW.println();
}
}
}
for (int i = 0; i < getInstances().numInstances(); i++) {
outW.println(instanceToString((getInstances().instance(i))));
}
outW.flush();
outW.close();
setWriteMode(WAIT);
outW = null;
resetWriter();
setWriteMode(CANCEL);
}
/**
* turns an instance into a string. takes care of sparse instances as well.
*
* @param inst the instance to turn into a string
* @return the generated string
*/
protected String instanceToString(Instance inst) {
StringBuffer result;
Instance outInst;
int i;
String field;
result = new StringBuffer();
if (inst instanceof SparseInstance) {
outInst = new DenseInstance(inst.weight(), inst.toDoubleArray());
outInst.setDataset(inst.dataset());
} else {
outInst = inst;
}
for (i = 0; i < outInst.numAttributes(); i++) {
if (i > 0) {
result.append(m_FieldSeparator);
}
if (outInst.isMissing(i)) {
field = m_MissingValue;
} else {
field = outInst.toString(i, m_MaxDecimalPlaces);
}
// make sure that custom field separators, like ";" get quoted correctly
// as well (but only for single character field separators)
if (m_FieldSeparator.length() == 1
&& (field.indexOf(m_FieldSeparator) > -1) && !field.startsWith("'")
&& !field.endsWith("'")) {
field = "'" + field + "'";
}
result.append(field);
}
return result.toString();
}
/**
* Returns the revision string.
*
* @return the revision
*/
@Override
public String getRevision() {
return RevisionUtils.extract("$Revision: 10203 $");
}
/**
* Main method.
*
* @param args should contain the options of a Saver.
*/
public static void main(String[] args) {
runFileSaver(new CSVSaver(), args);
}
}