All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.converters.DatabaseSaver Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 *    DatabaseSaver.java
 *    Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core.converters;

import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Environment;
import weka.core.EnvironmentHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 *  Writes to a database (tested with MySQL, InstantDB,
 * HSQLDB).
 * 

* * * Valid options are: *

* *

 * -url <JDBC URL>
 *  The JDBC URL to connect to.
 *  (default: from DatabaseUtils.props file)
 * 
* *
 * -user <name>
 *  The user to connect with to the database.
 *  (default: none)
 * 
* *
 * -password <password>
 *  The password to connect with to the database.
 *  (default: none)
 * 
* *
 * -T <table name>
 *  The name of the table.
 *  (default: the relation name)
 * 
* *
 * -truncate
 *  Truncate (i.e. delete any data) in table before inserting
 * 
* *
 * -P
 *  Add an ID column as primary key. The name is specified
 *  in the DatabaseUtils file ('idColumn'). The DatabaseLoader
 *  won't load this column.
 * 
* *
 * -custom-props <file>
 *  The custom properties file to use instead of default ones,
 *  containing the database parameters.
 *  (default: none)
 * 
* *
 * -i <input file name>
 *  Input file in arff format that should be saved in database.
 * 
* * * * @author Stefan Mutter ([email protected]) * @version $Revision: 10203 $ */ public class DatabaseSaver extends AbstractSaver implements BatchConverter, IncrementalConverter, DatabaseConverter, OptionHandler, EnvironmentHandler { /** for serialization. */ static final long serialVersionUID = 863971733782624956L; /** The database connection. */ protected DatabaseConnection m_DataBaseConnection; /** The name of the table in which the instances should be stored. */ protected String m_tableName; /** Table name with any environment variables resolved */ protected String m_resolvedTableName; /** An input arff file (for command line use). */ protected String m_inputFile; /** * The database specific type for a string (read in from the properties file). */ protected String m_createText; /** * The database specific type for a double (read in from the properties file). */ protected String m_createDouble; /** The database specific type for an int (read in from the properties file). */ protected String m_createInt; /** The database specific type for a date (read in from the properties file). */ protected String m_createDate; /** For converting the date value into a database string. */ protected SimpleDateFormat m_DateFormat; /** * The name of the primary key column that will be automatically generated (if * enabled). The name is read from DatabaseUtils. */ protected String m_idColumn; /** counts the rows and used as a primary key value. */ protected int m_count; /** Flag indicating if a primary key column should be added. */ protected boolean m_id; /** * Flag indicating whether the default name of the table is the relaion name * or not. */ protected boolean m_tabName; /** the database URL. */ protected String m_URL; /** the user name for the database. */ protected String m_Username; /** the password for the database. */ protected String m_Password; /** the custom props file to use instead of default one. */ protected File m_CustomPropsFile = null; /** * Whether to truncate (i.e. drop and then recreate) the table if it already * exists */ protected boolean m_truncate; /** Environment variables to use */ protected transient Environment m_env; /** * Constructor. * * @throws Exception throws Exception if property file cannot be read */ public DatabaseSaver() throws Exception { resetOptions(); } private void checkEnv() { if (m_env == null) { m_env = Environment.getSystemWide(); } } /** * Set the environment variables to use. * * @param env the environment variables to use */ @Override public void setEnvironment(Environment env) { m_env = env; try { // force a new connection and setting of all parameters // with environment variables resolved m_DataBaseConnection = newDatabaseConnection(); setUrl(m_URL); setUser(m_Username); setPassword(m_Password); } catch (Exception ex) { ex.printStackTrace(); } } /** * Initializes a new DatabaseConnection object, either default one or from * custom props file. * * @return the DatabaseConnection object * @see #m_CustomPropsFile */ protected DatabaseConnection newDatabaseConnection() throws Exception { DatabaseConnection result; checkEnv(); if (m_CustomPropsFile != null) { File pFile = new File(m_CustomPropsFile.getPath()); String pPath = m_CustomPropsFile.getPath(); try { pPath = m_env.substitute(pPath); pFile = new File(pPath); } catch (Exception ex) { } result = new DatabaseConnection(pFile); } else { result = new DatabaseConnection(); } m_createText = result.getProperties().getProperty("CREATE_STRING"); m_createDouble = result.getProperties().getProperty("CREATE_DOUBLE"); m_createInt = result.getProperties().getProperty("CREATE_INT"); m_createDate = result.getProperties() .getProperty("CREATE_DATE", "DATETIME"); m_DateFormat = new SimpleDateFormat(result.getProperties().getProperty( "DateFormat", "yyyy-MM-dd HH:mm:ss")); m_idColumn = result.getProperties().getProperty("idColumn"); return result; } /** * Resets the Saver ready to save a new data set. */ @Override public void resetOptions() { super.resetOptions(); setRetrieval(NONE); try { if (m_DataBaseConnection != null && m_DataBaseConnection.isConnected()) { m_DataBaseConnection.disconnectFromDatabase(); } m_DataBaseConnection = newDatabaseConnection(); } catch (Exception ex) { printException(ex); } m_URL = m_DataBaseConnection.getDatabaseURL(); m_tableName = ""; m_Username = m_DataBaseConnection.getUsername(); m_Password = m_DataBaseConnection.getPassword(); m_count = 1; m_id = false; m_tabName = true; /* * m_createText = * m_DataBaseConnection.getProperties().getProperty("CREATE_STRING"); * m_createDouble = * m_DataBaseConnection.getProperties().getProperty("CREATE_DOUBLE"); * m_createInt = * m_DataBaseConnection.getProperties().getProperty("CREATE_INT"); * m_createDate = * m_DataBaseConnection.getProperties().getProperty("CREATE_DATE", * "DATETIME"); m_DateFormat = new * SimpleDateFormat(m_DataBaseConnection.getProperties * ().getProperty("DateFormat", "yyyy-MM-dd HH:mm:ss")); m_idColumn = * m_DataBaseConnection.getProperties().getProperty("idColumn"); */ } /** * Cancels the incremental saving process and tries to drop the table if the * write mode is CANCEL. */ @Override public void cancel() { if (getWriteMode() == CANCEL) { try { m_DataBaseConnection.update("DROP TABLE " + m_resolvedTableName); if (m_DataBaseConnection.tableExists(m_resolvedTableName)) { System.err.println("Table cannot be dropped."); } } catch (Exception ex) { printException(ex); } resetOptions(); } } /** * Returns a string describing this Saver. * * @return a description of the Saver suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Writes to a database (tested with MySQL, InstantDB, HSQLDB)."; } /** * Sets the table's name. * * @param tn the name of the table */ public void setTableName(String tn) { m_tableName = tn; } /** * Gets the table's name. * * @return the table's name */ public String getTableName() { return m_tableName; } /** * Returns the tip text for this property. * * @return the tip text for this property */ public String tableNameTipText() { return "Sets the name of the table."; } /** * Set whether to truncate (i.e. drop and recreate) the table if it already * exits. If false, then new data is appended to the table. * * @param t true if the table should be truncated first (if it exists). */ public void setTruncate(boolean t) { m_truncate = t; } /** * Get whether to truncate (i.e. drop and recreate) the table if it already * exits. If false, then new data is appended to the table. * * @param t true if the table should be truncated first (if it exists). */ public boolean getTruncate() { return m_truncate; } /** * Returns the tip text for this property. * * @return the tip text for this property */ public String truncateTipText() { return "Truncate (i.e. drop and recreate) table if it already exists"; } /** * En/Dis-ables the automatic generation of a primary key. * * @param flag flag for automatic key-genereration */ public void setAutoKeyGeneration(boolean flag) { m_id = flag; } /** * Gets whether or not a primary key will be generated automatically. * * @return true if a primary key column will be generated, false otherwise */ public boolean getAutoKeyGeneration() { return m_id; } /** * Returns the tip text for this property. * * @return tip text for this property */ public String autoKeyGenerationTipText() { return "If set to true, a primary key column is generated automatically (containing the row number as INTEGER). The name of the key is read from DatabaseUtils (idColumn)" + " This primary key can be used for incremental loading (requires an unique key). This primary key will not be loaded as an attribute."; } /** * En/Dis-ables that the relation name is used for the name of the table * (default enabled). * * @param flag if true the relation name is used as table name */ public void setRelationForTableName(boolean flag) { m_tabName = flag; } /** * Gets whether or not the relation name is used as name of the table. * * @return true if the relation name is used as the name of the table, false * otherwise */ public boolean getRelationForTableName() { return m_tabName; } /** * Returns the tip text fo this property. * * @return the tip text for this property */ public String relationForTableNameTipText() { return "If set to true, the relation name will be used as name for the database table. Otherwise the user has to provide a table name."; } /** * Sets the database URL. * * @param url the URL */ @Override public void setUrl(String url) { checkEnv(); m_URL = url; String uCopy = m_URL; try { uCopy = m_env.substitute(uCopy); } catch (Exception ex) { } m_DataBaseConnection.setDatabaseURL(uCopy); } /** * Gets the database URL. * * @return the URL */ @Override public String getUrl() { return m_URL; } /** * Returns the tip text for this property. * * @return the tip text for this property */ public String urlTipText() { return "The URL of the database"; } /** * Sets the database user. * * @param user the user name */ @Override public void setUser(String user) { checkEnv(); m_Username = user; String userCopy = user; try { userCopy = m_env.substitute(userCopy); } catch (Exception ex) { } m_DataBaseConnection.setUsername(userCopy); } /** * Gets the database user. * * @return the user name */ @Override public String getUser() { // return m_DataBaseConnection.getUsername(); return m_Username; } /** * Returns the tip text for this property. * * @return the tip text for this property */ public String userTipText() { return "The user name for the database"; } /** * Sets the database password. * * @param password the password */ @Override public void setPassword(String password) { checkEnv(); m_Password = password; String passCopy = password; try { passCopy = m_env.substitute(passCopy); } catch (Exception ex) { } m_DataBaseConnection.setPassword(password); } /** * Returns the database password. * * @return the database password */ public String getPassword() { // return m_DataBaseConnection.getPassword(); return m_Password; } /** * Returns the tip text for this property. * * @return the tip text for this property */ public String passwordTipText() { return "The database password"; } /** * Sets the custom properties file to use. * * @param value the custom props file to load database parameters from, use * null or directory to disable custom properties. */ public void setCustomPropsFile(File value) { m_CustomPropsFile = value; } /** * Returns the custom properties file in use, if any. * * @return the custom props file, null if none used */ public File getCustomPropsFile() { return m_CustomPropsFile; } /** * The tip text for this property. * * @return the tip text */ public String customPropsFileTipText() { return "The custom properties that the user can use to override the default ones."; } /** * Sets the database url. * * @param url the database url * @param userName the user name * @param password the password */ public void setDestination(String url, String userName, String password) { try { checkEnv(); m_DataBaseConnection = newDatabaseConnection(); setUrl(url); setUser(userName); setPassword(password); // m_DataBaseConnection.setDatabaseURL(url); // m_DataBaseConnection.setUsername(userName); // m_DataBaseConnection.setPassword(password); } catch (Exception ex) { printException(ex); } } /** * Sets the database url. * * @param url the database url */ public void setDestination(String url) { try { checkEnv(); m_DataBaseConnection = newDatabaseConnection(); // m_DataBaseConnection.setDatabaseURL(url); setUrl(url); setUser(m_Username); setPassword(m_Password); // m_DataBaseConnection.setUsername(m_Username); // m_DataBaseConnection.setPassword(m_Password); } catch (Exception ex) { printException(ex); } } /** Sets the database url using the DatabaseUtils file. */ public void setDestination() { try { checkEnv(); m_DataBaseConnection = newDatabaseConnection(); setUser(m_Username); setPassword(m_Password); // m_DataBaseConnection.setUsername(m_Username); // m_DataBaseConnection.setPassword(m_Password); } catch (Exception ex) { printException(ex); } } /** * Returns the Capabilities of this saver. * * @return the capabilities of this object * @see Capabilities */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); result.enable(Capability.STRING_ATTRIBUTES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.NUMERIC_CLASS); result.enable(Capability.DATE_CLASS); result.enable(Capability.STRING_CLASS); result.enable(Capability.NO_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Opens a connection to the database. * */ public void connectToDatabase() { try { if (!m_DataBaseConnection.isConnected()) { m_DataBaseConnection.connectToDatabase(); } } catch (Exception ex) { printException(ex); } } /** * Writes the structure (header information) to a database by creating a new * table. * * @throws Exception if something goes wrong */ private void writeStructure() throws Exception { StringBuffer query = new StringBuffer(); Instances structure = getInstances(); query.append("CREATE TABLE "); m_resolvedTableName = m_env.substitute(m_tableName); if (m_tabName || m_resolvedTableName.equals("")) { m_resolvedTableName = m_DataBaseConnection.maskKeyword(structure .relationName()); } if (m_DataBaseConnection.getUpperCase()) { m_resolvedTableName = m_resolvedTableName.toUpperCase(); m_createInt = m_createInt.toUpperCase(); m_createDouble = m_createDouble.toUpperCase(); m_createText = m_createText.toUpperCase(); m_createDate = m_createDate.toUpperCase(); } m_resolvedTableName = m_resolvedTableName.replaceAll("[^\\w]", "_"); m_resolvedTableName = m_DataBaseConnection.maskKeyword(m_resolvedTableName); query.append(m_resolvedTableName); if (structure.numAttributes() == 0) { throw new Exception("Instances have no attribute."); } query.append(" ( "); if (m_DataBaseConnection.tableExists(m_resolvedTableName)) { if (!m_truncate) { System.err.println("[DatabaseSaver] Table '" + m_resolvedTableName + "' already exists - will append data..."); // if incremental and using primary key set the correct start value // for count if (getRetrieval() == INCREMENTAL && m_id) { String countS = "SELECT COUNT(*) FROM " + m_resolvedTableName; m_DataBaseConnection.execute(countS); ResultSet countRS = m_DataBaseConnection.getResultSet(); countRS.next(); m_count = countRS.getInt(1); countRS.close(); m_count++; } return; } String trunc = "DROP TABLE " + m_resolvedTableName; m_DataBaseConnection.execute(trunc); } if (m_id) { if (m_DataBaseConnection.getUpperCase()) { m_idColumn = m_idColumn.toUpperCase(); } query.append(m_DataBaseConnection.maskKeyword(m_idColumn)); query.append(" "); query.append(m_createInt); query.append(" PRIMARY KEY,"); } for (int i = 0; i < structure.numAttributes(); i++) { Attribute att = structure.attribute(i); String attName = att.name(); attName = attName.replaceAll("[^\\w]", "_"); attName = m_DataBaseConnection.maskKeyword(attName); if (m_DataBaseConnection.getUpperCase()) { query.append(attName.toUpperCase()); } else { query.append(attName); } if (att.isDate()) { query.append(" " + m_createDate); } else { if (att.isNumeric()) { query.append(" " + m_createDouble); } else { query.append(" " + m_createText); } } if (i != structure.numAttributes() - 1) { query.append(", "); } } query.append(" )"); // System.out.println(query.toString()); m_DataBaseConnection.update(query.toString()); m_DataBaseConnection.close(); if (!m_DataBaseConnection.tableExists(m_resolvedTableName)) { throw new IOException("Table cannot be built."); } } /** * inserts the given instance into the table. * * @param inst the instance to insert * @throws Exception if something goes wrong */ private void writeInstance(Instance inst) throws Exception { StringBuffer insert = new StringBuffer(); insert.append("INSERT INTO "); insert.append(m_resolvedTableName); insert.append(" VALUES ( "); if (m_id) { insert.append(m_count); insert.append(", "); m_count++; } for (int j = 0; j < inst.numAttributes(); j++) { if (inst.isMissing(j)) { insert.append("NULL"); } else { if ((inst.attribute(j)).isDate()) { insert.append("'" + m_DateFormat.format((long) inst.value(j)) + "'"); } else if ((inst.attribute(j)).isNumeric()) { insert.append(inst.value(j)); } else { String stringInsert = "'" + inst.stringValue(j) + "'"; if (stringInsert.length() > 2) { stringInsert = stringInsert.replaceAll("''", "'"); } insert.append(stringInsert); } } if (j != inst.numAttributes() - 1) { insert.append(", "); } } insert.append(" )"); // System.out.println(insert.toString()); if (m_DataBaseConnection.update(insert.toString()) < 1) { throw new IOException("Tuple cannot be inserted."); } else { m_DataBaseConnection.close(); } } /** * Saves an instances incrementally. Structure has to be set by using the * setStructure() method or setInstances() method. When a structure is set, a * table is created. * * @param inst the instance to save * @throws IOException throws IOEXception. */ @Override public void writeIncremental(Instance inst) throws IOException { int writeMode = getWriteMode(); Instances structure = getInstances(); if (m_DataBaseConnection == null) { throw new IOException("No database has been set up."); } if (getRetrieval() == BATCH) { throw new IOException("Batch and incremental saving cannot be mixed."); } setRetrieval(INCREMENTAL); try { if (!m_DataBaseConnection.isConnected()) { connectToDatabase(); } if (writeMode == WAIT) { if (structure == null) { setWriteMode(CANCEL); if (inst != null) { throw new Exception( "Structure(Header Information) has to be set in advance"); } } else { setWriteMode(STRUCTURE_READY); } writeMode = getWriteMode(); } if (writeMode == CANCEL) { cancel(); } if (writeMode == STRUCTURE_READY) { setWriteMode(WRITE); writeStructure(); writeMode = getWriteMode(); } if (writeMode == WRITE) { if (structure == null) { throw new IOException("No instances information available."); } if (inst != null) { // write instance writeInstance(inst); } else { // close m_DataBaseConnection.disconnectFromDatabase(); resetStructure(); m_count = 1; } } } catch (Exception ex) { printException(ex); } } /** * Writes a Batch of instances. * * @throws IOException throws IOException */ @Override public void writeBatch() throws IOException { Instances instances = getInstances(); if (instances == null) { throw new IOException("No instances to save"); } if (getRetrieval() == INCREMENTAL) { throw new IOException("Batch and incremental saving cannot be mixed."); } if (m_DataBaseConnection == null) { throw new IOException("No database has been set up."); } setRetrieval(BATCH); try { if (!m_DataBaseConnection.isConnected()) { connectToDatabase(); } setWriteMode(WRITE); writeStructure(); for (int i = 0; i < instances.numInstances(); i++) { writeInstance(instances.instance(i)); } m_DataBaseConnection.disconnectFromDatabase(); setWriteMode(WAIT); resetStructure(); m_count = 1; } catch (Exception ex) { printException(ex); } } /** * Prints an exception. * * @param ex the exception to print */ private void printException(Exception ex) { System.out.println("\n--- Exception caught ---\n"); while (ex != null) { System.out.println("Message: " + ex.getMessage()); if (ex instanceof SQLException) { System.out.println("SQLState: " + ((SQLException) ex).getSQLState()); System.out.println("ErrorCode: " + ((SQLException) ex).getErrorCode()); ex = ((SQLException) ex).getNextException(); } else { ex = null; } System.out.println(""); } } /** * Gets the setting. * * @return the current setting */ @Override public String[] getOptions() { Vector options = new Vector(); if ((getUrl() != null) && (getUrl().length() != 0)) { options.add("-url"); options.add(getUrl()); } if ((getUser() != null) && (getUser().length() != 0)) { options.add("-user"); options.add(getUser()); } if ((getPassword() != null) && (getPassword().length() != 0)) { options.add("-password"); options.add(getPassword()); } if ((m_tableName != null) && (m_tableName.length() != 0)) { options.add("-T"); options.add(m_tableName); } if (m_truncate) { options.add("-truncate"); } if (m_id) { options.add("-P"); } if ((m_inputFile != null) && (m_inputFile.length() != 0)) { options.add("-i"); options.add(m_inputFile); } if ((m_CustomPropsFile != null) && !m_CustomPropsFile.isDirectory()) { options.add("-custom-props"); options.add(m_CustomPropsFile.toString()); } return options.toArray(new String[options.size()]); } /** * Lists the available options. * * @return an enumeration of the available options */ @Override public Enumeration