All downloads are free. The search and download functionality uses the official Maven repository.

weka.core.converters.XRFFLoader Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * XRFFLoader.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core.converters;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.zip.GZIPInputStream;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.xml.XMLInstances;

/**
 
 * Reads a source that is in the XML version of the ARFF format. It automatically decompresses the data if the extension is '.xrff.gz'.
 * 

* * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 8034 $ * @see Loader */ public class XRFFLoader extends AbstractFileLoader implements BatchConverter, URLSourcedLoader { /** for serialization */ private static final long serialVersionUID = 3764533621135196582L; /** the file extension */ public static String FILE_EXTENSION = XMLInstances.FILE_EXTENSION; /** the extension for compressed files */ public static String FILE_EXTENSION_COMPRESSED = FILE_EXTENSION + ".gz"; /** the url */ protected String m_URL = "http://"; /** The reader for the source file. */ protected transient Reader m_sourceReader = null; /** the loaded XML document */ protected XMLInstances m_XMLInstances; /** * Returns a string describing this Loader * * @return a description of the Loader suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Reads a source that is in the XML version of the ARFF format. " + "It automatically decompresses the data if the extension is '" + FILE_EXTENSION_COMPRESSED + "'."; } /** * Get the file extension used for libsvm files * * @return the file extension */ public String getFileExtension() { return FILE_EXTENSION; } /** * Gets all the file extensions used for this type of file * * @return the file extensions */ public String[] getFileExtensions() { return new String[]{FILE_EXTENSION, FILE_EXTENSION_COMPRESSED}; } /** * Returns a description of the file type. * * @return a short file description */ public String getFileDescription() { return "XRFF data files"; } /** * Resets the Loader ready to read a new data set * * @throws IOException if something goes wrong */ public void reset() throws IOException { m_structure = null; m_XMLInstances = null; setRetrieval(NONE); if (m_File != null) { setFile(new File(m_File)); } else if ((m_URL != null) && !m_URL.equals("http://")) { setURL(m_URL); } } /** * Resets the Loader object and sets the source of the data set to be * the supplied File object. 
* * @param file the source file. * @throws IOException if an error occurs */ public void setSource(File file) throws IOException { m_structure = null; m_XMLInstances = null; setRetrieval(NONE); if (file == null) throw new IOException("Source file object is null!"); try { if (file.getName().endsWith(FILE_EXTENSION_COMPRESSED)) setSource(new GZIPInputStream(new FileInputStream(file))); else setSource(new FileInputStream(file)); } catch (FileNotFoundException ex) { throw new IOException("File not found"); } m_sourceFile = file; m_File = file.getAbsolutePath(); } /** * Resets the Loader object and sets the source of the data set to be * the supplied url. * * @param url the source url. * @throws IOException if an error occurs */ public void setSource(URL url) throws IOException { m_structure = null; m_XMLInstances = null; setRetrieval(NONE); setSource(url.openStream()); m_URL = url.toString(); } /** * Set the url to load from * * @param url the url to load from * @throws IOException if the url can't be set. */ public void setURL(String url) throws IOException { m_URL = url; setSource(new URL(url)); } /** * Return the current url * * @return the current url */ public String retrieveURL() { return m_URL; } /** * Resets the Loader object and sets the source of the data set to be * the supplied InputStream. * * @param in the source InputStream. * @throws IOException if initialization of reader fails. */ public void setSource(InputStream in) throws IOException { m_File = (new File(System.getProperty("user.dir"))).getAbsolutePath(); m_URL = "http://"; m_sourceReader = new BufferedReader(new InputStreamReader(in)); } /** * Determines and returns (if possible) the structure (internally the * header) of the data set as an empty set of instances. 
* * @return the structure of the data set as an empty set * of Instances * @throws IOException if an error occurs */ public Instances getStructure() throws IOException { if (m_sourceReader == null) throw new IOException("No source has been specified"); if (m_structure == null) { try { m_XMLInstances = new XMLInstances(m_sourceReader); m_structure = new Instances(m_XMLInstances.getInstances(), 0); } catch (IOException ioe) { // just re-throw it throw ioe; } catch (Exception e) { throw new RuntimeException(e); } } return new Instances(m_structure, 0); } /** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set. * * @return the structure of the data set as an empty * set of Instances * @throws IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (m_sourceReader == null) throw new IOException("No source has been specified"); if (getRetrieval() == INCREMENTAL) throw new IOException("Cannot mix getting Instances in both incremental and batch modes"); setRetrieval(BATCH); if (m_structure == null) getStructure(); try { // close the stream m_sourceReader.close(); } catch (Exception ex) { } return m_XMLInstances.getInstances(); } /** * XRFFLoader is unable to process a data set incrementally. * * @param structure ignored * @return never returns without throwing an exception * @throws IOException always. XRFFLoader is unable to process a * data set incrementally. */ public Instance getNextInstance(Instances structure) throws IOException { throw new IOException("XRFFLoader can't read data sets incrementally."); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 8034 $"); } /** * Main method. * * @param args should contain the name of an input file. 
*/ public static void main(String[] args) { runFileLoader(new XRFFLoader(), args); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy