All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.converters.ExcelLoader Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * ExcelLoader.java
 * Copyright (C) 2010-2015 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core.converters;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SingleIndex;
import weka.core.Utils;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Vector;

/**
 * Reads a source that is in the Excel spreadsheet format.<br>
 * For instance, a spreadsheet generated with the Microsoft Office Suite.
 *
 * <p>Valid options are:
 *
 * <pre>
 * -sheet &lt;index&gt;
 *  The index of the sheet to load; 'first' and 'last' are accepted as well.
 * </pre>
 *
 * <pre>
 * -M &lt;str&gt;
 *  The string representing a missing value.
 *  (default is ? but empty cell is also treated as missing value)
 * </pre>
 *
 * <p>For a tutorial on ExcelDOM, see:<br>
* http://java.dzone.com/news/integrate-openoffice-java * * @author FracPete (fracpete at waikato dot ac dot nz) * @author Geertjan Wielenga * @version $Revision: 14663 $ * @see Loader */ public class ExcelLoader extends AbstractFileLoader implements BatchConverter, URLSourcedLoader, OptionHandler { /** for serialization. */ private static final long serialVersionUID = 9164120515718983413L; /** the binary file extension. */ public static String FILE_EXTENSION = ".xls"; /** the OOXML file extension. */ public static String FILE_EXTENSION_OOXML = ".xlsx"; /** the file description. */ public static String FILE_DESCRIPTION = "Excel Spreadsheets"; /** the url. */ protected String m_URL = "http://"; /** The stream for the source file. */ protected transient InputStream m_sourceStream = null; /** the currently open Excel document. */ protected transient Workbook m_Workbook; /** the sheet to load. */ protected SingleIndex m_SheetIndex = new SingleIndex("first"); /** The placeholder for missing values. */ protected String m_MissingValue = "?"; /** * Returns a string describing this Loader. * * @return a description of the Loader suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Reads a source that is in the Excel spreadsheet format.\n" + "For instance, a spreadsheet generated with the Microsoft Office Suite."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy