All downloads are free. Search and download functionality uses the official Maven repository.

tech.tablesaw.io.DataFrameReader Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.io;

import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Optional;
import tech.tablesaw.api.Table;
import tech.tablesaw.io.csv.CsvReadOptions;
import tech.tablesaw.io.csv.CsvReader;
import tech.tablesaw.io.jdbc.SqlResultSetReader;

public class DataFrameReader {

  private final ReaderRegistry registry;

  public DataFrameReader(ReaderRegistry registry) {
    this.registry = registry;
  }

  /**
   * Reads the given URL into a table using default options Uses appropriate converter based on
   * mime-type Use {@link #usingOptions(ReadOptions) usingOptions} to use non-default options
   */
  public Table url(String url) {
    try {
      return url(new URL(url));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }

  /**
   * Reads the given URL into a table using default options Uses appropriate converter based on
   * mime-type Use {@link #usingOptions(ReadOptions) usingOptions} to use non-default options
   */
  public Table url(URL url) {
    URLConnection connection = null;
    try {
      connection = url.openConnection();
    } catch (IOException e) {
      e.printStackTrace();
    }
    String contentType = connection.getContentType();
    return url(url, getCharset(contentType), getMimeType(contentType));
  }

  private Table url(URL url, Charset charset, String mimeType) {
    Optional> reader = registry.getReaderForMimeType(mimeType);
    if (reader.isPresent()) {
      return readUrl(url, charset, reader.get());
    }
    reader = registry.getReaderForExtension(getExtension(url));
    if (reader.isPresent()) {
      return readUrl(url, charset, reader.get());
    }
    throw new IllegalArgumentException("No reader registered for mime-type " + mimeType);
  }

  private Table readUrl(URL url, Charset charset, DataReader reader) {
    try {
      return reader.read(new Source(url.openConnection().getInputStream(), charset));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }

  private String getMimeType(String contentType) {
    String[] pair = contentType.split(";");
    return pair[0].trim();
  }

  private Charset getCharset(String contentType) {
    String[] pair = contentType.split(";");
    return pair.length == 1
        ? Charset.defaultCharset()
        : Charset.forName(pair[1].split("=")[1].trim());
  }

  /**
   * Best effort method to get the extension from a URL.
   *
   * @param url the url to pull the extension from.
   * @return the extension.
   */
  private String getExtension(URL url) {
    return Files.getFileExtension(url.getPath());
  }

  /**
   * Reads the given string contents into a table using default options Uses converter specified
   * based on given file extension Use {@link #usingOptions(ReadOptions) usingOptions} to use
   * non-default options
   */
  public Table string(String s, String fileExtension) {
    Optional> reader = registry.getReaderForExtension(fileExtension);
    if (!reader.isPresent()) {
      throw new IllegalArgumentException("No reader registered for extension " + fileExtension);
    }
    return reader.get().read(Source.fromString(s));
  }

  /**
   * Reads the given file into a table using default options Uses converter specified based on given
   * file extension Use {@link #usingOptions(ReadOptions) usingOptions} to use non-default options
   */
  public Table file(String file) {
    return file(new File(file));
  }

  /**
   * Reads the given file into a table using default options Uses converter specified based on given
   * file extension Use {@link #usingOptions(ReadOptions) usingOptions} to use non-default options
   */
  public Table file(File file) {
    String extension = null;
    try {
      extension = Files.getFileExtension(file.getCanonicalPath());
      Optional> reader = registry.getReaderForExtension(extension);
      if (reader.isPresent()) {
        return reader.get().read(new Source(file));
      }
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
    throw new IllegalArgumentException("No reader registered for extension " + extension);
  }

  public  Table usingOptions(T options) {
    DataReader reader = registry.getReaderForOptions(options);
    return reader.read(options);
  }

  public Table usingOptions(ReadOptions.Builder builder) {
    return usingOptions(builder.build());
  }

  public Table db(ResultSet resultSet) throws SQLException {
    return SqlResultSetReader.read(resultSet);
  }

  public Table db(ResultSet resultSet, String tableName) throws SQLException {
    Table table = SqlResultSetReader.read(resultSet);
    table.setName(tableName);
    return table;
  }

  // Legacy reader methods for backwards-compatibility

  public Table csv(String file) {
    return csv(CsvReadOptions.builder(file));
  }

  public Table csv(String contents, String tableName) {
    try {
      return csv(CsvReadOptions.builder(new StringReader(contents)).tableName(tableName));
    } catch (Exception e) {
      throw new IllegalStateException(e);
    }
  }

  public Table csv(File file) {
    return csv(CsvReadOptions.builder(file));
  }

  public Table csv(InputStream stream) {
    return csv(CsvReadOptions.builder(stream));
  }

  public Table csv(URL url) {
    try {
      return readUrl(url, getCharset(url.openConnection().getContentType()), new CsvReader());
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }

  public Table csv(InputStream stream, String name) {
    return csv(CsvReadOptions.builder(stream).tableName(name));
  }

  public Table csv(Reader reader) {
    return csv(CsvReadOptions.builder(reader));
  }

  public Table csv(CsvReadOptions.Builder options) {
    return csv(options.build());
  }

  public Table csv(CsvReadOptions options) {
    return new CsvReader().read(options);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy