// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
/*******************************************************************************
* Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Smile. If not, see <https://www.gnu.org/licenses/>.
******************************************************************************/
package smile.io;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.function.Function;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import smile.data.DataFrame;
import smile.data.Tuple;
import smile.data.type.DataType;
import smile.data.type.DataTypes;
import smile.data.type.StructField;
import smile.data.type.StructType;
import smile.util.Strings;
/**
* Reads JSON datasets. No nested objects are currently allowed.
*
* @author Haifeng Li
*/
public class JSON {
/**
* The schema of data structure. Stays {@code null} until it is set through
* {@link #schema(StructType)} or inferred by one of the path-based
* {@code read} methods.
*/
private StructType schema;
/** Charset of file. Defaults to UTF-8. */
private Charset charset = StandardCharsets.UTF_8;
/**
* Reads JSON files in single-line or multi-line mode.
* Defaults to {@link Mode#SINGLE_LINE}.
*/
private Mode mode = Mode.SINGLE_LINE;
/**
* Layout of a JSON file: either one object per line, or a list of
* objects that may span multiple lines.
*/
public enum Mode {
/** One JSON object per line. */
SINGLE_LINE,
/**
* A JSON object may occupy multiple lines.
* The file contains a list of objects.
* Files will be loaded as a whole entity and cannot be split.
*/
MULTI_LINE
}
/**
* Constructor. The new reader starts with no schema set, the UTF-8
* charset, and single-line mode.
*/
public JSON() {
}
/**
* Sets the schema. When no schema is set, the path-based {@code read}
* methods infer one from the file before reading it.
* @param schema the schema of file.
* @return this object, so that configuration calls can be chained.
*/
public JSON schema(StructType schema) {
this.schema = schema;
return this;
}
/**
* Sets the charset used to open files given by path.
* @param charset the charset of file.
* @return this object, so that configuration calls can be chained.
*/
public JSON charset(Charset charset) {
this.charset = charset;
return this;
}
/**
* Reads JSON files in single-line or multi-line mode.
* @param mode the file mode, see {@link Mode}.
* @return this object, so that configuration calls can be chained.
*/
public JSON mode(Mode mode) {
this.mode = mode;
return this;
}
/**
* Reads a JSON file. Delegates to {@link #read(Path, int)} with
* {@code Integer.MAX_VALUE} as the record limit.
* @param path a JSON file path.
* @return the data frame produced by the delegated read.
* @throws IOException propagated from the delegated read.
*/
public DataFrame read(Path path) throws IOException {
return read(path, Integer.MAX_VALUE);
}
/**
 * Reads a limited number of records from a JSON file.
 * If no schema has been set, it is first inferred from at most the
 * top 1000 objects of the file and kept in this reader.
 *
 * @param path a JSON file path.
 * @param limit reads a limited number of records.
 * @return the data frame built by {@link #read(BufferedReader, int)}.
 * @throws IOException if the file cannot be opened or read.
 */
public DataFrame read(Path path, int limit) throws IOException {
    if (schema == null) {
        // Schema inference gets its own reader, closed before the file
        // is reopened from the start for the actual read.
        try (BufferedReader reader = Files.newBufferedReader(path, charset)) {
            // infer the schema from top 1000 objects.
            schema = inferSchema(reader, Math.min(1000, limit));
        }
    }

    try (BufferedReader reader = Files.newBufferedReader(path, charset)) {
        // Pass the caller's limit through; it used to be replaced by
        // Integer.MAX_VALUE, so the whole file was always read.
        return read(reader, limit);
    }
}
/**
* Reads a JSON file. Delegates to {@link #read(String, int)} with
* {@code Integer.MAX_VALUE} as the record limit.
* @param path a JSON file path or URI.
* @return the data frame produced by the delegated read.
* @throws IOException propagated from the delegated read.
* @throws URISyntaxException propagated from the delegated read.
*/
public DataFrame read(String path) throws IOException, URISyntaxException {
return read(path, Integer.MAX_VALUE);
}
/**
 * Reads a limited number of records from a JSON file.
 * If no schema has been set, it is first inferred from at most the
 * top 1000 objects of the input and kept in this reader.
 *
 * @param path a JSON file path or URI.
 * @param limit reads a limited number of records.
 * @return the data frame built by {@link #read(BufferedReader, int)}.
 * @throws IOException if the input cannot be opened or read.
 * @throws URISyntaxException propagated from {@code Input.reader}.
 */
public DataFrame read(String path, int limit) throws IOException, URISyntaxException {
    if (schema == null) {
        // Schema inference gets its own reader, closed before the input
        // is reopened from the start for the actual read.
        try (BufferedReader reader = Input.reader(path, charset)) {
            // infer the schema from top 1000 objects.
            schema = inferSchema(reader, Math.min(1000, limit));
        }
    }

    try (BufferedReader reader = Input.reader(path, charset)) {
        // Pass the caller's limit through; it used to be replaced by
        // Integer.MAX_VALUE, so the whole input was always read.
        return read(reader, limit);
    }
}
/**
* Reads a limited number of records from a JSON file.
* @param reader a JSON file reader.
* @param limit reads a limited number of records.
*/
public DataFrame read(BufferedReader reader, int limit) throws IOException {
if (schema == null) {
// infer the schema from top 1000 rows.
throw new IllegalStateException("The schema is not set or inferred.");
}
List> parser = schema.parser();
List rows = new ArrayList<>();
ObjectMapper objectMapper = new ObjectMapper();
if (mode == Mode.MULTI_LINE) {
List