All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.d3x.morpheus.json.JsonSourceSplit Maven / Gradle / Ivy

There is a newer version: 1.0.31
Show newest version
/*
 * Copyright (C) 2014-2018 D3X Systems - All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.d3x.morpheus.json;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import com.d3x.morpheus.array.ArrayBuilder;
import com.d3x.morpheus.frame.DataFrame;
import com.d3x.morpheus.frame.DataFrameException;
import com.d3x.morpheus.util.IO;
import com.d3x.morpheus.util.Resource;
import com.d3x.morpheus.util.text.parser.Parser;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;

/**
 * A JsonSource implementation that can load a DataFrame from Pandas compatible JSON with "split" orientation
 *
 * <p>The document is expected to be a single JSON object whose fields are named {@code columns},
 * {@code index} and {@code data} (matched case-insensitively). Any other field name is rejected.</p>
 *
 * <p>This is open source software released under the Apache 2.0 License</p>
 *
 * @author Xavier Witdouck
 */
public class JsonSourceSplit implements JsonSource {

    /**
     * Returns a DataFrame loaded from the resource exposed by the options
     * @param options   the options for parsing, which also supply the resource to read
     * @return          the resulting DataFrame, see {@link #read(JsonReader, Options)} for when this is null
     * @throws DataFrameException   if fails to parse json into DataFrame
     */
    @Override
    public synchronized DataFrame read(Options options) throws DataFrameException {
        var is = options.getResource().toInputStream();
        // Decode explicitly as UTF-8 (the interchange encoding for JSON) instead of the platform default charset
        var reader = new JsonReader(new InputStreamReader(new BufferedInputStream(is), StandardCharsets.UTF_8));
        try {
            return read(reader, options);
        } finally {
            IO.close(reader);
        }
    }

    /**
     * Returns a DataFrame loaded from the Json reader
     *
     * The frame is built as soon as the "data" field is encountered, using whatever keys have been
     * collected up to that point, so "columns" and "index" need to appear before "data" in the document.
     *
     * @param reader    the Json stream reader
     * @param options   the options for parsing
     * @return          the resulting DataFrame, null if the document is a JSON null or has no "data" field
     * @throws DataFrameException   if fails to parse json into DataFrame
     */
    public synchronized DataFrame read(JsonReader reader, Options options) throws DataFrameException {
        try {
            var token = reader.peek();
            // peek() never yields a Java null; a JSON null literal is reported as JsonToken.NULL
            if (token == JsonToken.NULL) {
                reader.nextNull();
                return null;
            }
            reader.beginObject();
            token = reader.peek();
            var rows = new ArrayList();
            var columns = new ArrayList();
            DataFrame frame = null;
            while (token != JsonToken.END_OBJECT) {
                var name = reader.nextName();
                if (name.equalsIgnoreCase("columns")) {
                    columns.addAll(this.columns(reader, options));
                } else if (name.equalsIgnoreCase("index")) {
                    rows.addAll(this.rows(reader, options));
                } else if (name.equalsIgnoreCase("data")) {
                    frame = data(reader, rows, columns);
                } else {
                    throw new DataFrameException("Unexpected field name in DataFrame JSON: " + name);
                }
                token = reader.peek();
            }
            reader.endObject();
            return frame;
        } catch (Exception ex) {
            throw new DataFrameException("Failed to parse json into DataFrame", ex);
        }
    }

    /**
     * Returns the column keys from reader
     *
     * Each element is converted to text and passed through the column key parser from the options,
     * or through an identity String parser when the options do not provide one.
     *
     * @param reader    the json reader
     * @param options   the options
     * @return          the column keys
     * @throws IOException  if there is an IO error
     */
    @SuppressWarnings("unchecked")
    private List columns(JsonReader reader, Options options) throws IOException {
        reader.beginArray();
        var token = reader.peek();
        var defaultParser = (Parser)Parser.forObject(String.class, v -> v);
        var parser = Optional.ofNullable(options.getColKeyParser()).orElse(defaultParser);
        var columns = new ArrayList();
        while (token != JsonToken.END_ARRAY) {
            if (token == JsonToken.STRING) {
                columns.add(parser.apply(reader.nextString()));
            } else if (token == JsonToken.NUMBER) {
                // Numbers are read as doubles, so an integer key such as 1 reaches the parser as "1.0"
                columns.add(parser.apply(String.valueOf(reader.nextDouble())));
            } else if (token == JsonToken.BOOLEAN) {
                columns.add(parser.apply(String.valueOf(reader.nextBoolean())));
            } else if (token == JsonToken.NULL) {
                throw new DataFrameException("Cannot have null column headings in JSON");
            } else {
                // Nothing consumes a nested array or object, so without this the loop would never terminate
                throw new DataFrameException("Unexpected token in column headings in JSON: " + token);
            }
            token = reader.peek();
        }
        reader.endArray();
        return columns;
    }

    /**
     * Returns the row keys from reader
     *
     * Each element is converted to text and passed through the row key parser from the options,
     * or through an identity String parser when the options do not provide one.
     *
     * @param reader    the json reader
     * @param options   the options
     * @return          the row keys
     * @throws IOException  if there is an IO error
     */
    @SuppressWarnings("unchecked")
    private List rows(JsonReader reader, Options options) throws IOException {
        reader.beginArray();
        var token = reader.peek();
        var defaultParser = (Parser)Parser.forObject(String.class, v -> v);
        var parser = Optional.ofNullable(options.getRowKeyParser()).orElse(defaultParser);
        var rows = new ArrayList();
        while (token != JsonToken.END_ARRAY) {
            if (token == JsonToken.STRING) {
                rows.add(parser.apply(reader.nextString()));
            } else if (token == JsonToken.NUMBER) {
                // Numbers are read as doubles, so an integer key such as 1 reaches the parser as "1.0"
                rows.add(parser.apply(String.valueOf(reader.nextDouble())));
            } else if (token == JsonToken.BOOLEAN) {
                rows.add(parser.apply(String.valueOf(reader.nextBoolean())));
            } else if (token == JsonToken.NULL) {
                throw new DataFrameException("Cannot have null row headings in JSON");
            } else {
                // Nothing consumes a nested array or object, so without this the loop would never terminate
                throw new DataFrameException("Unexpected token in row headings in JSON: " + token);
            }
            token = reader.peek();
        }
        reader.endArray();
        return rows;
    }

    /**
     * Returns the data frame with all the data
     * @param reader    the json reader
     * @param rows      the row keys
     * @param columns   the column keys
     * @return          the resulting data frame
     * @throws IOException  if there is an IO error
     */
    @SuppressWarnings("unchecked")
// NOTE(review): the text below looks damaged by extraction and is not valid Java as it stands. Content is
// missing between "for (int i=0; i" and ")columns.get(0).getClass()", and again between the second
// "for (int i=0; i" and "create(v -> ...)"; presumably everything from a '<' up to the following '>' was
// dropped. TODO restore both gaps from the upstream source before relying on this code.
// What is still readable: data() opens the outer JSON array, creates one ArrayBuilder.of(rowCount) per
// column key, walks the nested arrays until END_ARRAY, and builds the result with
// DataFrame.of(rows, colType, ...) where colType is the class of the first column key.
// The tail presumably belongs to a main-style driver: it reads a file resource through JsonSourceSplit,
// prints the elapsed milliseconds and then prints the frame.
private DataFrame data(JsonReader reader, List rows, List columns) throws IOException { reader.beginArray(); var token = reader.peek(); var rowCount = rows.size(); var colCount = columns.size(); var colIndexes = IntStream.range(0, columns.size()); var arrays = colIndexes.mapToObj(i -> ArrayBuilder.of(rowCount)).collect(Collectors.toList()); while (token != JsonToken.END_ARRAY) { reader.beginArray(); token = reader.peek(); for (int i=0; i)columns.get(0).getClass(); return DataFrame.of(rows, colType, cols -> { for (int i=0; icreate(v -> v.resource(Resource.of(file))); var source = new JsonSourceSplit(); var t1 = System.currentTimeMillis(); var frame = source.read(options); var t2 = System.currentTimeMillis(); IO.println("Loaded frame in " + (t2-t1) + " millis"); frame.out().print(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy