All Downloads are FREE. Search and download functionalities are using the official Maven repository.

co.cask.wrangler.utils.Json2Schema Maven / Gradle / Ivy

There is a newer version: 3.2.2
Show newest version
/*
 * Copyright © 2017 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.wrangler.utils;

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.dataset.lib.KeyValue;
import co.cask.cdap.internal.guava.reflect.TypeToken;
import co.cask.cdap.internal.io.AbstractSchemaGenerator;
import co.cask.wrangler.api.Record;
import co.cask.wrangler.steps.parser.JsParser;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonPrimitive;
import org.json.JSONException;

import java.math.BigDecimal;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;

/**
 * Json to Schema translates a JSON string into a {@link Schema} schema.
 *
 * This class takes in JSON as a string or parsed JSON as {@link JsonElement} and converts it to the
 * {@link Schema}. It's recursive in nature and it creates multi-level of {@link Schema}.
 */
public final class Json2Schema {
  // Parser to parse json if provided as string.
  private final JsonParser parser;

  public Json2Schema() {
    this.parser = new JsonParser();
  }

  /**
   * Generates a {@link Schema} given a record.
   *
   * @param id Schema id
   * @return returns {@link Schema}
   * @throws UnsupportedTypeException
   * @throws JSONException
   */
  public Schema toSchema(String id, Record record) throws RecordConvertorException {
    List fields = new ArrayList<>();

    // Iterate through each field in the record.
    for (KeyValue column : record.getFields()) {
      String name = column.getKey();
      Object value = column.getValue();

      // First, we check if object is of simple type.
      if (value instanceof String || value instanceof Integer || value instanceof Long || value instanceof Short ||
          value instanceof Double || value instanceof Float || value instanceof Boolean || value instanceof byte[]) {
        try {
          Schema schema = Schema.nullableOf(new SimpleSchemaGenerator().generate(value.getClass()));
          fields.add(Schema.Field.of(name, schema));
        } catch (UnsupportedTypeException e) {
          throw new RecordConvertorException(
            String.format("Unable to convert field '%s' to basic type.", name)
          );
        }
      }

      if (value instanceof BigDecimal) {
        Schema schema = Schema.nullableOf(Schema.of(Schema.Type.DOUBLE));
        fields.add(Schema.Field.of(name, schema));
      }

      if (value instanceof Date || value instanceof java.sql.Date || value instanceof Time
        || value instanceof Timestamp) {
        Schema schema = Schema.nullableOf(Schema.of(Schema.Type.LONG));
        fields.add(Schema.Field.of(name, schema));
      }

      if (value instanceof Map) {
        Schema schema = Schema.nullableOf (Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING)));
        fields.add(Schema.Field.of(name, schema));
      }

      if (value instanceof JsonElement) {
        Schema schema = toSchema(name, (JsonElement) value);
        fields.add(Schema.Field.of(name, schema));
      }

      if (value instanceof Schema) {
        fields.addAll(((Schema) value).getFields());
      }
    }
    return Schema.recordOf(id, fields);
  }

  /**
   * Converts a json string to a {@link Schema}.
   *
   * @param id of the {@link Schema}
   * @param json represents the JSON as {@link String}
   * @return instance of {@link Schema} representing the JSON.
   * @throws RecordConvertorException throw if there are any issues parsing to {@link Schema}
   * @see {@link #toSchema(String, Record)}
   */
  public Schema toSchema(String id, String json) throws RecordConvertorException {
    JsonElement element = parser.parse(json);
    return toSchema(id, element);
  }

  @Nullable
  private Schema toSchema(String name, JsonElement element) throws RecordConvertorException {
    Schema schema = null;
    if (element == null) {
      return null;
    }
    if (element.isJsonObject() || element.isJsonArray()) {
      schema = toComplexSchema(name, element);
    } else if (element.isJsonPrimitive()) {
      schema = toSchema(name, element.getAsJsonPrimitive());
    } else if (element.isJsonNull()) {
      schema = Schema.of(Schema.Type.NULL);
    }
    return schema;
  }

  private Schema toSchema(String name, JsonArray array) throws RecordConvertorException {
    int[] types = new int[3];
    types[0] = types[1] = types[2] = 0;
    for(int i = 0; i < array.size(); ++i) {
      JsonElement item = array.get(i);
      if (item.isJsonArray()) {
        types[1]++;
      } else if (item.isJsonPrimitive()) {
        types[0]++;
      } else if (item.isJsonObject()) {
        types[2]++;
      }
    }

    int sum = types[0] + types[1] + types[2];
    if (sum > 0) {
      JsonElement child = array.get(0);
      if (types[2] > 0 || types[1] > 0) {
        return Schema.nullableOf(Schema.arrayOf(toComplexSchema(name, child)));
      } else if (types[0] > 0) {
        return Schema.nullableOf(Schema.arrayOf(toSchema(name, child.getAsJsonPrimitive())));
      }
    }
    return Schema.nullableOf(Schema.arrayOf(Schema.of(Schema.Type.NULL)));
  }

  private Schema toSchema(String name, JsonPrimitive primitive) throws RecordConvertorException {
    Object value = JsParser.getValue(primitive);
    try {
      return Schema.nullableOf(new SimpleSchemaGenerator().generate(value.getClass()));
    } catch (UnsupportedTypeException e) {
      throw new RecordConvertorException(
        String.format("Unable to convert field '%s' to basic type.", name)
      );
    }
  }

  private Schema toSchema(String name, JsonObject object) throws RecordConvertorException {
    List fields = new ArrayList<>();
    Iterator> iterator = object.entrySet().iterator();
    while(iterator.hasNext()) {
      Map.Entry next = iterator.next();
      String key = next.getKey();
      JsonElement child = next.getValue();
      Schema schema = null;
      if (child.isJsonObject() || child.isJsonArray()) {
        schema = toComplexSchema(key, child);
      } else if (child.isJsonPrimitive()) {
        schema = toSchema(key, child.getAsJsonPrimitive());
      }
      if (schema != null) {
        fields.add(Schema.Field.of(key, schema));
      }
    }
    if (fields.size() == 0) {
      return Schema.nullableOf(Schema.arrayOf(Schema.of(Schema.Type.NULL)));
    }
    return Schema.recordOf(name, fields);
  }

  private Schema toComplexSchema(String name, JsonElement element) throws RecordConvertorException {
    if (element.isJsonObject()) {
      return toSchema(name, element.getAsJsonObject());
    } else if (element.isJsonArray()){
      return toSchema(name, element.getAsJsonArray());
    } else if (element.isJsonPrimitive()) {
      return toSchema(name, element.getAsJsonPrimitive());
    }
    return null;
  }


  private static final class SimpleSchemaGenerator extends AbstractSchemaGenerator {
    @Override
    protected Schema generateRecord(TypeToken typeToken, Set set,
                                    boolean b) throws UnsupportedTypeException {
      // we don't actually leverage this method for types we support, so no need to implement it
      throw new UnsupportedTypeException(String.format("Generating record of type %s is not supported.",
                                                       typeToken));
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy