org.apache.iceberg.SchemaParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of iceberg-core Show documentation
A table format for huge analytic datasets
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.JsonUtil;
/**
 * Static helpers that convert a {@link Schema} (and the {@link Type}s it is built from) to and from
 * its JSON representation.
 *
 * <p>Primitive types are written as plain JSON strings; struct, list and map types are written as
 * JSON objects whose {@code "type"} property names the kind of nested type. This class is not
 * instantiable.
 */
public class SchemaParser {

  private SchemaParser() {}

  // Property names used in the JSON form of a schema and of its nested types.
  private static final String SCHEMA_ID = "schema-id";
  private static final String IDENTIFIER_FIELD_IDS = "identifier-field-ids";
  private static final String TYPE = "type";
  private static final String STRUCT = "struct";
  private static final String LIST = "list";
  private static final String MAP = "map";
  private static final String FIELDS = "fields";
  private static final String ELEMENT = "element";
  private static final String KEY = "key";
  private static final String VALUE = "value";
  private static final String DOC = "doc";
  private static final String NAME = "name";
  private static final String ID = "id";
  private static final String ELEMENT_ID = "element-id";
  private static final String KEY_ID = "key-id";
  private static final String VALUE_ID = "value-id";
  private static final String REQUIRED = "required";
  private static final String ELEMENT_REQUIRED = "element-required";
  private static final String VALUE_REQUIRED = "value-required";

  /**
   * Cache of parsed schemas keyed by the exact JSON text they were parsed from. The cache is built
   * with {@code weakValues()}, so it does not by itself keep a parsed schema strongly reachable.
   */
  private static final Cache<String, Schema> SCHEMA_CACHE =
      Caffeine.newBuilder().weakValues().build();

  /**
   * Writes a nested struct type, i.e. one without a schema ID or identifier field IDs.
   *
   * @param struct the struct type to write
   * @param generator the generator that receives the JSON
   * @throws IOException if the generator fails to write
   */
  private static void toJson(Types.StructType struct, JsonGenerator generator) throws IOException {
    toJson(struct, null, null, generator);
  }

  /**
   * Writes a struct type as a JSON object, optionally including schema-level properties.
   *
   * <p>Properties are written in this order: {@code "type"}, {@code "schema-id"} (only when
   * {@code schemaId} is non-null), {@code "identifier-field-ids"} (only when the set is non-null
   * and non-empty), then the {@code "fields"} array.
   *
   * @param struct the struct type to write
   * @param schemaId the schema ID to include, or {@code null} to omit it
   * @param identifierFieldIds the identifier field IDs to include, or {@code null}/empty to omit
   * @param generator the generator that receives the JSON
   * @throws IOException if the generator fails to write
   */
  private static void toJson(
      Types.StructType struct,
      Integer schemaId,
      Set<Integer> identifierFieldIds,
      JsonGenerator generator)
      throws IOException {
    generator.writeStartObject();

    generator.writeStringField(TYPE, STRUCT);
    if (schemaId != null) {
      generator.writeNumberField(SCHEMA_ID, schemaId);
    }

    if (identifierFieldIds != null && !identifierFieldIds.isEmpty()) {
      JsonUtil.writeIntegerArray(IDENTIFIER_FIELD_IDS, identifierFieldIds, generator);
    }

    generator.writeArrayFieldStart(FIELDS);
    for (Types.NestedField field : struct.fields()) {
      generator.writeStartObject();
      generator.writeNumberField(ID, field.fieldId());
      generator.writeStringField(NAME, field.name());
      generator.writeBooleanField(REQUIRED, field.isRequired());
      generator.writeFieldName(TYPE);
      toJson(field.type(), generator);
      // The doc property is optional and is left out entirely when the field has no doc.
      if (field.doc() != null) {
        generator.writeStringField(DOC, field.doc());
      }
      generator.writeEndObject();
    }
    generator.writeEndArray();

    generator.writeEndObject();
  }

  /**
   * Writes a list type as a JSON object with its element ID, element type and element
   * requiredness.
   *
   * @param list the list type to write
   * @param generator the generator that receives the JSON
   * @throws IOException if the generator fails to write
   */
  static void toJson(Types.ListType list, JsonGenerator generator) throws IOException {
    generator.writeStartObject();

    generator.writeStringField(TYPE, LIST);
    generator.writeNumberField(ELEMENT_ID, list.elementId());
    generator.writeFieldName(ELEMENT);
    toJson(list.elementType(), generator);
    // The type exposes optionality; the JSON stores the inverse as "element-required".
    generator.writeBooleanField(ELEMENT_REQUIRED, !list.isElementOptional());

    generator.writeEndObject();
  }

  /**
   * Writes a map type as a JSON object with its key ID/type, value ID/type and value requiredness.
   *
   * @param map the map type to write
   * @param generator the generator that receives the JSON
   * @throws IOException if the generator fails to write
   */
  static void toJson(Types.MapType map, JsonGenerator generator) throws IOException {
    generator.writeStartObject();

    generator.writeStringField(TYPE, MAP);
    generator.writeNumberField(KEY_ID, map.keyId());
    generator.writeFieldName(KEY);
    toJson(map.keyType(), generator);
    generator.writeNumberField(VALUE_ID, map.valueId());
    generator.writeFieldName(VALUE);
    toJson(map.valueType(), generator);
    // The type exposes optionality; the JSON stores the inverse as "value-required".
    generator.writeBooleanField(VALUE_REQUIRED, !map.isValueOptional());

    generator.writeEndObject();
  }

  /**
   * Writes a primitive type as a JSON string holding the type's {@code toString()} form.
   *
   * @param primitive the primitive type to write
   * @param generator the generator that receives the JSON
   * @throws IOException if the generator fails to write
   */
  static void toJson(Type.PrimitiveType primitive, JsonGenerator generator) throws IOException {
    generator.writeString(primitive.toString());
  }

  /**
   * Writes any type by dispatching to the writer for its concrete kind.
   *
   * @param type the type to write
   * @param generator the generator that receives the JSON
   * @throws IOException if the generator fails to write
   * @throws IllegalArgumentException if the type is nested but is not a struct, list or map
   */
  static void toJson(Type type, JsonGenerator generator) throws IOException {
    if (type.isPrimitiveType()) {
      toJson(type.asPrimitiveType(), generator);
    } else {
      Type.NestedType nested = type.asNestedType();
      // The case labels below are type ID enum constants, not the String constants of this class.
      switch (type.typeId()) {
        case STRUCT:
          toJson(nested.asStructType(), generator);
          break;
        case LIST:
          toJson(nested.asListType(), generator);
          break;
        case MAP:
          toJson(nested.asMapType(), generator);
          break;
        default:
          throw new IllegalArgumentException("Cannot write unknown type: " + type);
      }
    }
  }

  /**
   * Writes a schema, including its schema ID and identifier field IDs, to the given generator.
   *
   * @param schema the schema to write
   * @param generator the generator that receives the JSON
   * @throws IOException if the generator fails to write
   */
  public static void toJson(Schema schema, JsonGenerator generator) throws IOException {
    toJson(schema.asStruct(), schema.schemaId(), schema.identifierFieldIds(), generator);
  }

  /**
   * Serializes a schema to a JSON string without pretty-printing.
   *
   * @param schema the schema to serialize
   * @return the JSON text for the schema
   */
  public static String toJson(Schema schema) {
    return toJson(schema, false);
  }

  /**
   * Serializes a schema to a JSON string.
   *
   * @param schema the schema to serialize
   * @param pretty passed through to {@code JsonUtil.generate} to control pretty-printing
   * @return the JSON text for the schema
   */
  public static String toJson(Schema schema, boolean pretty) {
    // Delegate to the generator-based overload so both entry points write the same properties.
    return JsonUtil.generate(gen -> toJson(schema, gen), pretty);
  }

  /**
   * Parses a type from JSON: a textual node is read as a primitive type, and an object node is
   * read as a struct, list or map according to its {@code "type"} property.
   *
   * @param json the JSON node to parse
   * @return the parsed type
   * @throws IllegalArgumentException if the node is neither textual nor an object with a
   *     recognized {@code "type"} property
   */
  private static Type typeFromJson(JsonNode json) {
    if (json.isTextual()) {
      return Types.fromPrimitiveString(json.asText());
    } else if (json.isObject()) {
      JsonNode typeObj = json.get(TYPE);
      if (typeObj != null) {
        String type = typeObj.asText();
        if (STRUCT.equals(type)) {
          return structFromJson(json);
        } else if (LIST.equals(type)) {
          return listFromJson(json);
        } else if (MAP.equals(type)) {
          return mapFromJson(json);
        }
      }
    }

    // Reached for non-text/non-object nodes and for objects with a missing or unknown "type".
    throw new IllegalArgumentException("Cannot parse type from json: " + json);
  }

  /**
   * Parses a struct type from a JSON object by reading each entry of its {@code "fields"} array.
   *
   * @param json the JSON object describing the struct
   * @return the parsed struct type
   * @throws IllegalArgumentException if {@code "fields"} is not an array or one of its entries is
   *     not an object
   */
  private static Types.StructType structFromJson(JsonNode json) {
    JsonNode fieldArray = JsonUtil.get(FIELDS, json);
    Preconditions.checkArgument(
        fieldArray.isArray(), "Cannot parse struct fields from non-array: %s", fieldArray);

    List<Types.NestedField> fields = Lists.newArrayListWithExpectedSize(fieldArray.size());
    Iterator<JsonNode> iterator = fieldArray.elements();
    while (iterator.hasNext()) {
      JsonNode field = iterator.next();
      Preconditions.checkArgument(
          field.isObject(), "Cannot parse struct field from non-object: %s", field);

      int id = JsonUtil.getInt(ID, field);
      String name = JsonUtil.getString(NAME, field);
      Type type = typeFromJson(JsonUtil.get(TYPE, field));

      String doc = JsonUtil.getStringOrNull(DOC, field);
      boolean isRequired = JsonUtil.getBool(REQUIRED, field);
      if (isRequired) {
        fields.add(Types.NestedField.required(id, name, type, doc));
      } else {
        fields.add(Types.NestedField.optional(id, name, type, doc));
      }
    }

    return Types.StructType.of(fields);
  }

  /**
   * Parses a list type from a JSON object.
   *
   * @param json the JSON object describing the list
   * @return a list type whose element is required or optional per {@code "element-required"}
   */
  private static Types.ListType listFromJson(JsonNode json) {
    int elementId = JsonUtil.getInt(ELEMENT_ID, json);
    Type elementType = typeFromJson(JsonUtil.get(ELEMENT, json));
    boolean isRequired = JsonUtil.getBool(ELEMENT_REQUIRED, json);

    if (isRequired) {
      return Types.ListType.ofRequired(elementId, elementType);
    } else {
      return Types.ListType.ofOptional(elementId, elementType);
    }
  }

  /**
   * Parses a map type from a JSON object.
   *
   * @param json the JSON object describing the map
   * @return a map type whose value is required or optional per {@code "value-required"}
   */
  private static Types.MapType mapFromJson(JsonNode json) {
    int keyId = JsonUtil.getInt(KEY_ID, json);
    Type keyType = typeFromJson(JsonUtil.get(KEY, json));

    int valueId = JsonUtil.getInt(VALUE_ID, json);
    Type valueType = typeFromJson(JsonUtil.get(VALUE, json));

    boolean isRequired = JsonUtil.getBool(VALUE_REQUIRED, json);

    if (isRequired) {
      return Types.MapType.ofRequired(keyId, valueId, keyType, valueType);
    } else {
      return Types.MapType.ofOptional(keyId, valueId, keyType, valueType);
    }
  }

  /**
   * Builds a schema from a parsed JSON node.
   *
   * <p>The node must describe a struct type. When {@code "schema-id"} is absent the schema is
   * created through the constructor that takes no schema ID.
   *
   * @param json the JSON node describing the schema
   * @return the parsed schema
   * @throws IllegalArgumentException if the node does not parse to a struct type
   */
  public static Schema fromJson(JsonNode json) {
    Type type = typeFromJson(json);
    Preconditions.checkArgument(
        type.isNestedType() && type.asNestedType().isStructType(),
        "Cannot create schema, not a struct type: %s",
        type);
    Integer schemaId = JsonUtil.getIntOrNull(SCHEMA_ID, json);
    Set<Integer> identifierFieldIds = JsonUtil.getIntegerSetOrNull(IDENTIFIER_FIELD_IDS, json);

    if (schemaId == null) {
      return new Schema(type.asNestedType().asStructType().fields(), identifierFieldIds);
    } else {
      return new Schema(schemaId, type.asNestedType().asStructType().fields(), identifierFieldIds);
    }
  }

  /**
   * Parses a schema from JSON text, reusing a cached result for text that was already parsed and
   * is still held by the cache.
   *
   * @param json the JSON text describing the schema
   * @return the parsed (possibly cached) schema
   */
  public static Schema fromJson(String json) {
    // The loader receives the cache key, which is the JSON text itself.
    return SCHEMA_CACHE.get(json, jsonKey -> JsonUtil.parse(jsonKey, SchemaParser::fromJson));
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy