org.apache.hudi.internal.schema.InternalSchemaBuilder Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.internal.schema;
import org.apache.hudi.internal.schema.visitor.InternalSchemaVisitor;
import org.apache.hudi.internal.schema.visitor.NameToIDVisitor;
import org.apache.hudi.internal.schema.visitor.NameToPositionVisitor;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
/**
* A build class to help build fields for InternalSchema
*/
public class InternalSchemaBuilder implements Serializable {
private static final InternalSchemaBuilder INSTANCE = new InternalSchemaBuilder();
public static InternalSchemaBuilder getBuilder() {
return INSTANCE;
}
private InternalSchemaBuilder() {
}
/**
* Build a mapping from id to full field name for a internal Type.
* if a field y belong to a struct filed x, then the full name of y is x.y
*
* @param type hoodie internal type
* @return a mapping from id to full field name
*/
public Map buildIdToName(Type type) {
Map result = new HashMap<>();
buildNameToId(type).forEach((k, v) -> result.put(v, k));
return result;
}
/**
* Build a mapping from full field name to id for a internal Type.
* if a field y belong to a struct filed x, then the full name of y is x.y
*
* @param type hoodie internal type
* @return a mapping from full field name to id
*/
public Map buildNameToId(Type type) {
return visit(type, new NameToIDVisitor());
}
Map buildNameToPosition(Type type) {
return visit(type, new NameToPositionVisitor());
}
/**
* Use to traverse all types in internalSchema with visitor.
*
* @param schema hoodie internal schema
* @return visitor expected result.
*/
public T visit(InternalSchema schema, InternalSchemaVisitor visitor) {
return visitor.schema(schema, visit(schema.getRecord(), visitor));
}
public T visit(Type type, InternalSchemaVisitor visitor) {
switch (type.typeId()) {
case RECORD:
Types.RecordType record = (Types.RecordType) type;
List results = new ArrayList<>();
for (Types.Field f : record.fields()) {
visitor.beforeField(f);
T result;
try {
result = visit(f.type(), visitor);
} finally {
visitor.afterField(f);
}
results.add(visitor.field(f, result));
}
return visitor.record(record, results);
case ARRAY:
Types.ArrayType array = (Types.ArrayType) type;
T elementResult;
Types.Field elementField = array.field(array.elementId());
visitor.beforeArrayElement(elementField);
try {
elementResult = visit(elementField.type(), visitor);
} finally {
visitor.afterArrayElement(elementField);
}
return visitor.array(array, elementResult);
case MAP:
Types.MapType map = (Types.MapType) type;
T keyResult;
T valueResult;
Types.Field keyField = map.field(map.keyId());
visitor.beforeMapKey(keyField);
try {
keyResult = visit(map.keyType(), visitor);
} finally {
visitor.afterMapKey(keyField);
}
Types.Field valueField = map.field(map.valueId());
visitor.beforeMapValue(valueField);
try {
valueResult = visit(map.valueType(), visitor);
} finally {
visitor.afterMapValue(valueField);
}
return visitor.map(map, keyResult, valueResult);
default:
return visitor.primitive((Type.PrimitiveType)type);
}
}
/**
* Build a mapping from id to field for a internal Type.
*
* @param type hoodie internal type
* @return a mapping from id to field
*/
public Map buildIdToField(Type type) {
Map idToField = new HashMap<>();
visitIdToField(type, idToField);
return idToField;
}
private void visitIdToField(Type type, Map index) {
switch (type.typeId()) {
case RECORD:
Types.RecordType record = (Types.RecordType) type;
for (Types.Field field : record.fields()) {
visitIdToField(field.type(), index);
index.put(field.fieldId(), field);
}
return;
case ARRAY:
Types.ArrayType array = (Types.ArrayType) type;
visitIdToField(array.elementType(), index);
for (Types.Field field : array.fields()) {
index.put(field.fieldId(), field);
}
return;
case MAP:
Types.MapType map = (Types.MapType) type;
visitIdToField(map.keyType(), index);
visitIdToField(map.valueType(), index);
for (Types.Field field : map.fields()) {
index.put(field.fieldId(), field);
}
return;
default:
return;
}
}
/**
* Build a mapping which maintain the relation between child field id and it's parent field id.
* if a child field y(which id is 9) belong to a nest field x(which id is 6), then (9 -> 6) will be added to the result map.
* if a field has no parent field, nothings will be added.
*
* @param record hoodie record type.
* @return a mapping from id to parentId for a record Type
*/
public Map index2Parents(Types.RecordType record) {
Map result = new HashMap<>();
Deque parentIds = new LinkedList<>();
index2Parents(record, parentIds, result);
return result;
}
private void index2Parents(Type type, Deque pids, Map id2p) {
switch (type.typeId()) {
case RECORD:
Types.RecordType record = (Types.RecordType)type;
for (Types.Field f : record.fields()) {
pids.push(f.fieldId());
index2Parents(f.type(), pids, id2p);
pids.pop();
}
for (Types.Field f : record.fields()) {
// root record has no parent id.
if (!pids.isEmpty()) {
Integer pid = pids.peek();
id2p.put(f.fieldId(), pid);
}
}
return;
case ARRAY:
Types.ArrayType array = (Types.ArrayType) type;
Types.Field elementField = array.field(array.elementId());
pids.push(elementField.fieldId());
index2Parents(elementField.type(), pids, id2p);
pids.pop();
id2p.put(array.elementId(), pids.peek());
return;
case MAP:
Types.MapType map = (Types.MapType) type;
Types.Field keyField = map.field(map.keyId());
Types.Field valueField = map.field(map.valueId());
// visit key
pids.push(map.keyId());
index2Parents(keyField.type(), pids, id2p);
pids.pop();
// visit value
pids.push(map.valueId());
index2Parents(valueField.type(), pids, id2p);
pids.pop();
id2p.put(map.keyId(), pids.peek());
id2p.put(map.valueId(), pids.peek());
return;
default:
}
}
/**
* Assigns new ids for all fields in a Type, based on initial id.
*
* @param type a type.
* @param nextId initial id which used to fresh ids for all fields in a type
* @return a new type with new ids
*/
public Type refreshNewId(Type type, AtomicInteger nextId) {
switch (type.typeId()) {
case RECORD:
Types.RecordType record = (Types.RecordType) type;
List oldFields = record.fields();
int currentId = nextId.get();
nextId.set(currentId + record.fields().size());
List internalFields = new ArrayList<>();
for (int i = 0; i < oldFields.size(); i++) {
Types.Field oldField = oldFields.get(i);
Type fieldType = refreshNewId(oldField.type(), nextId);
internalFields.add(Types.Field.get(currentId++, oldField.isOptional(), oldField.name(), fieldType, oldField.doc()));
}
return Types.RecordType.get(internalFields);
case ARRAY:
Types.ArrayType array = (Types.ArrayType) type;
int elementId = nextId.get();
nextId.set(elementId + 1);
Type elementType = refreshNewId(array.elementType(), nextId);
return Types.ArrayType.get(elementId, array.isElementOptional(), elementType);
case MAP:
Types.MapType map = (Types.MapType) type;
int keyId = nextId.get();
int valueId = keyId + 1;
nextId.set(keyId + 2);
Type keyType = refreshNewId(map.keyType(), nextId);
Type valueType = refreshNewId(map.valueType(), nextId);
return Types.MapType.get(keyId, valueId, keyType, valueType, map.isValueOptional());
default:
return type;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy