![JAR search and dependency download from the Maven repository](/logo.png)
com.wl4g.infra.common.dataformat.orc.FastJsonOrcHolder Maven / Gradle / Ivy
/*
* Copyright 2023 ~ 2030 the original author or authors. James Wong
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.wl4g.infra.common.dataformat.orc;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import lombok.NoArgsConstructor;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import javax.annotation.Nullable;
import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import static com.wl4g.infra.common.dataformat.FastJsonFlatUtil.flatten;
import static com.wl4g.infra.common.dataformat.FastJsonFlatUtil.unFlatten;
import static com.wl4g.infra.common.lang.Assert2.notNullOf;
import static com.wl4g.infra.common.lang.DateUtils2.formatDate;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.nonNull;
/**
* The {@link FastJsonOrcHolder} class provides conversion utilities between ORC and Fastjson.
*/
@NoArgsConstructor
public class FastJsonOrcHolder extends OrcJsonHolder {
// Shared holder instance, created once via the no-args constructor.
private static final FastJsonOrcHolder DEFAULT = new FastJsonOrcHolder();
/**
* Returns the shared {@link FastJsonOrcHolder} instance stored in {@code DEFAULT}.
* <p>
* The same object is returned on every call.
*
* @return The shared default holder.
*/
public static FastJsonOrcHolder getDefault() {
return DEFAULT;
}
// When true, records are passed through flatten(...) before schema derivation (getSchema)
// and before serialization (toJsonByteArray). Flat mode is fast but not general.
private boolean useFlatSchema;
/**
* Creates a holder with the given flat-schema mode.
* <p>
* No explicit {@code super(...)} call is made, so the superclass no-args constructor is used.
*
* @param useFlatSchema Whether JSON records are flattened before being processed.
*/
@SuppressWarnings("unused")
public FastJsonOrcHolder(boolean useFlatSchema) {
this.useFlatSchema = useFlatSchema;
}
/**
* Creates a holder, forwarding the writer/reader settings to the {@link OrcJsonHolder} constructor
* and storing the flat-schema mode locally.
*
* @param usePhysicalFsWriter Forwarded unchanged to the superclass constructor.
* @param useFlatSchema Whether JSON records are flattened before being processed.
* @param batchMaxSize Forwarded unchanged to the superclass constructor; annotated as non-negative.
* @param timestampFormat Forwarded unchanged to the superclass constructor; may be null.
* @param options Forwarded unchanged to the superclass constructor; may be null.
*/
@SuppressWarnings("unused")
public FastJsonOrcHolder(boolean usePhysicalFsWriter,
boolean useFlatSchema,
@Min(0) int batchMaxSize,
@Nullable String timestampFormat,
@Nullable Properties options) {
super(usePhysicalFsWriter, batchMaxSize, timestampFormat, options);
this.useFlatSchema = useFlatSchema;
}
// ----- Get ORC schema from JSON -----
/**
 * Derives the ORC schema for the given JSON node.
 * <p>
 * In flat-schema mode the node is cast to {@link JSONObject} and flattened first; otherwise it is
 * handed to {@code getSchemaFromJsonObject} as-is.
 *
 * @param jsonNode The Fastjson node to derive the schema from.
 * @return The ORC schema type.
 */
@Override
public TypeDescription getSchema(@NotNull Object jsonNode) {
    if (!useFlatSchema) {
        return getSchemaFromJsonObject(jsonNode);
    }
    // Keep the flattened value typed as Object so the same overload is invoked as before.
    final Object flattened = flatten((JSONObject) jsonNode);
    return getSchemaFromJsonObject(flattened);
}
/**
 * Derives the ORC schema type for the given Fastjson node.
 * <p>
 * A {@link JSONObject} becomes a struct with one field per entry, a {@link JSONArray} is delegated to
 * {@code getListSchemaFromArrayNode}, and any other value is delegated to
 * {@code getPrimitiveTypeDescription}.
 *
 * @param jsonNode The Fastjson node, must not be null.
 * @return The ORC schema type.
 */
@Override
protected TypeDescription getSchemaFromJsonObject(@NotNull Object jsonNode) {
    notNullOf(jsonNode, "jsonNode");
    if (jsonNode instanceof JSONObject) {
        final TypeDescription structSchema = TypeDescription.createStruct();
        // Entries are typed so that the key is a String, as required by addField(String, TypeDescription).
        for (Map.Entry<String, Object> entry : ((JSONObject) jsonNode).entrySet()) {
            final String fieldName = entry.getKey();
            final Object subNode = entry.getValue();
            final TypeDescription fieldSchema;
            if (subNode instanceof JSONArray) {
                fieldSchema = getListSchemaFromArrayNode((JSONArray) subNode);
            } else if (subNode instanceof JSONObject) {
                // Nested objects become nested structs.
                fieldSchema = getSchemaFromJsonObject(subNode);
            } else {
                fieldSchema = getPrimitiveTypeDescription(subNode);
            }
            structSchema.addField(fieldName, fieldSchema);
        }
        return structSchema;
    }
    if (jsonNode instanceof JSONArray) {
        return getListSchemaFromArrayNode((JSONArray) jsonNode);
    }
    return getPrimitiveTypeDescription(jsonNode);
}
/**
* Get the ORC schema type for the given Fastjson node.
*
* @param value The Fastjson node
* @return The ORC schema type
*/
@SuppressWarnings("all")
private TypeDescription getPrimitiveTypeDescription(Object value) {
final Class> nodeType = value.getClass();
if (value instanceof JSONObject) {
return getSchemaFromJsonObject(value);
} else if (value instanceof JSONArray) {
return getListSchemaFromArrayNode((JSONArray) value);
} else if (value instanceof Collection) {
return getListSchemaFromArrayNode((JSONArray) value);
} else if (value instanceof Boolean || nodeType == boolean.class) {
return TypeDescription.createBoolean();
} else if (value instanceof Integer || nodeType == int.class) {
return TypeDescription.createInt();
} else if (value instanceof Long || nodeType == long.class) {
return TypeDescription.createLong();
} else if (value instanceof Float || nodeType == float.class) {
return TypeDescription.createFloat();
} else if (value instanceof Double || nodeType == double.class) {
return TypeDescription.createDouble();
} else if (value instanceof String) {
return TypeDescription.createString();
} else if (value instanceof Date || value instanceof java.sql.Date) {
return TypeDescription.createDate();
} else if (value instanceof BigDecimal) {
return TypeDescription.createDecimal();
} else {
//return TypeDescription.createBinary();
throw new IllegalArgumentException("Unsupported the FastJson node type: " + value);
}
}
// ----- Write ORC from JSON -----
/**
 * Serializes a record to its UTF-8 encoded JSON text.
 * <p>
 * In flat-schema mode the record is cast to {@link JSONObject} and flattened to a single layer
 * before serialization.
 *
 * @param record The record to serialize.
 * @return The UTF-8 bytes of the JSON string.
 */
@Override
protected byte[] toJsonByteArray(Object record) {
    final Object payload = useFlatSchema ? flatten((JSONObject) record) : record;
    final String jsonText = JSON.toJSONString(payload);
    return jsonText.getBytes(UTF_8);
}
// ----- Read ORC to JSON -----
/**
* Creates the record reader used to read ORC data into Fastjson nodes.
* <p>
* All four arguments are passed unchanged to the {@code FastJsonReader} constructor.
*
* @param orcInput The ORC input stream.
* @param length The length passed to the reader; presumably the byte length of the ORC input — confirm against FastJsonReader.
* @param schema The ORC schema passed to the reader.
* @param timestampFormat The timestamp format passed to the reader; may be null.
* @return A new {@code FastJsonReader}.
* @throws IOException Declared for failures while constructing the reader.
*/
@Override
protected RecordReader createRecordReader(FSDataInputStream orcInput,
int length,
TypeDescription schema,
@Nullable String timestampFormat) throws IOException {
return new FastJsonReader(orcInput, length, schema, timestampFormat);
}
/**
* Creates the Fastjson object node used by this holder.
*
* @return A new, empty {@link JSONObject}.
*/
@Override
protected Object createObjectJsonNode() {
return new JSONObject();
}
@Override
protected Iterable
© 2015 - 2025 Weber Informatics LLC | Privacy Policy