// org.apache.parquet.proto.ProtoReadSupport Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.proto;
import com.google.protobuf.Message;
import com.twitter.elephantbird.util.Protobufs;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.Log;
import org.apache.parquet.hadoop.api.InitContext;
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.MessageType;
import java.util.Map;
/**
* @author Lukas Nalezenec
*/
public class ProtoReadSupport extends ReadSupport {
private static final Log LOG = Log.getLog(ProtoReadSupport.class);
public static final String PB_REQUESTED_PROJECTION = "parquet.proto.projection";
public static final String PB_CLASS = "parquet.proto.class";
public static final String PB_DESCRIPTOR = "parquet.proto.descriptor";
public static void setRequestedProjection(Configuration configuration, String requestedProjection) {
configuration.set(PB_REQUESTED_PROJECTION, requestedProjection);
}
/**
* Set name of protobuf class to be used for reading data.
* If no class is set, value from file header is used.
* Note that the value in header is present only if the file was written
* using parquet-protobuf project, it will fail otherwise.
* */
public static void setProtobufClass(Configuration configuration, String protobufClass) {
configuration.set(PB_CLASS, protobufClass);
}
@Override
public ReadContext init(InitContext context) {
String requestedProjectionString = context.getConfiguration().get(PB_REQUESTED_PROJECTION);
if (requestedProjectionString != null && !requestedProjectionString.trim().isEmpty()) {
MessageType requestedProjection = getSchemaForRead(context.getFileSchema(), requestedProjectionString);
LOG.debug("Reading data with projection " + requestedProjection);
return new ReadContext(requestedProjection);
} else {
MessageType fileSchema = context.getFileSchema();
LOG.debug("Reading data with schema " + fileSchema);
return new ReadContext(fileSchema);
}
}
@Override
public RecordMaterializer prepareForRead(Configuration configuration, Map keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
String headerProtoClass = keyValueMetaData.get(PB_CLASS);
String configuredProtoClass = configuration.get(PB_CLASS);
if (configuredProtoClass != null) {
LOG.debug("Replacing class " + headerProtoClass + " by " + configuredProtoClass);
headerProtoClass = configuredProtoClass;
}
if (headerProtoClass == null) {
throw new RuntimeException("I Need parameter " + PB_CLASS + " with Protocol Buffer class");
}
LOG.debug("Reading data with Protocol Buffer class " + headerProtoClass);
MessageType requestedSchema = readContext.getRequestedSchema();
Class extends Message> protobufClass = Protobufs.getProtobufClass(headerProtoClass);
return new ProtoRecordMaterializer(requestedSchema, protobufClass);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy