All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.parquet.proto.ProtoReadSupport Maven / Gradle / Ivy

/* 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.proto;

import com.google.protobuf.Message;
import com.twitter.elephantbird.util.Protobufs;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.Log;
import org.apache.parquet.hadoop.api.InitContext;
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.MessageType;

import java.util.Map;


/**
 * {@link ReadSupport} implementation that materializes Parquet records as
 * Protocol Buffer messages.
 *
 * <p>The protobuf class to instantiate is taken from the {@link #PB_CLASS}
 * entry of the job configuration when present, otherwise from the
 * {@link #PB_CLASS} entry of the file's key/value metadata. An optional
 * projection can be supplied through {@link #PB_REQUESTED_PROJECTION}.
 *
 * @param <T> type of the materialized protobuf message
 * @author Lukas Nalezenec
 */
public class ProtoReadSupport<T extends Message> extends ReadSupport<T> {

  private static final Log LOG = Log.getLog(ProtoReadSupport.class);

  /** Configuration key holding the requested projection schema string. */
  public static final String PB_REQUESTED_PROJECTION = "parquet.proto.projection";

  /**
   * Key under which the protobuf class name is stored; read from both the
   * configuration and the file's key/value metadata.
   */
  public static final String PB_CLASS = "parquet.proto.class";

  /** Key for the protobuf descriptor; not read anywhere in this class. */
  public static final String PB_DESCRIPTOR = "parquet.proto.descriptor";

  /**
   * Stores the projection to apply when reading under
   * {@link #PB_REQUESTED_PROJECTION}.
   *
   * @param configuration configuration to update
   * @param requestedProjection projection schema string
   */
  public static void setRequestedProjection(Configuration configuration, String requestedProjection) {
    configuration.set(PB_REQUESTED_PROJECTION, requestedProjection);
  }

  /**
   * Set name of protobuf class to be used for reading data.
   * If no class is set, value from file header is used.
   * Note that the value in header is present only if the file was written
   * using parquet-protobuf project, it will fail otherwise.
   *
   * @param configuration configuration to update
   * @param protobufClass name of the protobuf class, stored under {@link #PB_CLASS}
   */
  public static void setProtobufClass(Configuration configuration, String protobufClass) {
    configuration.set(PB_CLASS, protobufClass);
  }

  /**
   * Chooses the schema to read with.
   *
   * <p>When {@link #PB_REQUESTED_PROJECTION} is set to a non-blank value, the
   * projection is resolved against the file schema via
   * {@code getSchemaForRead}; otherwise the complete file schema is used.
   *
   * @param context init context supplying the configuration and file schema
   * @return read context wrapping the selected schema
   */
  @Override
  public ReadContext init(InitContext context) {
    String requestedProjectionString = context.getConfiguration().get(PB_REQUESTED_PROJECTION);

    if (requestedProjectionString != null && !requestedProjectionString.trim().isEmpty()) {
      MessageType requestedProjection = getSchemaForRead(context.getFileSchema(), requestedProjectionString);
      LOG.debug("Reading data with projection " + requestedProjection);
      return new ReadContext(requestedProjection);
    } else {
      MessageType fileSchema = context.getFileSchema();
      LOG.debug("Reading data with schema " + fileSchema);
      return new ReadContext(fileSchema);
    }
  }

  /**
   * Builds the record materializer for the requested schema.
   *
   * <p>The protobuf class name from the configuration ({@link #PB_CLASS})
   * takes precedence over the one stored in the file metadata.
   *
   * @param configuration job configuration, may override the protobuf class
   * @param keyValueMetaData key/value metadata of the file being read
   * @param fileSchema schema of the file (not used by this implementation)
   * @param readContext context returned by {@link #init(InitContext)}
   * @return materializer producing protobuf messages
   * @throws RuntimeException if no protobuf class name is available from
   *         either the configuration or the file metadata
   */
  @Override
  public RecordMaterializer<T> prepareForRead(Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
    // Typed map: get() yields a String, which a raw Map would not.
    String headerProtoClass = keyValueMetaData.get(PB_CLASS);
    String configuredProtoClass = configuration.get(PB_CLASS);

    // An explicitly configured class wins over the one recorded in the file.
    if (configuredProtoClass != null) {
      LOG.debug("Replacing class " + headerProtoClass + " by " + configuredProtoClass);
      headerProtoClass = configuredProtoClass;
    }

    if (headerProtoClass == null) {
      throw new RuntimeException("I Need parameter " + PB_CLASS + " with Protocol Buffer class");
    }

    LOG.debug("Reading data with Protocol Buffer class " + headerProtoClass);

    MessageType requestedSchema = readContext.getRequestedSchema();
    Class<? extends Message> protobufClass = Protobufs.getProtobufClass(headerProtoClass);
    return new ProtoRecordMaterializer(requestedSchema, protobufClass);
  }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy