All Downloads are FREE. Search and download functionalities are using the official Maven repository.

parquet.scrooge.ScroogeStructConverter Maven / Gradle / Ivy

The newest version!
/* 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package parquet.scrooge;

import com.twitter.scrooge.ThriftStructCodec;
import com.twitter.scrooge.ThriftStructFieldInfo;
import parquet.thrift.struct.ThriftField;
import parquet.thrift.struct.ThriftType;
import parquet.thrift.struct.ThriftType.StructType.StructOrUnionType;
import parquet.thrift.struct.ThriftTypeID;
import scala.collection.JavaConversions;
import scala.collection.JavaConversions$;
import scala.collection.Seq;
import scala.reflect.Manifest;

import java.lang.reflect.*;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import static parquet.thrift.struct.ThriftField.Requirement;
import static parquet.thrift.struct.ThriftField.Requirement.*;

/**
 * Class to convert a scrooge generated class to {@link ThriftType.StructType}. {@link ScroogeReadSupport } uses this
 * class to get the requested schema
 *
 * @author Tianshuo Deng
 */
public class ScroogeStructConverter {

  /**
   * convert a given scrooge generated class to {@link ThriftType.StructType}
   *
   * @param scroogeClass
   * @return
   * @throws Exception
   */
  public ThriftType.StructType convert(Class scroogeClass) {
    return convertStructFromClass(scroogeClass);
  }

  private Class getCompanionClass(Class klass) {
    try {
     return Class.forName(klass.getName() + "$");
    } catch (ClassNotFoundException e) {
      throw new ScroogeSchemaConversionException("Can not find companion object for scrooge class " + klass, e);
    }
  }

  private ThriftType.StructType convertStructFromClass(Class klass) {
    return convertCompanionClassToStruct(getCompanionClass(klass));
  }

  private ThriftType.StructType convertCompanionClassToStruct(Class companionClass) {
    ThriftStructCodec companionObject = null;
    try {
      companionObject = (ThriftStructCodec)companionClass.getField("MODULE$").get(null);
    } catch (ReflectiveOperationException e) {
      throw new ScroogeSchemaConversionException("Can not get ThriftStructCodec from companion object of " + companionClass.getName(), e);
    }

    List children = new LinkedList();//{@link ThriftType.StructType} uses foreach loop to iterate the children, yields O(n) time for linked list
    Iterable scroogeFields = getFieldInfos(companionObject);
    for (ThriftStructFieldInfo field : scroogeFields) {
      children.add(toThriftField(field));
    }

    StructOrUnionType structOrUnionType =
        isUnion(companionObject.getClass()) ? StructOrUnionType.UNION : StructOrUnionType.STRUCT;

    return new ThriftType.StructType(children, structOrUnionType);
  }

  private Iterable getFieldInfos(ThriftStructCodec c) {
    Class klass = c.getClass();
    if (isUnion(klass)){
      // Union needs special treatment since currently scrooge does not generates the fieldInfos
      // field in the parent union class
      return getFieldInfosForUnion(klass);
    } else {
      //each struct has a generated fieldInfos method to provide metadata to its fields
      try {
        Object r = klass.getMethod("fieldInfos").invoke(c);
        Iterable a = JavaConversions$.MODULE$.asJavaIterable((scala.collection.Iterable)r);
        return a;
      } catch (ReflectiveOperationException e) {
        throw new ScroogeSchemaConversionException("can not get field Info from: " + c.toString(), e);
      }
    }
  }


  private Iterable getFieldInfosForUnion(Class klass) {
    ArrayList fields = new ArrayList();
    for(Field f: klass.getDeclaredFields()){
       if (f.getType().equals(Manifest.class)) {
         Class unionClass = (Class)((ParameterizedType)f.getGenericType()).getActualTypeArguments()[0];
         Class companionUnionClass = getCompanionClass(unionClass);
         try {
           Object companionUnionObj = companionUnionClass.getField("MODULE$").get(null);
           ThriftStructFieldInfo info = (ThriftStructFieldInfo)companionUnionClass.getMethod("fieldInfo").invoke(companionUnionObj);
           fields.add(info);
         } catch (ReflectiveOperationException e) {
           throw new ScroogeSchemaConversionException("can not find fiedInfo for " + unionClass, e);
         }
      }
    }
    return fields;
  }

  private boolean isUnion(Class klass){
    for(Field f: klass.getDeclaredFields()) {
         if (f.getName().equals("Union"))
           return true;
    }
    return false;
  }


  private Requirement getRequirementType(ThriftStructFieldInfo f) {
    if (f.isOptional() && !f.isRequired()) {
      return OPTIONAL;
    } else if (f.isRequired() && !f.isOptional()) {
      return REQUIRED;
    } else if (!f.isOptional() && !f.isRequired()) {
      return DEFAULT;
    } else {
      throw new ScroogeSchemaConversionException("can not determine requirement type for : " + f.toString()
              + ", isOptional=" + f.isOptional() + ", isRequired=" + f.isRequired());
    }
  }

  /**
   * Convert thrift field in scrooge to ThriftField in parquet
   * Use reflection to detect if a field is optional or required since scrooge does not provide requirement information
   * in generated classes.
   * This will not correctly recognize fields that are not specified with a requirement type eg.
   * struct Address {
   * 1: string street
   * }
   * street will be identified as "REQUIRED"
   *
   * @param scroogeField
   * @return
   * @throws Exception
   */
  public ThriftField toThriftField(ThriftStructFieldInfo scroogeField) {
    Requirement requirement = getRequirementType(scroogeField);
    String fieldName = scroogeField.tfield().name;
    short fieldId = scroogeField.tfield().id;
    byte thriftTypeByte = scroogeField.tfield().type;
    ThriftTypeID typeId = ThriftTypeID.fromByte(thriftTypeByte);
    ThriftType thriftType;
    switch (typeId) {
    case BOOL:
      thriftType = new ThriftType.BoolType();
      break;
    case BYTE:
      thriftType = new ThriftType.ByteType();
      break;
    case DOUBLE:
      thriftType = new ThriftType.DoubleType();
      break;
    case I16:
      thriftType = new ThriftType.I16Type();
      break;
    case I32:
      thriftType = new ThriftType.I32Type();
      break;
    case I64:
      thriftType = new ThriftType.I64Type();
      break;
    case STRING:
      thriftType = new ThriftType.StringType();
      break;
    case STRUCT:
      thriftType = convertStructTypeField(scroogeField);
      break;
    case MAP:
      thriftType = convertMapTypeField(scroogeField, requirement);
      break;
    case SET:
      thriftType = convertSetTypeField(scroogeField, requirement);
      break;
    case LIST:
      thriftType = convertListTypeField(scroogeField, requirement);
      break;
    case ENUM:
      thriftType = convertEnumTypeField(scroogeField);
      break;
    case STOP:
    case VOID:
    default:
      throw new IllegalArgumentException("can't convert type " + typeId);
    }
    return new ThriftField(fieldName, fieldId, requirement, thriftType);
  }

  private ThriftType convertSetTypeField(ThriftStructFieldInfo f, Requirement requirement) {
    ThriftType elementType = convertClassToThriftType(f.valueManifest().get().runtimeClass());
    //Set only has one sub-field as element field, therefore using hard-coded 1 as fieldId,
    //it's the same as the solution used in ElephantBird
    ThriftField elementField = generateFieldWithoutId(f.tfield().name, requirement, elementType);
    return new ThriftType.SetType(elementField);
  }

  private ThriftType convertListTypeField(ThriftStructFieldInfo f, Requirement requirement) {
    ThriftType elementType = convertClassToThriftType(f.valueManifest().get().runtimeClass());
    ThriftField elementField = generateFieldWithoutId(f.tfield().name, requirement, elementType);
    return new ThriftType.ListType(elementField);
  }

  private ThriftType convertMapTypeField(ThriftStructFieldInfo f, Requirement requirement) {
    ThriftType keyType = convertClassToThriftType(f.keyManifest().get().runtimeClass());
    ThriftField keyField = generateFieldWithoutId(f.tfield().name + "_map_key", requirement, keyType);

    ThriftType valueType = convertClassToThriftType(f.valueManifest().get().runtimeClass());
    ThriftField valueField = generateFieldWithoutId(f.tfield().name + "_map_value", requirement, valueType);

    return new ThriftType.MapType(keyField, valueField);
  }

  /**
   * Generate artificial field, this kind of fields do not have a field ID.
   * To be consistent with the behavior in ElephantBird, here uses 1 as the field ID
   *
   * @param fieldName
   * @param requirement
   * @param thriftType
   * @return
   */
  private ThriftField generateFieldWithoutId(String fieldName, Requirement requirement, ThriftType thriftType) {
    return new ThriftField(fieldName, (short)1, requirement, thriftType);
  }

  /**
   * In composite types,  such as the type of the key in a map, since we use reflection to get the type class, this method
   * does conversion based on the class provided.
   *
   * @param typeClass
   * @return
   * @throws Exception
   */
  private ThriftType convertClassToThriftType(Class typeClass) {
    if (typeClass == boolean.class) {
      return new ThriftType.BoolType();
    } else if (typeClass == byte.class) {
      return new ThriftType.ByteType();
    } else if (typeClass == double.class) {
      return new ThriftType.DoubleType();
    } else if (typeClass == short.class) {
      return new ThriftType.I16Type();
    } else if (typeClass == int.class) {
      return new ThriftType.I32Type();
    } else if (typeClass == long.class) {
      return new ThriftType.I64Type();
    } else if (typeClass == String.class) {
      return new ThriftType.StringType();
    } else {
      return convertStructFromClass(typeClass);
    }
  }

  private ThriftType convertStructTypeField(ThriftStructFieldInfo f) {
    return convertStructFromClass(f.manifest().runtimeClass());
  }

  /**
   * When define an enum in scrooge, each enum value is a subclass of the enum class, the enum class could be Operation$
   */
  private List getEnumList(String enumName) throws ClassNotFoundException, IllegalAccessException, NoSuchFieldException, NoSuchMethodException, InvocationTargetException {
    enumName += "$";//In scala generated code, the actual class is ended with $
    Class companionObjectClass = Class.forName(enumName);
    Object cObject = companionObjectClass.getField("MODULE$").get(null);
    Method listMethod = companionObjectClass.getMethod("list", new Class[]{});
    Object result = listMethod.invoke(cObject, null);
    return JavaConversions.seqAsJavaList((Seq)result);
  }

  public ThriftType convertEnumTypeField(ThriftStructFieldInfo f) {
    List enumValues = new ArrayList();

    String enumName = f.manifest().runtimeClass().getName();
    try {
      List enumCollection = getEnumList(enumName);
      for (Object enumObj : enumCollection) {
        ScroogeEnumDesc enumDesc = ScroogeEnumDesc.getEnumDesc(enumObj);
        //be compatible with thrift generated enum which have capitalized name
        enumValues.add(new ThriftType.EnumValue(enumDesc.id, enumDesc.originalName));
      }
      return new ThriftType.EnumType(enumValues);
    } catch (ReflectiveOperationException e) {
      throw new ScroogeSchemaConversionException("Can not convert enum field " + f, e);
    }

  }

  private static class ScroogeEnumDesc {
    private int id;
    private String name;
    private String originalName;

    public static ScroogeEnumDesc getEnumDesc(Object rawScroogeEnum) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
      Class enumClass = rawScroogeEnum.getClass();
      Method valueMethod = enumClass.getMethod("value", new Class[]{});
      Method nameMethod = enumClass.getMethod("name", new Class[]{});
      Method originalNameMethod = enumClass.getMethod("originalName", new Class[]{});
      ScroogeEnumDesc result = new ScroogeEnumDesc();
      result.id = (Integer)valueMethod.invoke(rawScroogeEnum, null);
      result.name = (String)nameMethod.invoke(rawScroogeEnum, null);
      result.originalName = (String)originalNameMethod.invoke(rawScroogeEnum, null);
      return result;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy