All downloads are free. Search and download functionality uses the official Maven repository.

co.cask.common.internal.io.AbstractSchemaGenerator Maven / Gradle / Ivy

/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.common.internal.io;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.reflect.TypeToken;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.net.URI;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

/**
 * An abstract base class for generating schema. It knows how to generate
 * most of the supported data type, except record (bean class) type, which
 * it delegates to child class.
 */
public abstract class AbstractSchemaGenerator implements SchemaGenerator {
  private static final Logger LOG = LoggerFactory.getLogger(AbstractSchemaGenerator.class);

  /**
   * Mapping Java types into Schemas for simple data types.
   */
  private static final Map, Schema> SIMPLE_SCHEMAS =
    ImmutableMap., Schema>builder()
      .put(Boolean.TYPE, Schema.of(Schema.Type.BOOLEAN))
      .put(Byte.TYPE, Schema.of(Schema.Type.INT))
      .put(Character.TYPE, Schema.of(Schema.Type.INT))
      .put(Short.TYPE, Schema.of(Schema.Type.INT))
      .put(Integer.TYPE, Schema.of(Schema.Type.INT))
      .put(Long.TYPE, Schema.of(Schema.Type.LONG))
      .put(Float.TYPE, Schema.of(Schema.Type.FLOAT))
      .put(Double.TYPE, Schema.of(Schema.Type.DOUBLE))

      .put(Boolean.class, Schema.of(Schema.Type.BOOLEAN))
      .put(Byte.class, Schema.of(Schema.Type.INT))
      .put(Character.class, Schema.of(Schema.Type.INT))
      .put(Short.class, Schema.of(Schema.Type.INT))
      .put(Integer.class, Schema.of(Schema.Type.INT))
      .put(Long.class, Schema.of(Schema.Type.LONG))
      .put(Float.class, Schema.of(Schema.Type.FLOAT))
      .put(Double.class, Schema.of(Schema.Type.DOUBLE))

      .put(String.class, Schema.of(Schema.Type.STRING))
      .put(byte[].class, Schema.of(Schema.Type.BYTES))
      .put(ByteBuffer.class, Schema.of(Schema.Type.BYTES))

      // Some extra ones for some common build-in types. Need corresponding handling in DatumReader/Writer
      .put(URI.class, Schema.of(Schema.Type.STRING))
      .put(URL.class, Schema.of(Schema.Type.STRING))
      .put(UUID.class, Schema.of(Schema.Type.BYTES))
      .build();

  @Override
  public final Schema generate(Type type) throws UnsupportedTypeException {
    return generate(type, true);
  }

  @Override
  public final Schema generate(Type type, boolean acceptRecursiveTypes) throws UnsupportedTypeException {
    Set knownRecords = ImmutableSet.of();
    return doGenerate(TypeToken.of(type), knownRecords, acceptRecursiveTypes);
  }

  /**
   * Actual schema generation. It recursively resolves container types.
   *
   * @param typeToken    Encapsulate the Java type for generating a {@link Schema}.
   * @param knownRecords Set of record names that has the schema already generated. It is used for
   *                     recursive class field references.
   * @param acceptRecursion Whether to tolerate type recursion. If false, will throw UnsupportedTypeException if
   *                        a recursive type is encountered.
   * @return A {@link Schema} representing the given java {@link java.lang.reflect.Type}.
   * @throws UnsupportedTypeException Indicates schema generation is not support for the given java
   *                                  {@link java.lang.reflect.Type}.
   */
  @SuppressWarnings("unchecked")
  protected final Schema doGenerate(TypeToken typeToken, Set knownRecords, boolean acceptRecursion)
    throws UnsupportedTypeException {
    Type type = typeToken.getType();
    Class rawType = typeToken.getRawType();

    if (SIMPLE_SCHEMAS.containsKey(rawType)) {
      return SIMPLE_SCHEMAS.get(rawType);
    }

    // Enum type, simply use all the enum constants for ENUM schema.
    if (rawType.isEnum()) {
      return Schema.enumWith((Class>) rawType);
    }

    // Java array, use ARRAY schema.
    if (rawType.isArray()) {
      Schema componentSchema = doGenerate(TypeToken.of(rawType.getComponentType()), knownRecords, acceptRecursion);
      if (rawType.getComponentType().isPrimitive()) {
        return Schema.arrayOf(componentSchema);
      }
      return Schema.arrayOf(Schema.unionOf(componentSchema, Schema.of(Schema.Type.NULL)));
    }

    if (!(type instanceof Class || type instanceof ParameterizedType)) {
      throw new UnsupportedTypeException("Type " + type + " is not supported. " +
                                         "Only Class or ParameterizedType are supported.");
    }

    // Any parameterized Collection class would be represented by ARRAY schema.
    if (Collection.class.isAssignableFrom(rawType)) {
      if (!(type instanceof ParameterizedType)) {
        throw new UnsupportedTypeException("Only supports parameterized Collection type.");
      }
      TypeToken componentType = typeToken.resolveType(((ParameterizedType) type).getActualTypeArguments()[0]);
      Schema componentSchema = doGenerate(componentType, knownRecords, acceptRecursion);
      return Schema.arrayOf(Schema.unionOf(componentSchema, Schema.of(Schema.Type.NULL)));
    }

    // Java Map, use MAP schema.
    if (Map.class.isAssignableFrom(rawType)) {
      if (!(type instanceof ParameterizedType)) {
        throw new UnsupportedTypeException("Only supports parameterized Map type.");
      }
      Type[] typeArgs = ((ParameterizedType) type).getActualTypeArguments();
      TypeToken keyType = typeToken.resolveType(typeArgs[0]);
      TypeToken valueType = typeToken.resolveType(typeArgs[1]);

      Schema valueSchema = doGenerate(valueType, knownRecords, acceptRecursion);

      return Schema.mapOf(doGenerate(keyType, knownRecords, acceptRecursion),
                          Schema.unionOf(valueSchema, Schema.of(Schema.Type.NULL)));
    }

    // Any Java class, class name as the record name.
    String recordName = typeToken.getRawType().getName();
    if (knownRecords.contains(recordName)) {
      // Record already seen before
      if (acceptRecursion) {
        // simply create a reference RECORD schema by the name.
        return Schema.recordOf(recordName);
      } else {
        throw new UnsupportedTypeException("Recursive type not supported for class " + recordName);
      }
    }

    // Delegate to child class to generate RECORD schema.
    return generateRecord(typeToken, knownRecords, acceptRecursion);
  }

  /**
   * Generates a RECORD schema of the given type.
   *
   * @param typeToken Type of the record.
   * @param knownRecords Set of record names that schema has already been generated.
   * @param acceptRecursiveTypes Whether to tolerate type recursion. If false, will throw UnsupportedTypeException if
   *                             a recursive type is encountered.
   * @return An instance of {@link Schema}
   * @throws UnsupportedTypeException
   */
  protected abstract Schema generateRecord(TypeToken typeToken,
                                           Set knownRecords,
                                           boolean acceptRecursiveTypes) throws UnsupportedTypeException;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy