// ![JAR search and dependency download from the Maven repository](/logo.png)
// water.parser.avro.AvroUtil Maven / Gradle / Ivy
package water.parser.avro;
import org.apache.avro.Schema;
import java.util.Arrays;
import java.util.List;
import water.fvec.Vec;
/**
* Utilities to work with Avro schema.
*/
public final class AvroUtil {

  /**
   * Tell whether the given Avro schema can be transformed into an H2O type.
   *
   * <p>Supported are the primitive types BOOLEAN, INT, LONG, FLOAT, DOUBLE,
   * STRING, BYTES and NULL, the ENUM type, and a UNION that either has a single
   * member or has exactly two members of which one is NULL, provided the
   * remaining member is itself supported.
   *
   * @param s avro field schema
   * @return true if the schema can be transformed into H2O type
   */
  public static boolean isSupportedSchema(Schema s) {
    switch (s.getType()) {
      case BOOLEAN:
      case INT:
      case LONG:
      case FLOAT:
      case DOUBLE:
      case ENUM:
      case STRING:
      case NULL:
      case BYTES:
        return true;
      case UNION: {
        // A union is supported only if it collapses to a single supported branch.
        Schema branch = unwrapUnion(s);
        return branch != null && isSupportedSchema(branch);
      }
      default:
        return false;
    }
  }

  /**
   * Transform Avro schema into H2O type.
   *
   * <p>Numeric and boolean types map to {@code Vec.T_NUM}, ENUM to
   * {@code Vec.T_CAT}, STRING and BYTES to {@code Vec.T_STR} and NULL to
   * {@code Vec.T_BAD}. A union is resolved through its single effective branch.
   *
   * @param s avro schema
   * @return a byte representing H2O column type
   * @throws IllegalArgumentException if schema is not supported
   */
  public static byte schemaToColumnType(Schema s) {
    switch (s.getType()) {
      case BOOLEAN:
      case INT:
      case LONG:
      case FLOAT:
      case DOUBLE:
        return Vec.T_NUM;
      case ENUM:
        return Vec.T_CAT;
      case STRING:
      case BYTES:
        return Vec.T_STR;
      case NULL:
        return Vec.T_BAD;
      case UNION: {
        Schema branch = unwrapUnion(s);
        if (branch != null) {
          return schemaToColumnType(branch);
        }
        break; // union cannot be collapsed: reported as unsupported below
      }
      default:
        break;
    }
    throw new IllegalArgumentException("Unsupported Avro schema type: " + s);
  }

  /**
   * Extract the categorical domain (the enum symbols) from a field schema.
   *
   * <p>The schema has to be an ENUM, or a union which collapses to an ENUM
   * (a single-member union, or a two-member union with a NULL member).
   *
   * @param fieldSchema avro field schema
   * @return enum symbols of the schema, in the order reported by the schema
   * @throws IllegalArgumentException if no domain can be derived from the schema
   */
  static String[] getDomain(Schema fieldSchema) {
    Schema.Type typ = fieldSchema.getType();
    if (typ == Schema.Type.ENUM) {
      return fieldSchema.getEnumSymbols().toArray(new String[0]);
    }
    if (typ == Schema.Type.UNION) {
      Schema branch = unwrapUnion(fieldSchema);
      if (branch != null) {
        return getDomain(branch);
      }
    }
    throw new IllegalArgumentException("Cannot get domain from field: " + fieldSchema);
  }

  /**
   * Transform Avro schema into its primitive representation.
   *
   * <p>For non-union supported types the schema's own type is returned; a union
   * is resolved through its single effective branch.
   *
   * @param s avro schema
   * @return primitive type as a result of transformation
   * @throws IllegalArgumentException if the schema has no primitive transformation
   */
  public static Schema.Type toPrimitiveType(Schema s) {
    Schema.Type typ = s.getType();
    switch (typ) {
      case BOOLEAN:
      case INT:
      case LONG:
      case FLOAT:
      case DOUBLE:
      case ENUM:
      case STRING:
      case NULL:
      case BYTES:
        return typ;
      case UNION: {
        Schema branch = unwrapUnion(s);
        if (branch != null) {
          return toPrimitiveType(branch);
        }
        break; // union cannot be collapsed: reported as unsupported below
      }
      default:
        break;
    }
    throw new IllegalArgumentException("Unsupported Avro schema type: " + s);
  }

  /**
   * The method "flattenize" the given Avro schema: it keeps only the fields
   * whose schema is accepted by {@link #isSupportedSchema(Schema)}, preserving
   * their original order.
   *
   * @param s Avro schema
   * @return array of supported fields which were extracted from original Schema
   */
  public static Schema.Field[] flatSchema(Schema s) {
    List<Schema.Field> fields = s.getFields();
    Schema.Field[] flatSchema = new Schema.Field[fields.size()];
    int cnt = 0;
    for (Schema.Field f : fields) {
      if (isSupportedSchema(f.schema())) {
        flatSchema[cnt++] = f;
      }
    }
    // Shrink the array only if some fields were filtered out.
    return cnt != flatSchema.length ? Arrays.copyOf(flatSchema, cnt) : flatSchema;
  }

  /**
   * Collapse a UNION schema to its single effective branch.
   *
   * @param union schema of type UNION
   * @return the only member of a one-member union; for a two-member union the
   *         second member if the first one is NULL, otherwise the first member
   *         if the second one is NULL; {@code null} if the union cannot be
   *         collapsed (no NULL member in a two-member union, or any other size)
   */
  private static Schema unwrapUnion(Schema union) {
    List<Schema> members = union.getTypes();
    if (members.size() == 1) {
      return members.get(0);
    }
    if (members.size() == 2) {
      Schema first = members.get(0);
      Schema second = members.get(1);
      if (first.getType() == Schema.Type.NULL) {
        return second;
      }
      if (second.getType() == Schema.Type.NULL) {
        return first;
      }
    }
    return null;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy