All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.bullet.dsl.converter.AvroBulletRecordConverter Maven / Gradle / Ivy

The newest version!
/*
 *  Copyright 2018, Yahoo Inc.
 *  Licensed under the terms of the Apache License, Version 2.0.
 *  See the LICENSE file associated with the project for terms.
 */
package com.yahoo.bullet.dsl.converter;

import com.yahoo.bullet.common.BulletConfig;
import com.yahoo.bullet.dsl.BulletDSLConfig;
import com.yahoo.bullet.dsl.BulletDSLException;
import com.yahoo.bullet.dsl.schema.BulletRecordField;
import com.yahoo.bullet.record.BulletRecord;
import com.yahoo.bullet.typesystem.TypedObject;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * AvroBulletRecordConverter converts an Avro record into a BulletRecord. The Avro record must use {@link String} as its
 * string type.
 * 

* If a schema is not specified, avro records are effectively flattened without any regard to type-safety. *

* Note, this class is not related to {@link com.yahoo.bullet.record.avro.TypedAvroBulletRecord} or * {@link com.yahoo.bullet.record.avro.UntypedAvroBulletRecord}. */ @Slf4j public class AvroBulletRecordConverter extends BulletRecordConverter { private static final long serialVersionUID = -5066600942303615002L; protected boolean runStringFixer; /** * Constructs an AvroBulletRecordConverter without a schema. * * @throws BulletDSLException if there is an error creating the converter. */ public AvroBulletRecordConverter() throws BulletDSLException { super(null); build(); } /** * Constructs an AvroBulletRecordConverter from a given schema. * * @param schema A schema file that specifies the fields to extract and their types. * @throws BulletDSLException if there is an error creating the converter. */ public AvroBulletRecordConverter(String schema) throws BulletDSLException { super(null); Objects.requireNonNull(schema); config.set(BulletDSLConfig.RECORD_CONVERTER_SCHEMA_FILE, schema); config.validate(); build(); } /** * Constructs an AvroBulletRecordConverter from a given configuration. * * @param bulletConfig The configuration that specifies the settings for an AvroBulletRecordConverter. * @throws BulletDSLException if there is an error creating the converter. 
*/ public AvroBulletRecordConverter(BulletConfig bulletConfig) throws BulletDSLException { super(bulletConfig); build(); } @Override protected BulletRecordConverter build() throws BulletDSLException { BulletRecordConverter converter = super.build(); runStringFixer = config.getAs(BulletDSLConfig.RECORD_CONVERTER_AVRO_STRING_TYPE_FIX_ENABLE, Boolean.class); return converter; } @Override @SuppressWarnings("unchecked") public BulletRecord convert(Object object, BulletRecord record) throws BulletDSLException { if (schema != null) { return super.convert(object, record); } // No Bullet DSL schema GenericRecord avro = (GenericRecord) object; return convertGenericRecord(avro, avro.getSchema(), record); } /** * Converts a {@link GenericRecord} with a {@link Schema} into the provided {@link BulletRecord}. * * @param genericRecord The {@link GenericRecord} to convert. * @param schema The {@link Schema} of the {@link GenericRecord}. * @param record The {@link BulletRecord} to place the fields into. * @return The {@link BulletRecord} with the added fields. */ protected BulletRecord convertGenericRecord(GenericRecord genericRecord, Schema schema, BulletRecord record) { for (Schema.Field field : schema.getFields()) { Object datum = genericRecord.get(field.pos()); if (datum != null) { Serializable value = runStringFixer ? 
fix(field.schema(), datum) : (Serializable) datum; record.typedSet(field.name(), new TypedObject(value)); } } return record; } @Override @SuppressWarnings("unchecked") protected void setField(BulletRecordField field, Object value, BulletRecord record) { if (field.getType() == null) { if (value instanceof Map) { flattenMap((Map) value, record); } else { flattenRecord((GenericRecord) value, record); } } else { super.setField(field, value, record); } } @Override protected Object get(Object object, String base) { return ((GenericRecord) object).get(base); } @Override @SuppressWarnings("unchecked") protected Object getField(Object object, String field) { if (object instanceof GenericRecord) { return ((GenericRecord) object).get(field); } return super.getField(object, field); } /** * Fixes data, if necessary, to ensure that the datum is {@link Serializable}. * * @param fieldSchema The {@link Schema} of the datum. * @param datum The datum to fix. * @return The datum as a {@link Serializable}. */ protected Serializable fix(Schema fieldSchema, Object datum) { if (datum == null) { return null; } switch (fieldSchema.getType()) { case STRING: return datum.toString(); case UNION: return fixUnion(fieldSchema.getTypes(), datum); case MAP: return fixMap(fieldSchema.getValueType(), (Map) datum); case RECORD: return fixRecord(fieldSchema, (GenericRecord) datum); case ARRAY: return fixArray(fieldSchema.getElementType(), (List) datum); } return (Serializable) datum; } /** * Fixes data, if necessary, to ensure that the datum is {@link Serializable}. * * @param types The list of {@link Schema} of the datum. * @param datum The datum to fix. * @return The datum as a {@link Serializable}. 
*/ protected Serializable fixUnion(List types, Object datum) { Serializable fixed = null; for (Schema schema : types) { Schema.Type type = schema.getType(); if (type == Schema.Type.NULL) { continue; } // Use the first non null type that works try { fixed = fix(schema, datum); } catch (Exception e) { log.error("Caught exception while processing Avro union: ", e); } } return fixed; } /** * Fixes a map, if necessary, to ensure that all fields and values in the map are {@link Serializable}. * * @param valueType The {@link Schema} of the values in the map. * @param value The {@link Map} to fix. * @return A map with all fields and values {@link Serializable}. */ protected Serializable fixMap(Schema valueType, Map value) { HashMap map = new HashMap<>(); value.forEach((k, v) -> map.put(k == null ? null : k.toString(), fix(valueType, v))); return map; } /** * Fixes an Avro record, if necessary, to ensure that all fields and values in the record are {@link Serializable}. * * @param schema The {@link Schema} of the record. * @param record The {@link GenericRecord} to fix. * @return A map representation of the record with all fields and values {@link Serializable}. */ protected Serializable fixRecord(Schema schema, GenericRecord record) { HashMap map = new HashMap<>(); schema.getFields().forEach(f -> map.put(f.name(), fix(f.schema(), record.get(f.pos())))); return map; } /** * Fixes an array, if necessary, to ensure that all elements of the array are {@link Serializable}. * * @param elementType The {@link Schema} of the elements in the array. * @param value The {@link List} representation of the array to fix. * @return A List with all elements {@link Serializable}. 
*/ protected Serializable fixArray(Schema elementType, List value) { ArrayList list = new ArrayList<>(); value.forEach(e -> list.add(fix(elementType, e))); return list; } private void flattenRecord(GenericRecord genericRecord, BulletRecord record) { for (Schema.Field field : genericRecord.getSchema().getFields()) { String key = field.name(); Serializable value = (Serializable) genericRecord.get(field.pos()); if (value != null) { record.typedSet(key, new TypedObject(value)); } } } }