All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.cdap.plugin.common.StructuredRecordToCubeFact Maven / Gradle / Ivy

There is a newer version: 2.12.3
Show newest version
/*
 * Copyright © 2015-2019 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.plugin.common;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import io.cdap.cdap.api.common.Bytes;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.dataset.lib.cube.CubeFact;
import io.cdap.cdap.api.dataset.lib.cube.MeasureType;
import io.cdap.cdap.api.dataset.lib.cube.Measurement;

import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * Transforms a {@link StructuredRecord} into a {@link CubeFact} object that can be written to a
 * {@link io.cdap.cdap.api.dataset.lib.cube.Cube} dataset.
 * 

* To configure transformation from {@link StructuredRecord} to a {@link CubeFact} the * mapping configuration can be provided for timestamp field and measurements. All fields from {@link StructuredRecord} * will be mapped to corresponded dimensions. *

* The source field for the timestamp can be provided with {@link Properties.Cube#FACT_TS_FIELD} property. * The value in the source field is assumed to be an epoch in milliseconds. For other formats you can use * {@link Properties.Cube#FACT_TS_FORMAT} property to specify the date format according to {@link SimpleDateFormat} * rules. If no {@link Properties.Cube##FACT_TS_FIELD} is provided, the current timestamp (at processing) is used. *

* To add a measurement to a {@link CubeFact} specify its type with the property * cubeFact.measurement.{@literal<}measurement_name>={@literal<}measurement_type>. Measurement name corresponds to a * field name in the {@link StructuredRecord} that contains its value. * Measurement type (specified in 'type' property) can be one of {@link io.cdap.cdap.api.dataset.lib.cube.MeasureType} * values. *

* Example of the configuration:

 cubeFact.timestamp.field=timeField
 cubeFact.timestamp.format=HH:mm:ss
 cubeFact.measurement.metric1=COUNTER
 cubeFact.measurement.metric2=GAUGE
*/ public class StructuredRecordToCubeFact { private final CubeFactBuilder factBuilder; public StructuredRecordToCubeFact(Map properties) { factBuilder = new CubeFactBuilder(properties); } public CubeFact transform(StructuredRecord record) throws Exception { Schema recordSchema = record.getSchema(); Preconditions.checkArgument(recordSchema.getType() == Schema.Type.RECORD, "input must be a record."); return factBuilder.build(record); } private static final class CubeFactBuilder { private final TimestampResolver timestampResolver; private final Collection measurementResolvers; CubeFactBuilder(Map properties) { Map props = new HashMap<>(properties); this.timestampResolver = new TimestampResolver(props); this.measurementResolvers = Lists.newArrayList(); for (Map.Entry property : props.entrySet()) { if (property.getKey().startsWith(Properties.Cube.MEASUREMENT_PREFIX)) { measurementResolvers.add(new MeasurementResolver(property.getKey(), property.getValue())); } } if (measurementResolvers.isEmpty()) { throw new IllegalArgumentException("At least one measurement must be specified with " + Properties.Cube.MEASUREMENT_PREFIX + "="); } } public CubeFact build(StructuredRecord record) { // we divide by 1000 to get seconds - which is expected by Cube CubeFact fact = new CubeFact(timestampResolver.getTimestamp(record) / 1000); addMeasurements(record, fact); for (Schema.Field field : record.getSchema().getFields()) { Object value = record.get(field.getName()); if (value != null) { String stringValue = getStringValue(field, value); if (stringValue != null) { fact.addDimensionValue(field.getName(), stringValue); } } } return fact; } private void addMeasurements(StructuredRecord record, CubeFact fact) { for (MeasurementResolver resolver : measurementResolvers) { Measurement measurement = resolver.getMeasurement(record); if (measurement != null) { fact.addMeasurement(measurement); } } } @Nullable private String getStringValue(Schema.Field field, Object value) { Schema.Type type = validateAndGetType(field); if (type == null) { return null; } String dimValue; switch (type) { case BYTES: if (value instanceof ByteBuffer) { dimValue = Bytes.toString((ByteBuffer) value); } else { dimValue = Bytes.toStringBinary((byte[]) value); } break; default: dimValue = value.toString(); } return dimValue; } } private static final class TimestampResolver { private final String srcField; private final DateFormat dateFormat; TimestampResolver(Map properties) { if (properties.containsKey(Properties.Cube.FACT_TS_FIELD)) { this.srcField = properties.get(Properties.Cube.FACT_TS_FIELD); if (properties.containsKey(Properties.Cube.FACT_TS_FORMAT)) { this.dateFormat = new SimpleDateFormat(properties.get(Properties.Cube.FACT_TS_FORMAT)); } else { this.dateFormat = null; } } else { this.srcField = null; this.dateFormat = null; } } public long getTimestamp(StructuredRecord record) { if (srcField == null) { return System.currentTimeMillis(); } Object val = record.get(srcField); if (val == null) { throw new IllegalArgumentException("Required field to determine timestamp is missing: " + srcField); } String valAsString = val.toString(); if (dateFormat != null) { try { return dateFormat.parse(valAsString).getTime(); } catch (ParseException e) { throw new IllegalArgumentException("Cannot parse field value to determine timestamp: " + valAsString, e); } } return Long.valueOf(valAsString); } } @Nullable private static Schema.Type validateAndGetType(Schema.Field field) { Schema.Type type; if (field.getSchema().isNullable()) { type = field.getSchema().getNonNullable().getType(); } else { type = field.getSchema().getType(); } // We only know how to convert simple types into String. Skipping the rest. if (!type.isSimpleType()) { return null; } return type; } private static final class MeasurementResolver { private String name; private MeasureType type; MeasurementResolver(String measureProperty, String measureType) { String measureName = measureProperty.substring(Properties.Cube.MEASUREMENT_PREFIX.length()); if ("".equals(measureName)) { throw new IllegalArgumentException( "Invalid property: " + measureProperty + ", measureName must be not empty"); } if (Strings.isNullOrEmpty(measureType)) { throw new IllegalArgumentException( "Invalid property: " + measureProperty + ", measureType must be not empty"); } this.name = measureName; this.type = MeasureType.valueOf(measureType); } @Nullable public Measurement getMeasurement(StructuredRecord record) { Long value = getValue(record); if (value == null) { return null; } return new Measurement(name, type, value); } private Long getValue(StructuredRecord record) { Object val = record.get(name); if (val != null) { return Double.valueOf(val.toString()).longValue(); } return null; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy