com.datatorrent.lib.appdata.schemas.DimensionalSchema Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.datatorrent.lib.appdata.schemas;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.apex.malhar.lib.dimensions.aggregator.AggregatorRegistry;
import org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
* The {@link DimensionalSchema} class represents the App Data dimensions schema. The App Data dimensions
* schema is built from two sources: a {@link DimensionalConfigurationSchema} and an optional schema stub. The
* {@link DimensionalConfigurationSchema} is responsible for defining the key, values, dimensions combinations,
* and the aggregations performed for each dimensions combination. The schema stub defines the from and to
* times for the App Data dimensions schema. For details on how to define the {@link DimensionalConfigurationSchema}
* schema please see the documentation for the {@link DimensionalConfigurationSchema} class. An example of a valid
* schema stub which defines the from and to times is below:
*
* <pre>
* {@code
* {
*   "time":
*   {
*     "from":1123455556656,
*     "to":382390859384
*   }
* }
* }
* </pre>
*
* @since 3.1.0
*/
public class DimensionalSchema implements Schema
{
/**
* The type of the schema. Written into the schema JSON as the schema type.
*/
public static final String SCHEMA_TYPE = "dimensions";
/**
* The version of the schema. Written into the schema JSON as the schema version.
*/
public static final String SCHEMA_VERSION = "1.0";
/**
* The JSON key string corresponding to the from field.
*/
public static final String FIELD_TIME_FROM = "from";
/**
* The JSON key string corresponding to the time field.
*/
public static final String FIELD_TIME = "time";
/**
* The JSON key string corresponding to the to field.
*/
public static final String FIELD_TIME_TO = "to";
/**
* The JSON key string corresponding to the buckets field.
*/
public static final String FIELD_TIME_BUCKETS = "buckets";
/**
* The JSON key string corresponding to the slidingAggregateSupported field.
*/
public static final String FIELD_SLIDING_AGGREGATE_SUPPORTED = "slidingAggregateSupported";
/**
* The JSON key string used to identify the tags.
*/
//TODO To be removed when Malhar Library 3.3 becomes a dependency.
private static final String FIELD_TAGS = "tags";
/**
* The key sets that the top level of a schema stub is validated against; the single entry
* holds only the {@link #FIELD_TIME} key.
*/
public static final List VALID_KEYS = ImmutableList.of(new Fields(Sets.newHashSet(FIELD_TIME)));
/**
* The key sets used when validating the time section of a schema stub; the single entry
* holds the {@link #FIELD_TIME_FROM} and {@link #FIELD_TIME_TO} keys.
*/
public static final List VALID_TIME_KEYS = ImmutableList.of(
new Fields(Sets.newHashSet(FIELD_TIME_FROM, FIELD_TIME_TO)));
/**
* The JSON key string under which the response delay (in milliseconds) is written into the schema.
*/
public static final String FIELD_RESPONSE_DELAY_MILLS = "responseDelayMillis";
/**
* The from value for the schema. Null if there is no from value.
* Assigned from the schema stub or through {@code setFrom}.
*/
private Long from;
/**
* The to value for the schema. Null if there is no to value.
* Assigned from the schema stub or through {@code setTo}.
*/
private Long to;
/**
* boolean flag indicating if any values in the schema have been changed.
* Raised by {@code setSchemaKeys}, {@code setFrom} and {@code setTo}.
*/
private boolean changed = false;
/**
* boolean flag indicating if the from to fields in the schema have been changed.
* Raised by {@code setFrom} and {@code setTo}.
*/
private boolean changedFromTo = false;
/**
* boolean flag indicating if the schema keys have been updated for the schema.
* Raised by {@code setSchemaKeys}.
*/
private boolean changedSchemaKeys = false;
/**
* boolean flag indicating if the enum vals are updated.
*/
private boolean areEnumsUpdated = false;
/**
* The AppData schema JSON string (which is returned in the schema query).
* It is the string form of {@link #schema}.
*/
private String schemaJSON;
/**
* The {@link DimensionalConfigurationSchema} from which this {@link DimensionalSchema} was constructed.
*/
private DimensionalConfigurationSchema configurationSchema;
/**
* The {@link JSONObject} representing the AppData dimensions schema.
*/
private JSONObject schema;
/**
* The {@link JSONObject} representing the time section of the AppData dimensions schema.
*/
private JSONObject time;
/**
* The {@link JSONArray} representing the keys section of the AppData dimensions schema.
*/
private JSONArray keys;
/**
* This flag is true if there was a from and to time defined for this schema initially,
* i.e. a non-null schema stub was supplied at construction time.
*/
private boolean predefinedFromTo = false;
/**
* The schema keys for this schema. Null when no schema keys were provided.
*/
private Map schemaKeys;
/**
* The current enum vals for this schema.
*/
// NOTE(review): the generic type arguments of this declaration appear to have been lost
// (the text reads "Map>"); restore them from the upstream source before compiling.
private Map> currentEnumVals;
/**
* The schemaID assigned to this schema. This schemaID is only needed for operators
* which need to host multiple schemas.
*/
private int schemaID = Schema.DEFAULT_SCHEMA_ID;
/**
* The response delay in milliseconds. It is written into the schema JSON under
* {@link #FIELD_RESPONSE_DELAY_MILLS} only when it is greater than zero.
*/
protected long responseDelayMillis;
/**
* No-argument constructor that performs no initialization. It exists only so that a
* serialization framework (Kryo) can instantiate the class; application code should use
* one of the public constructors.
*/
private DimensionalSchema()
{
//For kryo
}
/**
 * Builds a {@link DimensionalSchema} from a schema stub, a configuration schema, schema keys,
 * and a response delay. The schema itself is built by the delegate constructor; when a stub is
 * supplied its from and to times are then extracted and the schema is marked as having a
 * predefined from/to range.
 *
 * @param schemaStub The schema stub to use when creating this {@link DimensionalSchema}. May be null,
 * in which case no from and to times are set.
 * @param configurationSchema The configuration schema to use when creating this {@link DimensionalSchema}.
 * @param schemaKeys The schemaKeys to use when creating this {@link DimensionalSchema}.
 * @param responseDelayMillis The response delay in milliseconds recorded for this schema.
 * @throws RuntimeException wrapping any exception raised while processing the schema stub.
 */
public DimensionalSchema(String schemaStub,
    DimensionalConfigurationSchema configurationSchema,
    Map schemaKeys,
    long responseDelayMillis)
{
  this(configurationSchema, schemaKeys, responseDelayMillis);

  // Without a stub there is no from/to range to record.
  if (schemaStub == null) {
    return;
  }

  predefinedFromTo = true;

  try {
    setSchemaStub(schemaStub);
  } catch (Exception stubFailure) {
    throw new RuntimeException(stubFailure);
  }
}
/**
 * Builds a {@link DimensionalSchema} from a schemaID, a schema stub, a configuration schema,
 * and schema keys. The response delay is fixed at zero.
 *
 * @param schemaID The schemaID assigned to this schema.
 * @param schemaStub The schema stub to use when creating this {@link DimensionalSchema}.
 * @param configurationSchema The configuration schema to use when creating this {@link DimensionalSchema}.
 * @param schemaKeys The schemaKeys to use when creating this {@link DimensionalSchema}.
 */
public DimensionalSchema(int schemaID, String schemaStub,
    DimensionalConfigurationSchema configurationSchema, Map schemaKeys)
{
  this(schemaStub, configurationSchema, schemaKeys, 0L);

  // The delegate leaves the default ID in place; override it last.
  this.schemaID = schemaID;
}
/**
 * Builds a {@link DimensionalSchema} from a schema stub and a configuration schema,
 * without any schema keys.
 *
 * @param schemaStub The schema stub to use when creating this {@link DimensionalSchema}.
 * @param configurationSchema The configuration schema to use when creating this {@link DimensionalSchema}.
 * @param responseDelayMillis The response delay in milliseconds recorded for this schema.
 */
public DimensionalSchema(String schemaStub, DimensionalConfigurationSchema configurationSchema,
    long responseDelayMillis)
{
  // null: this variant carries no schema keys.
  this(schemaStub, configurationSchema, null, responseDelayMillis);
}
/**
 * Builds a {@link DimensionalSchema} from a schemaID, a schema stub, and a configuration
 * schema, without any schema keys.
 *
 * @param schemaID The schemaID assigned to this schema.
 * @param schemaStub The schema stub to use when creating this {@link DimensionalSchema}.
 * @param configurationSchema The configuration schema to use when creating this {@link DimensionalSchema}.
 * @param responseDelayMillis The response delay in milliseconds recorded for this schema.
 */
public DimensionalSchema(int schemaID, String schemaStub,
    DimensionalConfigurationSchema configurationSchema, long responseDelayMillis)
{
  this(schemaStub, configurationSchema, responseDelayMillis);

  // The delegate leaves the default ID in place; override it last.
  this.schemaID = schemaID;
}
/**
 * Builds a {@link DimensionalSchema} from a configuration schema, schema keys, and a response
 * delay, then generates the schema JSON.
 *
 * @param configurationSchema The configuration schema from which to construct this {@link DimensionalSchema}.
 * Must not be null.
 * @param schemaKeys The schemaKeys assigned to this schema. May be null.
 * @param responseDelayMillis The response delay in milliseconds recorded for this schema.
 * @throws RuntimeException wrapping the {@link JSONException} raised if the schema JSON cannot be built.
 */
public DimensionalSchema(DimensionalConfigurationSchema configurationSchema,
    Map schemaKeys,
    long responseDelayMillis)
{
  // Everything below must be in place before initialize() reads it.
  this.responseDelayMillis = responseDelayMillis;
  setConfigurationSchema(configurationSchema);
  setSchemaKeys(schemaKeys);

  try {
    initialize();
  } catch (JSONException buildFailure) {
    throw new RuntimeException(buildFailure);
  }
}
/**
 * Builds a {@link DimensionalSchema} from a schemaID, a configuration schema, and schema keys.
 * The response delay is fixed at zero.
 *
 * @param schemaID The schemaID assigned to this schema.
 * @param configurationSchema The configuration schema from which this schema was constructed.
 * @param schemaKeys The schema keys assigned to this schema.
 */
public DimensionalSchema(int schemaID, DimensionalConfigurationSchema configurationSchema, Map schemaKeys)
{
  this(configurationSchema, schemaKeys, 0L);

  // The delegate leaves the default ID in place; override it last.
  this.schemaID = schemaID;
}
/**
 * Builds a {@link DimensionalSchema} from a configuration schema alone: no schema keys
 * and a response delay of zero.
 *
 * @param configurationSchema The configuration schema from which to construct this
 * schema.
 */
public DimensionalSchema(DimensionalConfigurationSchema configurationSchema)
{
  // null: no schema keys; 0: no response delay.
  this(configurationSchema, null, 0L);
}
/**
 * Builds a {@link DimensionalSchema} with the given schema ID and configuration schema.
 *
 * @param schemaID The schemaID assigned to this schema.
 * @param configurationSchema The configuration schema from which this schema is constructed.
 */
public DimensionalSchema(int schemaID, DimensionalConfigurationSchema configurationSchema)
{
  this(configurationSchema);

  // The delegate leaves the default ID in place; override it last.
  this.schemaID = schemaID;
}
/**
 * Gets the aggregator registry of the configuration schema backing this schema object.
 *
 * @return The aggregator registry.
 */
public AggregatorRegistry getAggregatorRegistry()
{
  final AggregatorRegistry registry = configurationSchema.getAggregatorRegistry();
  return registry;
}
/**
 * Replaces the schema keys of this schema and marks the schema (and its schema keys) as changed.
 * The provided map is copied; a null map clears the schema keys.
 *
 * @param schemaKeys The new schema keys, or null for none. Neither keys nor values may be null.
 * @throws NullPointerException if the map contains a null key or a null value.
 */
@Override
public final void setSchemaKeys(Map schemaKeys)
{
  // The change flags are raised up front, before any validation runs.
  changed = true;
  changedSchemaKeys = true;

  if (schemaKeys != null) {
    for (Object element : schemaKeys.entrySet()) {
      Map.Entry<?, ?> keyValue = (Map.Entry<?, ?>)element;
      Preconditions.checkNotNull(keyValue.getKey());
      Preconditions.checkNotNull(keyValue.getValue());
    }

    // Defensive copy so later changes to the caller's map do not leak in.
    this.schemaKeys = Maps.newHashMap(schemaKeys);
  } else {
    this.schemaKeys = null;
  }
}
/**
 * Helper that stores the configuration schema after verifying that it is not null.
 *
 * @param configurationSchema The configuration schema. Must not be null.
 * @throws NullPointerException if the configuration schema is null.
 */
private void setConfigurationSchema(DimensionalConfigurationSchema configurationSchema)
{
  Preconditions.checkNotNull(configurationSchema, "eventSchema");
  this.configurationSchema = configurationSchema;
}
/**
 * Helper method that extracts and validates the information contained in the schema stub for
 * this schema. The stub must contain a {@link #FIELD_TIME} section, and that section must
 * contain the {@link #FIELD_TIME_FROM} and {@link #FIELD_TIME_TO} values, which are stored as
 * the from and to times of this schema.
 *
 * @param schemaStub The schema stub to extract information from and validate.
 * @throws JSONException This exception is thrown if there is an error processing the provided JSON schemaStub.
 */
private void setSchemaStub(String schemaStub) throws JSONException
{
  JSONObject jo = new JSONObject(schemaStub);
  SchemaUtils.checkValidKeysEx(jo, VALID_KEYS);

  JSONObject tempTime = jo.getJSONObject(FIELD_TIME);
  // The from/to keys live inside the time section, so that is the object to validate.
  // The throwing variant is used, as for the top-level check above, so that a failed
  // validation is not silently ignored.
  SchemaUtils.checkValidKeysEx(tempTime, VALID_TIME_KEYS);

  this.from = tempTime.getLong(FIELD_TIME_FROM);
  this.to = tempTime.getLong(FIELD_TIME_TO);
}
/**
* Initializes the schema JSON and schema metadata.
*
* @throws JSONException This exception is thrown when there is an
* exception building the schema for the AppData dimensions schema.
*/
private void initialize() throws JSONException
{
schema = new JSONObject();
if (schemaKeys != null) {
schema.put(Schema.FIELD_SCHEMA_KEYS,
SchemaUtils.createJSONObject(schemaKeys));
}
schema.put(SnapshotSchema.FIELD_SCHEMA_TYPE, DimensionalSchema.SCHEMA_TYPE);
schema.put(SnapshotSchema.FIELD_SCHEMA_VERSION, DimensionalSchema.SCHEMA_VERSION);
//responseDelayMillis
if (responseDelayMillis > 0) {
schema.put(FIELD_RESPONSE_DELAY_MILLS, responseDelayMillis);
}
if (!configurationSchema.getTags().isEmpty()) {
schema.put(FIELD_TAGS, new JSONArray(configurationSchema.getTags()));
}
//time
time = new JSONObject();
schema.put(FIELD_TIME, time);
JSONArray bucketsArray = new JSONArray(configurationSchema.getBucketsString());
time.put(FIELD_TIME_BUCKETS, bucketsArray);
time.put(FIELD_SLIDING_AGGREGATE_SUPPORTED, true);
//keys
keys = new JSONArray(configurationSchema.getKeysString());
for (int keyIndex = 0; keyIndex < keys.length(); keyIndex++) {
JSONObject keyJo = keys.getJSONObject(keyIndex);
String keyName = keyJo.getString(DimensionalConfigurationSchema.FIELD_KEYS_NAME);
List tags = configurationSchema.getKeyToTags().get(keyName);
if (!tags.isEmpty()) {
keyJo.put(FIELD_TAGS, new JSONArray(tags));
}
}
schema.put(DimensionalConfigurationSchema.FIELD_KEYS, keys);
//values
JSONArray values = new JSONArray();
schema.put(SnapshotSchema.FIELD_VALUES, values);
FieldsDescriptor inputValuesDescriptor = configurationSchema.getInputValuesDescriptor();
Map> allValueToAggregator = configurationSchema.getSchemaAllValueToAggregatorToType();
for (Map.Entry> entry : allValueToAggregator.entrySet()) {
String valueName = entry.getKey();
for (Map.Entry entryAggType : entry.getValue().entrySet()) {
String aggregatorName = entryAggType.getKey();
Type outputValueType = entryAggType.getValue();
JSONObject value = new JSONObject();
String combinedName = valueName +
DimensionalConfigurationSchema.ADDITIONAL_VALUE_SEPERATOR +
aggregatorName;
value.put(SnapshotSchema.FIELD_VALUES_NAME, combinedName);
value.put(SnapshotSchema.FIELD_VALUES_TYPE, outputValueType.getName());
List tags = configurationSchema.getValueToTags().get(valueName);
if (!tags.isEmpty()) {
value.put(FIELD_TAGS, new JSONArray(tags));
}
values.put(value);
}
}
JSONArray dimensions = new JSONArray();
for (int combinationID = 0;
combinationID < configurationSchema.getDimensionsDescriptorIDToKeys().size();
combinationID++) {
//TODO: the auto-generated combination for computation of composite aggregator will be added.
//should remove it.
Fields fields = configurationSchema.getDimensionsDescriptorIDToKeys().get(combinationID);
Map> fieldToAggregatorAdditionalValues =
configurationSchema.getDimensionsDescriptorIDToFieldToAggregatorAdditionalValues().get(combinationID);
JSONObject combination = new JSONObject();
JSONArray combinationArray = new JSONArray();
for (String field : fields.getFields()) {
combinationArray.put(field);
}
combination.put(DimensionalConfigurationSchema.FIELD_DIMENSIONS_COMBINATIONS, combinationArray);
if (!fieldToAggregatorAdditionalValues.isEmpty()) {
JSONArray additionalValueArray = new JSONArray();
for (Map.Entry> entry : fieldToAggregatorAdditionalValues.entrySet()) {
String valueName = entry.getKey();
for (String aggregatorName : entry.getValue()) {
JSONObject additionalValueObject = new JSONObject();
String combinedName = valueName
+ DimensionalConfigurationSchema.ADDITIONAL_VALUE_SEPERATOR
+ aggregatorName;
Type inputValueType = inputValuesDescriptor.getType(valueName);
if (!configurationSchema.getAggregatorRegistry().isAggregator(aggregatorName)) {
if (aggregatorName == null) {
LOG.error("{} is not a valid aggregator.", aggregatorName);
}
}
Type outputValueType;
if (configurationSchema.getAggregatorRegistry().isIncrementalAggregator(aggregatorName)) {
IncrementalAggregator aggregator
= configurationSchema.getAggregatorRegistry().getNameToIncrementalAggregator().get(aggregatorName);
outputValueType = aggregator.getOutputType(inputValueType);
} else {
outputValueType = configurationSchema.getAggregatorRegistry().getNameToOTFAggregators().get(
aggregatorName).getOutputType();
}
additionalValueObject.put(DimensionalConfigurationSchema.FIELD_VALUES_NAME, combinedName);
additionalValueObject.put(DimensionalConfigurationSchema.FIELD_VALUES_TYPE, outputValueType.getName());
additionalValueArray.put(additionalValueObject);
}
}
combination.put(DimensionalConfigurationSchema.FIELD_DIMENSIONS_ADDITIONAL_VALUES, additionalValueArray);
}
dimensions.put(combination);
}
schema.put(DimensionalConfigurationSchema.FIELD_DIMENSIONS, dimensions);
this.schemaJSON = this.schema.toString();
}
/**
 * Updates the from time of the schema and flags both the schema and its from/to range as changed.
 *
 * @param from The from time for the schema.
 */
public void setFrom(Long from)
{
  changedFromTo = true;
  changed = true;
  this.from = from;
}
/**
 * Updates the to time of the schema and flags both the schema and its from/to range as changed.
 *
 * @param to The to time for the schema.
 */
public void setTo(Long to)
{
  changedFromTo = true;
  changed = true;
  this.to = to;
}
/**
* Sets the new enum lists for this schema. The sets in the provided maps are converted into lists.
*
* @param enums The new enum sets for this schema.
*/
public void setEnumsSet(Map> enums)
{
Preconditions.checkNotNull(enums);
areEnumsUpdated = true;
Map> enumsList = Maps.newHashMap();
//Check that all the given keys are valid
Preconditions.checkArgument(
configurationSchema.getKeyDescriptor().getFields().getFields().containsAll(enums.keySet()),
"The given map doesn't contain valid keys. Valid keys are %s and the provided keys are %s",
configurationSchema.getKeyDescriptor().getFields().getFields(),
enums.keySet());
//Todo check the type of the objects, for now just set them on the enum.
for (Map.Entry> entry : enums.entrySet()) {
String name = entry.getKey();
Set
© 2015 - 2025 Weber Informatics LLC | Privacy Policy