All downloads are free. Search and download functionality uses the official Maven repository.

com.datatorrent.lib.logs.MultiWindowDimensionAggregation Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.logs;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.validation.constraints.NotNull;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang.mutable.MutableDouble;

import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.Operator;
import com.datatorrent.lib.util.KeyValPair;

/**
 * This class aggregates the value of given dimension across windows.
 * 

* @displayName Multi Window Dimension Aggregation * @category Stats and Aggregations * @tags aggregation * * @since 0.3.4 */ public class MultiWindowDimensionAggregation implements Operator { @SuppressWarnings("unused") private static final Logger logger = LoggerFactory.getLogger(MultiWindowDimensionAggregation.class); public enum AggregateOperation { SUM, AVERAGE } private int windowSize = 2; private int currentWindow = 0; private String timeBucket = "m"; private String dimensionKeyVal = "0"; private List dimensionArrayString; @NotNull private List dimensionArray; private AggregateOperation operationType = AggregateOperation.SUM; private Map>> outputMap; private Map>> cacheOject; private transient List patternList; private transient int applicationWindowSize = 500; /** * This is the output port which emits aggregated dimensions. */ public final transient DefaultOutputPort>> output = new DefaultOutputPort>>(); /** * This is the input port which receives multi dimensional data. */ public final transient DefaultInputPort>> data = new DefaultInputPort>>() { @Override public void process(Map> tuple) { cacheOject.put(currentWindow, tuple); for (Map.Entry> tupleEntry : tuple.entrySet()) { String tupleKey = tupleEntry.getKey(); Map tupleValue = tupleEntry.getValue(); int currentPattern = 0; for (Pattern pattern : patternList) { Matcher matcher = pattern.matcher(tupleKey); if (matcher.matches()) { String currentPatternString = dimensionArrayString.get(currentPattern); Map> currentPatternMap = outputMap.get(currentPatternString); if (currentPatternMap == null) { currentPatternMap = new HashMap>(); outputMap.put(currentPatternString, currentPatternMap); } StringBuilder builder = new StringBuilder(matcher.group(2)); for (int i = 1; i < dimensionArray.get(currentPattern).length; i++) { builder.append("," + matcher.group(i + 2)); } KeyValPair currentDimensionKeyValPair = currentPatternMap.get(builder.toString()); if (currentDimensionKeyValPair == null) { 
currentDimensionKeyValPair = new KeyValPair(new MutableDouble(tupleValue.get(dimensionKeyVal)), 1); currentPatternMap.put(builder.toString(), currentDimensionKeyValPair); } else { currentDimensionKeyValPair.getKey().add(tupleValue.get(dimensionKeyVal)); currentDimensionKeyValPair.setValue(currentDimensionKeyValPair.getValue() + 1); } break; } currentPattern++; } } } }; public String getDimensionKeyVal() { return dimensionKeyVal; } public void setDimensionKeyVal(String dimensionKeyVal) { this.dimensionKeyVal = dimensionKeyVal; } public String getTimeBucket() { return timeBucket; } public void setTimeBucket(String timeBucket) { this.timeBucket = timeBucket; } public List getDimensionArray() { return dimensionArray; } public void setDimensionArray(List dimensionArray) { this.dimensionArray = dimensionArray; dimensionArrayString = new ArrayList(); for (int[] e : dimensionArray) { StringBuilder builder = new StringBuilder("" + e[0]); for (int i = 1; i < e.length; i++) { builder.append("," + e[i]); } dimensionArrayString.add(builder.toString()); } } public int getWindowSize() { return windowSize; } public void setWindowSize(int windowSize) { this.windowSize = windowSize; } @Override public void setup(OperatorContext arg0) { if (arg0 != null) { applicationWindowSize = arg0.getValue(OperatorContext.APPLICATION_WINDOW_COUNT); } if (cacheOject == null) { cacheOject = new HashMap>>(windowSize); } if (outputMap == null) { outputMap = new HashMap>>(); } setUpPatternList(); } private void setUpPatternList() { patternList = new ArrayList(); for (int[] e : dimensionArray) { Pattern pattern; StringBuilder builder = new StringBuilder(timeBucket + "\\|(\\d+)"); for (int i = 0; i < e.length; i++) { builder.append("\\|" + e[i] + ":([^\\|]+)"); } pattern = Pattern.compile(builder.toString()); patternList.add(pattern); } } @Override public void teardown() { } @Override public void beginWindow(long arg0) { Map> currentWindowMap = cacheOject.get(currentWindow); if (currentWindowMap == 
null) { currentWindowMap = new HashMap>(); } else { for (Map.Entry> tupleEntry : currentWindowMap.entrySet()) { String tupleKey = tupleEntry.getKey(); Map tupleValue = tupleEntry.getValue(); int currentPattern = 0; for (Pattern pattern : patternList) { Matcher matcher = pattern.matcher(tupleKey); if (matcher.matches()) { String currentPatternString = dimensionArrayString.get(currentPattern); Map> currentPatternMap = outputMap.get(currentPatternString); if (currentPatternMap != null) { StringBuilder builder = new StringBuilder(matcher.group(2)); for (int i = 1; i < dimensionArray.get(currentPattern).length; i++) { builder.append("," + matcher.group(i + 2)); } KeyValPair currentDimensionKeyValPair = currentPatternMap.get(builder.toString()); if (currentDimensionKeyValPair != null) { currentDimensionKeyValPair.getKey().add(0 - tupleValue.get(dimensionKeyVal).doubleValue()); currentDimensionKeyValPair.setValue(currentDimensionKeyValPair.getValue() - 1); if (currentDimensionKeyValPair.getKey().doubleValue() == 0.0) { currentPatternMap.remove(builder.toString()); } } } break; } currentPattern++; } } } currentWindowMap.clear(); if (patternList == null || patternList.isEmpty()) { setUpPatternList(); } } @Override public void endWindow() { int totalWindowsOccupied = cacheOject.size(); for (Map.Entry>> e : outputMap.entrySet()) { for (Map.Entry> dimensionValObj : e.getValue().entrySet()) { Map> outputData = new HashMap>(); KeyValPair keyVal = dimensionValObj.getValue(); if (operationType == AggregateOperation.SUM) { outputData.put(e.getKey(), new DimensionObject(keyVal.getKey(), dimensionValObj.getKey())); } else if (operationType == AggregateOperation.AVERAGE) { if (keyVal.getValue() != 0) { double totalCount = ((double)(totalWindowsOccupied * applicationWindowSize)) / 1000; outputData.put(e.getKey(), new DimensionObject(new MutableDouble(keyVal.getKey().doubleValue() / totalCount), dimensionValObj.getKey())); } } if (!outputData.isEmpty()) { output.emit(outputData); } } } 
currentWindow = (currentWindow + 1) % windowSize; } public AggregateOperation getOperationType() { return operationType; } public void setOperationType(AggregateOperation operationType) { this.operationType = operationType; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy