All downloads are free. Search and download functionality uses the official Maven repository.

com.datatorrent.lib.db.jdbc.JDBCDimensionalOutputOperator Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.db.jdbc;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;

import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor;
import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.Aggregate;
import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.EventKey;
import org.apache.apex.malhar.lib.dimensions.aggregator.AggregatorRegistry;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import com.datatorrent.api.Context;
import com.datatorrent.lib.appdata.gpo.GPOMutable;
import com.datatorrent.lib.appdata.schemas.DimensionalConfigurationSchema;
import com.datatorrent.lib.appdata.schemas.FieldsDescriptor;
import com.datatorrent.lib.appdata.schemas.Type;
import com.datatorrent.lib.db.AbstractPassThruTransactionableStoreOutputOperator;

import it.unimi.dsi.fastutil.ints.Int2ObjectMap;

/**
 * This operator writes updates emitted by a {@link DimensionsStoreHDHT}
 * operator to a Mysql database. Updates are written to the database in the
 * following fashion: 
*
*
    *
  1. Aggregates are received from an upstream * {@link AbstractDimensionsComputationFlexibleSingleSchema} operator.
  2. *
  3. Each aggregate is written to a different table based on its dimension * combination, time bucket, and corresponding aggregation
  4. *
* * @since 3.4.0 */ @org.apache.hadoop.classification.InterfaceStability.Evolving public class JDBCDimensionalOutputOperator extends AbstractPassThruTransactionableStoreOutputOperator { protected static int DEFAULT_BATCH_SIZE = 1000; @Min(1) private int batchSize; private final List tuples; private transient int batchStartIdx; @NotNull private Map> tableNames; @NotNull private String eventSchema; @NotNull private AggregatorRegistry aggregatorRegistry = AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY; private DimensionalConfigurationSchema schema; private transient Map> ddIDToAggIDToStatement = Maps.newHashMap(); public JDBCDimensionalOutputOperator() { tuples = Lists.newArrayList(); batchSize = DEFAULT_BATCH_SIZE; batchStartIdx = 0; store = new JdbcTransactionalStore(); } @Override public void setup(Context.OperatorContext context) { super.setup(context); LOG.info("Done setting up super"); aggregatorRegistry.setup(); //Create prepared statements schema = new DimensionalConfigurationSchema(eventSchema, aggregatorRegistry); List keyFDs = schema.getDimensionsDescriptorIDToKeyDescriptor(); for (int ddID = 0; ddID < keyFDs.size(); ddID++) { LOG.info("ddID {}", ddID); FieldsDescriptor keyFD = keyFDs.get(ddID); Int2ObjectMap aggIDToAggFD = schema .getDimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor().get(ddID); Map aggIDToStatement = ddIDToAggIDToStatement.get(ddID); if (aggIDToStatement == null) { aggIDToStatement = Maps.newHashMap(); ddIDToAggIDToStatement.put(ddID, aggIDToStatement); } for (Map.Entry aggTable : tableNames.get(ddID).entrySet()) { int aggID = aggregatorRegistry.getIncrementalAggregatorNameToID().get(aggTable.getKey()); LOG.info("aggID {}", aggID); FieldsDescriptor aggFD = aggIDToAggFD.get(aggID); List keyNames = keyFD.getFieldList(); keyNames.remove(DimensionsDescriptor.DIMENSION_TIME_BUCKET); LOG.info("List fields {}", keyNames); List aggregateNames = aggFD.getFieldList(); LOG.info("List fields {}", aggregateNames); String tableName = 
aggTable.getValue(); String statementString = buildStatement(tableName, keyNames, aggregateNames); try { aggIDToStatement.put(aggID, store.getConnection().prepareStatement(statementString)); } catch (SQLException ex) { throw new RuntimeException(ex); } } } } private String buildStatement(String tableName, List keyNames, List aggregateNames) { LOG.info("building statement"); StringBuilder sb = new StringBuilder(); sb.append("INSERT INTO "); sb.append(tableName); sb.append(" ("); addList(sb, keyNames); sb.append(","); addList(sb, aggregateNames); sb.append(") VALUES ("); for (int qCounter = 0;; qCounter++) { sb.append("?"); if (qCounter == keyNames.size() + aggregateNames.size() - 1) { break; } sb.append(","); } sb.append(") ON DUPLICATE KEY UPDATE "); addOnDuplicate(sb, aggregateNames); return sb.toString(); } private void addOnDuplicate(StringBuilder sb, List names) { LOG.info("add Duplicate"); for (int index = 0;; index++) { String name = names.get(index); sb.append(name); sb.append("="); sb.append("VALUES("); sb.append(name); sb.append(")"); if (index == names.size() - 1) { break; } sb.append(","); } } private void addList(StringBuilder sb, List names) { for (int index = 0;; index++) { sb.append(names.get(index)); if (index == names.size() - 1) { break; } sb.append(","); } } /** * This sets the table names that corresponds to the dimensions combinations * specified in your {@link DimensionalConfigurationSchema}. The structure of * this property is as follows:
*
*
    *
  1. The first key is the dimension combination id assigned to a dimension * combination in your {@link DimensionalConfigurationSchema}.
    *
    * The dimensions descriptor id is determined by the following factors: *
      *
    • The dimensions combination specified in the * {@link DimensionalConfigurationSchema}.
    • *
    • The the Time Buckets defined in your * {@link DimensionalConfigurationSchema}.
    • *
    * The dimensions descriptor id is computed in the following way: *
      *
    1. The first dimensions descriptor id is 0
    2. *
    3. A dimension combination is selected
    4. *
    5. A time bucket is selected
    6. *
    7. The current dimension combination and time bucket pair is assigned a * dimensions descriptor id
    8. *
    9. The current dimensions descriptor id is incremented
    10. *
    11. Steps 3 - 5 are repeated until all the time buckets are done
    12. *
    13. Steps 2 - 6 are repeated until all the dimension combinations are done. *
    14. *
    *
    * <
  2. *
  3. The second key is the name of an aggregation being performed for that * dimensions combination.
  4. *
  5. The value is the name of the output Mysql table
  6. *
* * @param tableNames * The table names that corresponds to the dimensions combinations * specified in your {@link DimensionalConfigurationSchema}. */ public void setTableNames(Map> tableNames) { this.tableNames = Preconditions.checkNotNull(tableNames); } /** * Sets the JSON corresponding to the {@link DimensionalConfigurationSchema} * which was set on the upstream {@link AppDataSingleSchemaDimensionStoreHDHT} * and {@link AbstractDimensionsComputationFlexibleSingleSchema} operators. * * @param eventSchema * The JSON corresponding to the * {@link DimensionalConfigurationSchema} which was set on the * upstream {@link AppDataSingleSchemaDimensionStoreHDHT} and * {@link AbstractDimensionsComputationFlexibleSingleSchema} * operators. */ public void setEventSchema(String eventSchema) { this.eventSchema = eventSchema; } /** * Sets the {@link AggregatorRegistry} that is used to determine what * aggregators correspond to what ids. * * @param aggregatorRegistry * The {@link AggregatorRegistry} that is used to determine what * aggregators correspond to what ids. */ public void setAggregatorRegistry(AggregatorRegistry aggregatorRegistry) { this.aggregatorRegistry = aggregatorRegistry; } @Override public void endWindow() { //Process any remaining tuples. if (tuples.size() - batchStartIdx > 0) { processBatch(); } super.endWindow(); tuples.clear(); batchStartIdx = 0; } @Override public void processTuple(Aggregate tuple) { tuples.add(tuple); if ((tuples.size() - batchStartIdx) >= batchSize) { processBatch(); } } /** * Processes all the tuples in the batch once the batch size for the operator * is reached. 
*/ private void processBatch() { LOG.info("start {} end {}", batchStartIdx, tuples.size()); try { for (int i = batchStartIdx; i < tuples.size(); i++) { setStatementParameters(tuples.get(i)); } for (Map.Entry> ddIDToAggIDToStatementEntry : ddIDToAggIDToStatement .entrySet()) { for (Map.Entry entry : ddIDToAggIDToStatementEntry.getValue().entrySet()) { entry.getValue().executeBatch(); entry.getValue().clearBatch(); } } } catch (SQLException e) { throw new RuntimeException("processing batch", e); } finally { batchStartIdx += tuples.size() - batchStartIdx; } } /** * Sets the parameters on the {@link java.sql.PreparedStatement} based on the * values in the given {@link Aggregate}. * * @param aggregate * The {@link Aggregate} whose values will be set on the * corresponding {@link java.sql.PreparedStatement}. */ private void setStatementParameters(Aggregate aggregate) { EventKey eventKey = aggregate.getEventKey(); int ddID = eventKey.getDimensionDescriptorID(); int aggID = eventKey.getAggregatorID(); LOG.info("Setting statement params {} {}", ddID, aggID); FieldsDescriptor keyFD = schema.getDimensionsDescriptorIDToKeyDescriptor().get(ddID); FieldsDescriptor aggFD = schema.getDimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor().get(ddID) .get(aggID); GPOMutable key = eventKey.getKey(); key.setFieldDescriptor(keyFD); GPOMutable value = aggregate.getAggregates(); value.setFieldDescriptor(aggFD); int qCounter = 1; PreparedStatement ps = ddIDToAggIDToStatement.get(ddID).get(aggID); try { qCounter = setParams(ps, key, qCounter, true); setParams(ps, value, qCounter, false); ps.addBatch(); } catch (SQLException ex) { throw new RuntimeException(ex); } } /** * @param ps * The {@link java.sql.PreparedStatement} which will do an insert * into the Mysql database. * @param gpo * The {@link GPOMutable} object whose values need to be set in the * preparted statement. 
* @param qCounter * The current index in the prepared statement * @param isKey * TODO use this * @return The current index in the prepared statement. * @throws SQLException */ private int setParams(PreparedStatement ps, GPOMutable gpo, int qCounter, boolean isKey) throws SQLException { FieldsDescriptor fd = gpo.getFieldDescriptor(); Map fieldToType = fd.getFieldToType(); List fields = fd.getFieldList(); for (int fieldCounter = 0; fieldCounter < fields.size(); fieldCounter++, qCounter++) { String fieldName = fields.get(fieldCounter); if (fieldName.equals(DimensionsDescriptor.DIMENSION_TIME_BUCKET)) { qCounter--; continue; } Type type = fieldToType.get(fieldName); LOG.info("Field Name {} {}", fieldName, qCounter); switch (type) { case BOOLEAN: { ps.setByte(qCounter, (byte)(gpo.getFieldBool(fieldName) ? 1 : 0)); break; } case BYTE: { ps.setByte(qCounter, gpo.getFieldByte(fieldName)); break; } case CHAR: { ps.setString(qCounter, Character.toString(gpo.getFieldChar(fieldName))); break; } case STRING: { ps.setString(qCounter, gpo.getFieldString(fieldName)); break; } case SHORT: { ps.setInt(qCounter, gpo.getFieldShort(fieldName)); break; } case INTEGER: { ps.setInt(qCounter, gpo.getFieldInt(fieldName)); break; } case LONG: { ps.setLong(qCounter, gpo.getFieldLong(fieldName)); break; } case FLOAT: { ps.setFloat(qCounter, gpo.getFieldFloat(fieldName)); break; } case DOUBLE: { ps.setDouble(qCounter, gpo.getFieldDouble(fieldName)); break; } default: { throw new UnsupportedOperationException("The type: " + type + " is not supported."); } } } return qCounter; } /** * Sets the size of a batch operation.
* Default: {@value #DEFAULT_BATCH_SIZE} * * @param batchSize * size of a batch */ public void setBatchSize(int batchSize) { this.batchSize = batchSize; } private static final Logger LOG = LoggerFactory.getLogger(AbstractJdbcTransactionableOutputOperator.class); }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy