All Downloads are FREE. Search and download functionalities are using the official Maven repository.

co.cask.tracker.entity.AuditMetricsCube Maven / Gradle / Ivy

Go to download

CDAP Extension that provides the ability to track data changes and provide input to data governance processes on a cluster

There is a newer version: 0.7.0
Show newest version
/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.tracker.entity;

import co.cask.cdap.api.dataset.DatasetSpecification;
import co.cask.cdap.api.dataset.lib.AbstractDataset;
import co.cask.cdap.api.dataset.lib.cube.AggregationFunction;
import co.cask.cdap.api.dataset.lib.cube.Cube;
import co.cask.cdap.api.dataset.lib.cube.CubeFact;
import co.cask.cdap.api.dataset.lib.cube.CubeQuery;
import co.cask.cdap.api.dataset.lib.cube.MeasureType;
import co.cask.cdap.api.dataset.lib.cube.TimeSeries;
import co.cask.cdap.api.dataset.lib.cube.TimeValue;
import co.cask.cdap.api.dataset.module.EmbeddedDataset;
import co.cask.cdap.proto.audit.AuditMessage;
import co.cask.cdap.proto.audit.AuditType;
import co.cask.cdap.proto.audit.payload.access.AccessPayload;
import co.cask.cdap.proto.audit.payload.access.AccessType;
import co.cask.cdap.proto.element.EntityType;
import co.cask.cdap.proto.id.EntityId;
import co.cask.cdap.proto.id.NamespacedEntityId;
import co.cask.cdap.proto.id.ProgramRunId;
import co.cask.tracker.utils.EntityIdHelper;
import co.cask.tracker.utils.ParameterCheck;
import com.google.common.base.Strings;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * An OLAP Cube to store metrics about the AuditLog.
 */
public class AuditMetricsCube extends AbstractDataset {
  private final Cube auditMetrics;

  /**
   * Time granularities offered for histogram queries. Each constant wraps the {@link TimeUnit}
   * that spans exactly one bucket of that granularity.
   */
  private enum Bucket {
    DAY(TimeUnit.DAYS),
    HOUR(TimeUnit.HOURS);

    private final TimeUnit unit;

    Bucket(TimeUnit unit) {
      this.unit = unit;
    }

    /**
     * @return the width of one bucket of this granularity, expressed in seconds
     */
    public long getResolutionsSeconds() {
      return TimeUnit.SECONDS.convert(1L, unit);
    }
  }

  /**
   * Creates the dataset on top of its single embedded {@link Cube}.
   *
   * @param spec the dataset specification; only its name is read here
   * @param auditMetrics the embedded dataset registered under the name "auditMetrics"; all facts are
   *                     written to and queried from it
   */
  public AuditMetricsCube(DatasetSpecification spec, @EmbeddedDataset("auditMetrics") Cube auditMetrics) {
    super(spec.getName(), auditMetrics);
    this.auditMetrics = auditMetrics;
  }

  /**
   * Records one audit message as a fact in the metrics cube.
   *
   * <p>Every fact carries the namespace, entity type, entity name and audit type as dimensions and
   * increments the "count" measure. A message with an {@link AccessPayload} additionally records what is
   * known about the accessor (namespace, application, program name and program type) and increments a
   * measure named after the lower-cased access type. Nothing is written when {@link ParameterCheck}
   * reports the entity as a Tracker dataset or the accessor as a Tracker entity.
   *
   * <p>The fact is stamped with the current wall-clock time in seconds.
   *
   * @param auditMessage the message to update the stats for
   * @throws IllegalStateException if the entity of the message is not a {@link NamespacedEntityId}
   * @throws IOException declared by the signature; no statement in this method throws it directly
   */
  public void write(AuditMessage auditMessage) throws IOException {
    EntityId target = auditMessage.getEntityId();
    if (!(target instanceof NamespacedEntityId)) {
      String error = String.format("Entity '%s' does not have a namespace " +
                                     "and was not written to AuditMetricsCube", target);
      throw new IllegalStateException(error);
    }
    if (ParameterCheck.isTrackerDataset(target)) {
      return;
    }

    CubeFact fact = new CubeFact(System.currentTimeMillis() / 1000);
    fact.addDimensionValue("namespace", ((NamespacedEntityId) target).getNamespace());
    fact.addDimensionValue("entity_type", target.getEntityType().name().toLowerCase());
    fact.addDimensionValue("entity_name", target.getEntityName());
    fact.addDimensionValue("audit_type", auditMessage.getType().name().toLowerCase());

    if (auditMessage.getPayload() instanceof AccessPayload) {
      AccessPayload access = (AccessPayload) auditMessage.getPayload();
      EntityId accessor = access.getAccessor();
      if (ParameterCheck.isTrackerEntity(accessor)) {
        // The whole fact is dropped, not just the accessor details.
        return;
      }
      // Accounting for cross-namespace dataset access
      if (accessor instanceof NamespacedEntityId) {
        fact.addDimensionValue("accessor_namespace", ((NamespacedEntityId) accessor).getNamespace());
      }

      // The optional accessor dimensions are only recorded when they are non-empty.
      String application = EntityIdHelper.getParentApplicationName(accessor);
      if (!application.isEmpty()) {
        fact.addDimensionValue("app_name", application);
      }
      String program = (accessor instanceof ProgramRunId)
        ? ((ProgramRunId) accessor).getProgram()
        : accessor.getEntityName();
      if (!program.isEmpty()) {
        fact.addDimensionValue("program_name", program);
      }
      String kindOfProgram = EntityIdHelper.getProgramType(accessor);
      if (!kindOfProgram.isEmpty()) {
        fact.addDimensionValue("program_type", kindOfProgram);
      }

      fact.addMeasurement(access.getAccessType().name().toLowerCase(), MeasureType.COUNTER, 1L);
    }

    fact.addMeasurement("count", MeasureType.COUNTER, 1L);
    auditMetrics.add(fact);
  }

  /**
   * Returns the datasets and streams with the most access audit messages in a namespace.
   *
   * <p>Two cube queries are issued, one for entity type "dataset" and one for "stream". Each sums the
   * read, write and unknown access measures per entity name over the given time range, and the results
   * are merged into one entry per entity.
   *
   * @param topN maximum number of results to return; expected to be non-negative
   * @param startTime start of the time range, handed to the cube query unchanged (presumably epoch
   *                  seconds, the unit in which facts are written)
   * @param endTime end of the time range, handed to the cube query unchanged
   * @param namespace namespace of the accessed entities
   * @return at most {@code topN} results in the natural ordering of {@link TopDatasetsResult}, which
   *         is expected to be descending by count
   */
  public List<TopDatasetsResult> getTopNDatasets(int topN, long startTime, long endTime, String namespace) {
    CubeQuery datasetQuery = buildTopNEntityQuery(EntityType.DATASET, startTime, endTime, namespace);
    CubeQuery streamQuery = buildTopNEntityQuery(EntityType.STREAM, startTime, endTime, namespace);

    Map<String, TopDatasetsResult> auditStats = transformTopNDatasetResult(auditMetrics.query(datasetQuery),
                                                                           auditMetrics.query(streamQuery));
    List<TopDatasetsResult> resultList = new ArrayList<>(auditStats.values());
    Collections.sort(resultList);
    return (topN >= resultList.size()) ? resultList : resultList.subList(0, topN);
  }

  /**
   * Builds the access query for one entity type: read/write/unknown sums at 365-day resolution,
   * restricted to access audits of that type in the namespace and grouped by entity name.
   */
  private static CubeQuery buildTopNEntityQuery(EntityType entityType, long startTime, long endTime,
                                                String namespace) {
    return CubeQuery.builder()
      .select()
      .measurement(AccessType.READ.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.WRITE.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.UNKNOWN.name().toLowerCase(), AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("entity_type", entityType.name().toLowerCase())
      .dimension("audit_type", AuditType.ACCESS.name().toLowerCase())
      .timeRange(startTime, endTime)
      .groupBy()
      .dimension("entity_name")
      .limit(1000)
      .build();
  }

  /**
   * Folds the series returned by the dataset and stream queries into one result per entity.
   *
   * @param datasetResult series returned by the query for entity type "dataset"
   * @param streamResults series returned by the query for entity type "stream"
   * @return results keyed by a length-prefixed combination of entity name and entity type
   */
  private Map<String, TopDatasetsResult> transformTopNDatasetResult(Collection<TimeSeries> datasetResult,
                                                                    Collection<TimeSeries> streamResults) {
    Map<String, TopDatasetsResult> resultsMap = new HashMap<>();
    collectAccessCounts(datasetResult, "dataset", resultsMap);
    collectAccessCounts(streamResults, "stream", resultsMap);
    return resultsMap;
  }

  /**
   * Adds the read and write totals of every series to the result of the entity the series belongs to,
   * creating that result on first sight. Series of any other measure only ensure the entity is listed.
   */
  private static void collectAccessCounts(Collection<TimeSeries> seriesOfType, String entityType,
                                          Map<String, TopDatasetsResult> resultsMap) {
    for (TimeSeries series : seriesOfType) {
      String entityName = series.getDimensionValues().get("entity_name");
      String key = getKey(entityName, entityType);
      TopDatasetsResult result = resultsMap.get(key);
      if (result == null) {
        result = new TopDatasetsResult(entityName, entityType);
        resultsMap.put(key, result);
      }

      // A time range that crosses a resolution boundary yields more than one time value per series;
      // add them all up instead of reading only the first bucket.
      long total = 0L;
      for (TimeValue timeValue : series.getTimeValues()) {
        total += timeValue.getValue();
      }

      String measure = series.getMeasureName();
      if ("read".equals(measure)) {
        result.setRead(total);
      } else if ("write".equals(measure)) {
        result.setWrite(total);
      }
    }
  }

  /**
   * Returns the programs with the most access audit messages in a namespace.
   *
   * <p>The query sums the read, write and unknown access measures over the given time range, grouped by
   * program name, application name and program type.
   *
   * @param topN maximum number of results to return; expected to be non-negative
   * @param startTime start of the time range, handed to the cube query unchanged (presumably epoch
   *                  seconds, the unit in which facts are written)
   * @param endTime end of the time range, handed to the cube query unchanged
   * @param namespace namespace of the accessed entities
   * @return at most {@code topN} results in the natural ordering of {@link TopProgramsResult}
   */
  public List<TopProgramsResult> getTopNPrograms(int topN, long startTime, long endTime, String namespace) {
    CubeQuery programQuery = CubeQuery.builder()
      .select()
      .measurement(AccessType.READ.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.WRITE.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.UNKNOWN.name().toLowerCase(), AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("audit_type", AuditType.ACCESS.name().toLowerCase())
      .timeRange(startTime, endTime)
      .groupBy()
      .dimension("program_name")
      .dimension("app_name")
      .dimension("program_type")
      .limit(1000)
      .build();
    Map<String, TopProgramsResult> auditStats = transformTopNProgramResult(auditMetrics.query(programQuery));
    List<TopProgramsResult> resultList = new ArrayList<>(auditStats.values());
    Collections.sort(resultList);
    return (topN >= resultList.size()) ? resultList : resultList.subList(0, topN);
  }

  /**
   * Returns the programs with the most access audit messages for one entity.
   *
   * <p>Same query as the namespace-wide overload, additionally restricted to the given entity type and
   * entity name.
   *
   * @param topN maximum number of results to return; expected to be non-negative
   * @param startTime start of the time range, handed to the cube query unchanged (presumably epoch
   *                  seconds, the unit in which facts are written)
   * @param endTime end of the time range, handed to the cube query unchanged
   * @param namespace namespace of the accessed entity
   * @param entityType value of the "entity_type" dimension to match; facts store the lower-cased
   *                   entity type name
   * @param entityName value of the "entity_name" dimension to match
   * @return at most {@code topN} results in the natural ordering of {@link TopProgramsResult}
   */
  public List<TopProgramsResult> getTopNPrograms(int topN, long startTime, long endTime,
                                                 String namespace, String entityType, String entityName) {
    CubeQuery programQuery = CubeQuery.builder()
      .select()
      .measurement(AccessType.READ.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.WRITE.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.UNKNOWN.name().toLowerCase(), AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("entity_name", entityName)
      .dimension("entity_type", entityType)
      .dimension("audit_type", AuditType.ACCESS.name().toLowerCase())
      .timeRange(startTime, endTime)
      .groupBy()
      .dimension("program_name")
      .dimension("app_name")
      .dimension("program_type")
      .limit(1000)
      .build();
    Map<String, TopProgramsResult> auditStats = transformTopNProgramResult(auditMetrics.query(programQuery));
    List<TopProgramsResult> resultList = new ArrayList<>(auditStats.values());
    Collections.sort(resultList);
    return (topN >= resultList.size()) ? resultList : resultList.subList(0, topN);
  }

  /**
   * Merges the series of a top-programs query into one result per program.
   *
   * <p>Series without a program name are skipped. Series that share application name, program name and
   * program type (one per access measure) are added together.
   *
   * @param results series returned by a query grouped by program name, application name and program type
   * @return results keyed by a length-prefixed combination of application name, program name and
   *         program type
   */
  private Map<String, TopProgramsResult> transformTopNProgramResult(Collection<TimeSeries> results) {
    Map<String, TopProgramsResult> resultsMap = new HashMap<>();
    for (TimeSeries series : results) {
      Map<String, String> dimensions = series.getDimensionValues();
      String programName = dimensions.get("program_name");
      if (Strings.isNullOrEmpty(programName)) {
        continue;
      }
      // NOTE(review): the queries visible in this class do not group by "accessor_namespace", so this
      // lookup may always yield null - confirm against the cube's behavior.
      String accessorNamespace = dimensions.get("accessor_namespace");
      String appName = dimensions.get("app_name");
      String programType = dimensions.get("program_type");
      String key = getKey(appName, programName, programType);

      // A time range that crosses a resolution boundary yields more than one time value per series;
      // add them all up instead of reading only the first bucket.
      long value = 0L;
      for (TimeValue timeValue : series.getTimeValues()) {
        value += timeValue.getValue();
      }

      TopProgramsResult result = resultsMap.get(key);
      if (result == null) {
        resultsMap.put(key, new TopProgramsResult(accessorNamespace, programName, appName, programType, value));
      } else {
        result.increment(value);
      }
    }
    return resultsMap;
  }

  /**
   * Returns the applications with the most access audit messages for one entity.
   *
   * <p>The query sums the read, write and unknown access measures over the given time range for the
   * given entity, grouped by application name.
   *
   * @param topN maximum number of results to return; expected to be non-negative
   * @param startTime start of the time range, handed to the cube query unchanged (presumably epoch
   *                  seconds, the unit in which facts are written)
   * @param endTime end of the time range, handed to the cube query unchanged
   * @param namespace namespace of the accessed entity
   * @param entityType value of the "entity_type" dimension to match; facts store the lower-cased
   *                   entity type name
   * @param entityName value of the "entity_name" dimension to match
   * @return at most {@code topN} results in the natural ordering of {@link TopApplicationsResult}
   */
  public List<TopApplicationsResult> getTopNApplications(int topN, long startTime, long endTime,
                                                         String namespace, String entityType, String entityName) {
    CubeQuery applicationQuery = CubeQuery.builder()
      .select()
      .measurement(AccessType.READ.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.WRITE.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.UNKNOWN.name().toLowerCase(), AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("entity_name", entityName)
      .dimension("entity_type", entityType)
      .dimension("audit_type", AuditType.ACCESS.name().toLowerCase())
      .timeRange(startTime, endTime)
      .groupBy()
      .dimension("app_name")
      .limit(1000)
      .build();
    Map<String, TopApplicationsResult> auditStats
      = transformTopNApplicationResult(auditMetrics.query(applicationQuery));
    List<TopApplicationsResult> resultList = new ArrayList<>(auditStats.values());
    Collections.sort(resultList);
    return (topN >= resultList.size()) ? resultList : resultList.subList(0, topN);
  }

  /**
   * Returns the applications with the most access audit messages in a namespace.
   *
   * <p>The query sums the read, write and unknown access measures over the given time range, grouped by
   * application name.
   *
   * @param topN maximum number of results to return; expected to be non-negative
   * @param startTime start of the time range, handed to the cube query unchanged (presumably epoch
   *                  seconds, the unit in which facts are written)
   * @param endTime end of the time range, handed to the cube query unchanged
   * @param namespace namespace of the accessed entities
   * @return at most {@code topN} results in the natural ordering of {@link TopApplicationsResult}
   */
  public List<TopApplicationsResult> getTopNApplications(int topN, long startTime, long endTime,
                                                         String namespace) {
    CubeQuery applicationQuery = CubeQuery.builder()
      .select()
      .measurement(AccessType.READ.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.WRITE.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.UNKNOWN.name().toLowerCase(), AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("audit_type", AuditType.ACCESS.name().toLowerCase())
      .dimension("namespace", namespace)
      .timeRange(startTime, endTime)
      .groupBy()
      .dimension("app_name")
      .limit(1000)
      .build();
    Map<String, TopApplicationsResult> auditStats
      = transformTopNApplicationResult(auditMetrics.query(applicationQuery));
    List<TopApplicationsResult> resultList = new ArrayList<>(auditStats.values());
    Collections.sort(resultList);
    return (topN >= resultList.size()) ? resultList : resultList.subList(0, topN);
  }


  /**
   * Merges the series of a top-applications query into one result per application.
   *
   * <p>Series without an application name are skipped. Series that share an application name (one per
   * access measure) are added together.
   *
   * @param results series returned by a query grouped by application name
   * @return results keyed by application name
   */
  private Map<String, TopApplicationsResult> transformTopNApplicationResult(Collection<TimeSeries> results) {
    Map<String, TopApplicationsResult> resultsMap = new HashMap<>();
    for (TimeSeries series : results) {
      String appName = series.getDimensionValues().get("app_name");
      if (Strings.isNullOrEmpty(appName)) {
        continue;
      }

      // A time range that crosses a resolution boundary yields more than one time value per series;
      // add them all up instead of reading only the first bucket.
      long value = 0L;
      for (TimeValue timeValue : series.getTimeValues()) {
        value += timeValue.getValue();
      }

      TopApplicationsResult result = resultsMap.get(appName);
      if (result == null) {
        resultsMap.put(appName, new TopApplicationsResult(appName, value));
      } else {
        result.increment(value);
      }
    }
    return resultsMap;
  }

  /**
   * Returns the number of audit messages per time bucket for a whole namespace.
   *
   * <p>The bucket size is chosen from the length of the time range. The "count" measure is then summed
   * per bucket across all facts of the namespace.
   *
   * @param startTime start of the time range in seconds
   * @param endTime end of the time range in seconds
   * @param namespace namespace of the audited entities
   * @return the bucket name ("DAY" or "HOUR") together with the time values of the first series
   *         returned, or an empty {@link AuditHistogramResult} when the query returns nothing
   */
  public AuditHistogramResult getAuditHistogram(long startTime, long endTime, String namespace) {
    Bucket resolution = getResolutionBucket(startTime, endTime);
    CubeQuery histogramQuery = CubeQuery.builder()
      .select()
      .measurement("count", AggregationFunction.SUM)
      .from()
      .resolution(resolution.getResolutionsSeconds(), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .timeRange(startTime, endTime)
      .limit(1000)
      .build();

    Collection<TimeSeries> results = auditMetrics.query(histogramQuery);
    if (results.isEmpty()) {
      return new AuditHistogramResult();
    }
    // One measure and no group-by: only the first series is read.
    TimeSeries series = results.iterator().next();
    List<TimeValue> timeValueList = series.getTimeValues();
    return new AuditHistogramResult(resolution.name(), timeValueList);
  }

  /**
   * Returns the number of audit messages per time bucket for a single entity.
   *
   * <p>Same as the namespace-wide overload, additionally restricted to the given entity type and
   * entity name.
   *
   * @param startTime start of the time range in seconds
   * @param endTime end of the time range in seconds
   * @param namespace namespace of the audited entity
   * @param entityType value of the "entity_type" dimension to match; facts store the lower-cased
   *                   entity type name
   * @param entityName value of the "entity_name" dimension to match
   * @return the bucket name ("DAY" or "HOUR") together with the time values of the first series
   *         returned, or an empty {@link AuditHistogramResult} when the query returns nothing
   */
  public AuditHistogramResult getAuditHistogram(long startTime, long endTime, String namespace,
                                                String entityType, String entityName) {
    Bucket resolution = getResolutionBucket(startTime, endTime);
    CubeQuery histogramQuery = CubeQuery.builder()
      .select()
      .measurement("count", AggregationFunction.SUM)
      .from()
      .resolution(resolution.getResolutionsSeconds(), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("entity_type", entityType)
      .dimension("entity_name", entityName)
      .timeRange(startTime, endTime)
      .limit(1000)
      .build();

    Collection<TimeSeries> results = auditMetrics.query(histogramQuery);
    if (results.isEmpty()) {
      return new AuditHistogramResult();
    }
    // One measure and no group-by: only the first series is read.
    TimeSeries series = results.iterator().next();
    List<TimeValue> timeValueList = series.getTimeValues();
    return new AuditHistogramResult(resolution.name(), timeValueList);
  }

  /**
   * Counts the distinct programs that have access audit messages in a namespace.
   *
   * <p>Queries the access measures from time 0 until now, grouped by program name and program type,
   * and counts the distinct (program name, program type) pairs among the returned series. The query
   * is issued with a limit of 1000.
   *
   * @param namespace namespace of the accessed entities
   * @return the number of distinct programs found
   */
  public long getTotalProgramsCount(String namespace) {
    CubeQuery query = CubeQuery.builder()
      .select()
      .measurement(AccessType.WRITE.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.READ.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.UNKNOWN.name().toLowerCase(), AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("audit_type", AuditType.ACCESS.name().toLowerCase())
      .timeRange(0L, System.currentTimeMillis() / 1000)
      .groupBy()
      .dimension("program_name")
      .dimension("program_type")
      .limit(1000)
      .build();

    Collection<TimeSeries> results = auditMetrics.query(query);
    // The same program shows up once per access measure; the set collapses those duplicates.
    Set<String> uniquePrograms = new HashSet<>();
    for (TimeSeries series : results) {
      uniquePrograms.add(getKey(series.getDimensionValues().get("program_name"),
                                series.getDimensionValues().get("program_type")));
    }
    return uniquePrograms.size();
  }

  /**
   * Counts the distinct programs that have access audit messages for one entity.
   *
   * <p>Queries the access measures of the given entity from time 0 until now, grouped by program name
   * and program type, and counts the distinct (program name, program type) pairs among the returned
   * series. The query is issued with a limit of 1000.
   *
   * @param namespace namespace of the accessed entity
   * @param entityType value of the "entity_type" dimension to match; facts store the lower-cased
   *                   entity type name
   * @param entityName value of the "entity_name" dimension to match
   * @return the number of distinct programs found; 0 when the lookup raises
   *         {@link NoSuchElementException}
   */
  public long getTotalProgramsCount(String namespace, String entityType, String entityName) {
    CubeQuery query = CubeQuery.builder()
      .select()
      .measurement(AccessType.WRITE.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.READ.name().toLowerCase(), AggregationFunction.SUM)
      .measurement(AccessType.UNKNOWN.name().toLowerCase(), AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("entity_type", entityType)
      .dimension("entity_name", entityName)
      .dimension("audit_type", AuditType.ACCESS.name().toLowerCase())
      .timeRange(0L, System.currentTimeMillis() / 1000)
      .groupBy()
      .dimension("program_name")
      .dimension("program_type")
      .limit(1000)
      .build();
    try {
      Collection<TimeSeries> results = auditMetrics.query(query);
      // The same program shows up once per access measure; the set collapses those duplicates.
      Set<String> uniquePrograms = new HashSet<>();
      for (TimeSeries series : results) {
        uniquePrograms.add(getKey(series.getDimensionValues().get("program_name"),
                                  series.getDimensionValues().get("program_type")));
      }
      return uniquePrograms.size();
    } catch (NoSuchElementException e) {
      return 0L; // No Cube entry for given query exists. 0 total programs.
    }
  }

  /**
   * Returns the total number of audit messages recorded for datasets and streams in a namespace.
   *
   * <p>One query per entity type ("dataset", then "stream") sums the "count" measure from time 0 until
   * now at 365-day resolution. Every time value of the first returned series is added to the total, so
   * activity that falls into later 365-day buckets is counted as well.
   *
   * @param namespace namespace of the audited entities
   * @return the combined count, or 0 when neither query returns a series
   */
  public long getTotalActivity(String namespace) {
    long totalActivity = 0L;
    for (EntityType entityType : new EntityType[] {EntityType.DATASET, EntityType.STREAM}) {
      CubeQuery query = CubeQuery.builder()
        .select()
        .measurement("count", AggregationFunction.SUM)
        .from()
        .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
        .where()
        .dimension("namespace", namespace)
        .dimension("entity_type", entityType.name().toLowerCase())
        .timeRange(0L, System.currentTimeMillis() / 1000)
        .limit(1000)
        .build();

      Collection<TimeSeries> results = auditMetrics.query(query);
      if (results.isEmpty()) {
        continue;
      }
      // Single measurement queried and no group-by: only the first series is read, but across all
      // of its time buckets rather than just the first one.
      for (TimeValue timeValue : results.iterator().next().getTimeValues()) {
        totalActivity += timeValue.getValue();
      }
    }
    return totalActivity;
  }

  /**
   * Returns the total number of audit messages recorded for one entity.
   *
   * <p>Sums the "count" measure of the entity from time 0 until now at 365-day resolution. Every time
   * value of the first returned series is added up, so activity that falls into later 365-day buckets
   * is counted as well.
   *
   * @param namespace namespace of the audited entity
   * @param entityType value of the "entity_type" dimension to match; facts store the lower-cased
   *                   entity type name
   * @param entityName value of the "entity_name" dimension to match
   * @return the count, or 0 when the query returns no series
   */
  public long getTotalActivity(String namespace, String entityType, String entityName) {
    CubeQuery query = CubeQuery.builder()
      .select()
      .measurement("count", AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("entity_type", entityType)
      .dimension("entity_name", entityName)
      .timeRange(0L, System.currentTimeMillis() / 1000)
      .limit(1000)
      .build();

    Collection<TimeSeries> results = auditMetrics.query(query);
    if (results.isEmpty()) {
      return 0L;
    }
    // Single measurement queried and no group-by: only the first series is read, but across all of
    // its time buckets rather than just the first one.
    long totalActivity = 0L;
    for (TimeValue timeValue : results.iterator().next().getTimeValues()) {
      totalActivity += timeValue.getValue();
    }
    return totalActivity;
  }

  /**
   * Lists the names of the entities of one type that have audit messages in a namespace.
   *
   * <p>Queries the "count" measure from time 0 until now, grouped by entity name, and collects the
   * "entity_name" dimension of every returned series that carries one. The query is issued with a
   * limit of 1000.
   *
   * @param namespace namespace of the audited entities
   * @param entityType value of the "entity_type" dimension to match; facts store the lower-cased
   *                   entity type name
   * @return the entity names in the order the cube returned them; empty when nothing matches
   */
  public List<String> getEntities(String namespace, String entityType) {
    CubeQuery query = CubeQuery.builder()
      .select()
      .measurement("count", AggregationFunction.SUM)
      .from()
      .resolution(TimeUnit.DAYS.toSeconds(365L), TimeUnit.SECONDS)
      .where()
      .dimension("namespace", namespace)
      .dimension("entity_type", entityType)
      .timeRange(0L, System.currentTimeMillis() / 1000)
      .groupBy()
      .dimension("entity_name")
      .limit(1000)
      .build();

    Collection<TimeSeries> result = auditMetrics.query(query);
    List<String> entityList = new ArrayList<>(result.size());
    for (TimeSeries series : result) {
      if (series.getDimensionValues().containsKey("entity_name")) {
        entityList.add(series.getDimensionValues().get("entity_name"));
      }
    }
    return entityList;
  }

  /**
   * Chooses the histogram granularity for a time range: daily buckets when the range is longer than
   * six days' worth of seconds, hourly buckets otherwise.
   *
   * <p>This will be updated if we change how we select resolution.
   *
   * @param startTime start of the range in seconds
   * @param endTime end of the range in seconds
   * @return {@link Bucket#DAY} or {@link Bucket#HOUR}
   */
  private Bucket getResolutionBucket(long startTime, long endTime) {
    long rangeSeconds = endTime - startTime;
    long sixDaysSeconds = TimeUnit.DAYS.toSeconds(6L);
    return (rangeSeconds > sixDaysSeconds) ? Bucket.DAY : Bucket.HOUR;
  }

  /**
   * Builds a map key from three components by writing each one prefixed with its length, so that
   * different splits of the same characters (for example "ab","c","d" and "a","bc","d") cannot
   * collide.
   *
   * <p>A {@code null} component is treated like the empty string instead of raising a
   * {@link NullPointerException}; callers pass values looked up from dimension maps, which may be
   * absent.
   */
  private static String getKey(String s1, String s2, String s3) {
    String first = (s1 == null) ? "" : s1;
    String second = (s2 == null) ? "" : s2;
    String third = (s3 == null) ? "" : s3;
    return new StringBuilder()
      .append(first.length()).append(first)
      .append(second.length()).append(second)
      .append(third.length()).append(third)
      .toString();
  }

  /**
   * Builds a map key from two components by writing each one prefixed with its length, so that
   * different splits of the same characters (for example "ab","c" and "a","bc") cannot collide.
   *
   * <p>A {@code null} component is treated like the empty string instead of raising a
   * {@link NullPointerException}; callers pass values looked up from dimension maps, which may be
   * absent.
   */
  private static String getKey(String s1, String s2) {
    String first = (s1 == null) ? "" : s1;
    String second = (s2 == null) ? "" : s2;
    return new StringBuilder()
      .append(first.length()).append(first)
      .append(second.length()).append(second)
      .toString();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy