All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sysml.utils.GPUStatistics Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.utils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Measures performance numbers when GPU mode is enabled
 * Printed as part of {@link Statistics}.
 */
public class GPUStatistics {
	//TODO fix formatting 
	//TODO replace AtomicLong with LongAdder
	
	
  // Whether or not extra per-instruction statistics will be recorded and shown for the GPU
  public static boolean DISPLAY_STATISTICS = false;

  private static int iNoOfExecutedGPUInst = 0;

  public static long cudaInitTime = 0;
  public static long cudaLibrariesInitTime = 0;
  public static AtomicLong cudaSparseToDenseTime = new AtomicLong(0);		// time spent in converting sparse matrix block to dense
  public static AtomicLong cudaDenseToSparseTime = new AtomicLong(0);		// time spent in converting dense matrix block to sparse
  public static AtomicLong cudaSparseConversionTime = new AtomicLong(0);	// time spent in converting between sparse block types
  public static AtomicLong cudaSparseToDenseCount = new AtomicLong(0);
  public static AtomicLong cudaDenseToSparseCount = new AtomicLong(0);
  public static AtomicLong cudaSparseConversionCount = new AtomicLong(0);

  public static AtomicLong cudaAllocTime = new AtomicLong(0);             // time spent in allocating memory on the GPU
  public static AtomicLong cudaDeAllocTime = new AtomicLong(0);           // time spent in deallocating memory on the GPU
  public static AtomicLong cudaMemSet0Time = new AtomicLong(0);           // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates)
  public static AtomicLong cudaToDevTime = new AtomicLong(0);             // time spent in copying data from host (CPU) to device (GPU) memory
  public static AtomicLong cudaFromDevTime = new AtomicLong(0);           // time spent in copying data from device to host
  public static AtomicLong cudaAllocCount = new AtomicLong(0);
  public static AtomicLong cudaDeAllocCount = new AtomicLong(0);
  public static AtomicLong cudaMemSet0Count = new AtomicLong(0);
  public static AtomicLong cudaToDevCount = new AtomicLong(0);
  public static AtomicLong cudaFromDevCount = new AtomicLong(0);
  public static AtomicLong cudaEvictionCount = new AtomicLong(0);

  // Per instruction miscellaneous timers.
  // Used to record events in a CP Heavy Hitter instruction and
  // provide a breakdown of how time was spent in that instruction
  private static HashMap> _cpInstMiscTime = new HashMap> ();
  private static HashMap> _cpInstMiscCount = new HashMap> ();

  /**
   * Resets the miscellaneous timers & counters
   */
  public static void resetMiscTimers(){
    _cpInstMiscTime.clear();
    _cpInstMiscCount.clear();
  }

  /**
   * Resets all the cuda counters and timers, including the misc timers & counters
   */
  public static void reset(){
    cudaInitTime = 0;
    cudaLibrariesInitTime = 0;
    cudaAllocTime.set(0);
    cudaDeAllocTime.set(0);
    cudaMemSet0Time.set(0);
    cudaMemSet0Count.set(0);
    cudaToDevTime.set(0);
    cudaFromDevTime.set(0);
    cudaAllocCount.set(0);
    cudaDeAllocCount.set(0);
    cudaToDevCount.set(0);
    cudaFromDevCount.set(0);
    cudaEvictionCount.set(0);
    resetMiscTimers();
  }


  public static synchronized void setNoOfExecutedGPUInst(int numJobs) {
    iNoOfExecutedGPUInst = numJobs;
  }

  public static synchronized void incrementNoOfExecutedGPUInst() {
    iNoOfExecutedGPUInst ++;
  }

  public static synchronized int getNoOfExecutedGPUInst() {
    return iNoOfExecutedGPUInst;
  }

  /**
   * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count
   * @param instructionName	name of the instruction/op
   * @param miscTimer				name of the miscellaneous timer
   * @param timeNanos				time in nano seconds
   * @param incrementCount	how much to increment the count of the miscTimer by
   */
  public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount)
  {
    if (!DISPLAY_STATISTICS)
      return;

    HashMap miscTimesMap = _cpInstMiscTime.get(instructionName);
    if (miscTimesMap == null) {
      miscTimesMap = new HashMap();
      _cpInstMiscTime.put(instructionName, miscTimesMap);
    }
    Long oldVal = miscTimesMap.get(miscTimer);
    Long newVal = timeNanos + ((oldVal!=null) ? oldVal : 0);
    miscTimesMap.put(miscTimer, newVal);

    HashMap miscCountMap = _cpInstMiscCount.get(instructionName);
    if (miscCountMap == null){
      miscCountMap = new HashMap();
      _cpInstMiscCount.put(instructionName, miscCountMap);
    }
    Long oldCnt = miscCountMap.get(miscTimer);
    Long newCnt = incrementCount + ((oldCnt!=null) ? oldCnt : 0);
    miscCountMap.put(miscTimer, newCnt);
  }

  /**
   * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count by 1
   * @param instructionName	name of the instruction/op
   * @param miscTimer				name of the miscellaneous timer
   * @param timeNanos				time in nano seconds
   */
  public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos){
    maintainCPMiscTimes(instructionName, miscTimer, timeNanos, 1);
  }

  /**
   * Used to print misc timers (and their counts) for a given instruction/op
   * @param instructionName name of the instruction/op
   * @return  a formatted string of misc timers for a given instruction/op
   */
  public static String getStringForCPMiscTimesPerInstruction(String instructionName) {
    StringBuffer sb = new StringBuffer();
    HashMap miscTimerMap = _cpInstMiscTime.get(instructionName);
    if (miscTimerMap != null) {
      List> sortedList = new ArrayList>(miscTimerMap.entrySet());
      // Sort the times to display by the most expensive first
      Collections.sort(sortedList, new Comparator>() {
        @Override
        public int compare(Map.Entry o1, Map.Entry o2) {
          return (int) (o1.getValue() - o2.getValue());
        }
      });
      Iterator> miscTimeIter = sortedList.iterator();
      HashMap miscCountMap = _cpInstMiscCount.get(instructionName);
      while (miscTimeIter.hasNext()) {
        Map.Entry e = miscTimeIter.next();
        String miscTimerName = e.getKey();
        Long miscTimerTime = e.getValue();
        Long miscCount = miscCountMap.get(miscTimerName);
        sb.append(miscTimerName + "[" + String.format("%.3f", (double) miscTimerTime / 1000000000.0) + "s," + miscCount + "]");
        if (miscTimeIter.hasNext())
          sb.append(", ");
      }
    }
    return sb.toString();
  }

  /**
   * Used to print out cuda timers & counters
   * @return a formatted string of cuda timers & counters
   */
  public static String getStringForCudaTimers() {
    StringBuffer sb = new StringBuffer();
    sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/"
            + String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n");
    sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n");
    sb.append("GPU mem tx time  (alloc/dealloc/set0/toDev/fromDev):\t"
            + String.format("%.3f", cudaAllocTime.get()*1e-9) + "/"
            + String.format("%.3f", cudaDeAllocTime.get()*1e-9) + "/"
            + String.format("%.3f", cudaMemSet0Time.get()*1e-9) + "/"
            + String.format("%.3f", cudaToDevTime.get()*1e-9) + "/"
            + String.format("%.3f", cudaFromDevTime.get()*1e-9)  + " sec.\n");
    sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict):\t"
            + cudaAllocCount.get() + "/"
            + cudaDeAllocCount.get() + "/"
            + cudaMemSet0Count.get() + "/"
            + cudaSparseConversionCount.get() + "/"
            + cudaToDevCount.get() + "/"
            + cudaFromDevCount.get() + "/"
            + cudaEvictionCount.get() + ".\n");
    sb.append("GPU conversion time  (sparseConv/sp2dense/dense2sp):\t"
            + String.format("%.3f", cudaSparseConversionTime.get()*1e-9) + "/"
            + String.format("%.3f", cudaSparseToDenseTime.get()*1e-9) + "/"
            + String.format("%.3f", cudaDenseToSparseTime.get()*1e-9) + " sec.\n");
    sb.append("GPU conversion count (sparseConv/sp2dense/dense2sp):\t"
            + cudaSparseConversionCount.get() + "/"
            + cudaSparseToDenseCount.get() + "/"
            + cudaDenseToSparseCount.get() + ".\n");

    return sb.toString();
  }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy