All downloads are free. Search and download functionality uses the official Maven repository.

ai.djl.util.cuda.CudaUtils Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
 * with the License. A copy of the License is located at
 *
 * http://aws.amazon.com/apache2.0/
 *
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
 * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package ai.djl.util.cuda;

import ai.djl.Device;
import ai.djl.engine.EngineException;
import ai.djl.util.Utils;

import com.sun.jna.Native;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.management.MemoryUsage;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * A class containing CUDA utility methods.
 *
 * <p>Information is obtained in one of two ways. By default the CUDA runtime library is loaded
 * through JNA and queried in-process. When the system property {@code ai.djl.util.cuda.fork} is
 * {@code true}, the library is not loaded in this JVM; a child JVM running {@link #main(String[])}
 * is launched instead and its comma separated output is parsed.
 */
public final class CudaUtils {

    /** System property that switches to querying CUDA from a forked JVM. */
    private static final String FORK_PROPERTY = "ai.djl.util.cuda.fork";

    private static final Logger logger = LoggerFactory.getLogger(CudaUtils.class);

    // NOTE(review): static initializers run in textual order, so loadLibrary() executes while
    // `logging` (declared below) still holds its default value `false`. The log statements
    // guarded by `logging` inside loadLibrary() are therefore never emitted. Moving `logging`
    // above LIB would enable them, but the child JVM started by execute() would then log while
    // loading the library as well, and that text could end up in the output execute() parses.
    // The order is kept as is on purpose; confirm the intended behaviour before changing it.
    private static final CudaLibrary LIB = loadLibrary();

    /** Cached output of the child JVM: {@code [gpuCount, cudaVersion, capability...]}. */
    private static String[] gpuInfo;

    /** Cleared by {@link #main(String[])} so that the child JVM does not log. */
    private static boolean logging = true;

    private CudaUtils() {}

    /**
     * Gets whether CUDA runtime library is in the system.
     *
     * <p>This is implemented as {@code getGpuCount() > 0}, so it is {@code false} when the library
     * is present but no device could be detected.
     *
     * @return {@code true} if at least one GPU was detected
     */
    public static boolean hasCuda() {
        return getGpuCount() > 0;
    }

    /**
     * Returns the number of GPUs available in the system.
     *
     * <p>Returns {@code 0} when the CUDA runtime library is not loaded, when the runtime reports
     * an error, or (in fork mode) when the child output does not start with a number.
     *
     * @return the number of GPUs available in the system
     */
    public static int getGpuCount() {
        if (isForkMode()) {
            String[] info = getGpuInfo();
            try {
                return Integer.parseInt(info[0]);
            } catch (NumberFormatException e) {
                logger.warn("Unexpected output: {}", info[0], e);
                return 0;
            }
        }

        if (LIB == null) {
            return 0;
        }
        int[] count = new int[1];
        int result = LIB.cudaGetDeviceCount(count);
        switch (result) {
            case 0:
                return count[0];
            case CudaLibrary.ERROR_NO_DEVICE:
                if (logging) {
                    logger.debug(
                            "No GPU device found: {} ({})", LIB.cudaGetErrorString(result), result);
                }
                return 0;
            case CudaLibrary.INITIALIZATION_ERROR:
            case CudaLibrary.INSUFFICIENT_DRIVER:
            case CudaLibrary.ERROR_NOT_PERMITTED:
            default:
                // the named cases are listed for readability; they share the default handling
                if (logging) {
                    logger.warn(
                            "Failed to detect GPU count: {} ({})",
                            LIB.cudaGetErrorString(result),
                            result);
                }
                return 0;
        }
    }

    /**
     * Returns the version of CUDA runtime.
     *
     * @return the version of CUDA runtime
     * @throws IllegalArgumentException in fork mode, if the child JVM reported no device ({@code
     *     -1}) or produced output that does not contain a version number
     * @throws IllegalStateException if the CUDA runtime library is not loaded
     * @throws EngineException if the CUDA API call fails
     */
    public static int getCudaVersion() {
        if (isForkMode()) {
            String[] info = getGpuInfo();
            if (info.length < 2) {
                // malformed child output: fail with a message instead of an index error
                throw new IllegalArgumentException(
                        "Unexpected output: " + String.join(",", info));
            }
            int version = Integer.parseInt(info[1]);
            if (version == -1) {
                throw new IllegalArgumentException("No cuda device found.");
            }
            return version;
        }

        if (LIB == null) {
            throw new IllegalStateException("No cuda library is loaded.");
        }
        int[] version = new int[1];
        int result = LIB.cudaRuntimeGetVersion(version);
        checkCall(result);
        return version[0];
    }

    /**
     * Returns the version string of CUDA runtime.
     *
     * <p>The string is {@code version / 1000} formatted with two digits, followed by {@code
     * (version / 10) % 10}; for example {@code 11080} yields {@code "118"} and {@code 9020} yields
     * {@code "092"}.
     *
     * @return the version string of CUDA runtime
     */
    public static String getCudaVersionString() {
        int version = getCudaVersion();
        int major = version / 1000;
        int minor = (version / 10) % 10;
        return String.format(Locale.ROOT, "%02d", major) + minor;
    }

    /**
     * Returns the CUDA compute capability.
     *
     * <p>The result is the major number immediately followed by the minor number, without a
     * separator.
     *
     * @param device the id of the GPU device to retrieve
     * @return the CUDA compute capability
     * @throws IllegalArgumentException in fork mode, if {@code device} is negative or not listed
     *     in the child JVM output
     * @throws IllegalStateException if the CUDA runtime library is not loaded
     * @throws EngineException if the CUDA API call fails
     */
    public static String getComputeCapability(int device) {
        if (isForkMode()) {
            String[] info = getGpuInfo();
            // The first two entries are the GPU count and the CUDA version; capabilities follow.
            // A negative id must be rejected as well, otherwise it would index those two entries.
            if (device < 0 || device >= info.length - 2) {
                throw new IllegalArgumentException("Invalid device: " + device);
            }
            return info[device + 2];
        }

        if (LIB == null) {
            throw new IllegalStateException("No cuda library is loaded.");
        }
        // attribute ids passed to cudaDeviceGetAttribute
        int attrComputeCapabilityMajor = 75;
        int attrComputeCapabilityMinor = 76;

        int[] major = new int[1];
        int[] minor = new int[1];
        checkCall(LIB.cudaDeviceGetAttribute(major, attrComputeCapabilityMajor, device));
        checkCall(LIB.cudaDeviceGetAttribute(minor, attrComputeCapabilityMinor, device));

        return String.valueOf(major[0]) + minor[0];
    }

    /**
     * Returns the {@link MemoryUsage} of the specified GPU device.
     *
     * <p>In the returned object {@code init} is {@code -1}, {@code used} and {@code committed} are
     * both the amount of memory in use, and {@code max} is the total memory of the device.
     *
     * @param device the GPU {@link Device} to retrieve
     * @return the {@link MemoryUsage} of the specified GPU device
     * @throws IllegalArgumentException if {@link Device} is not GPU device or does not exist
     * @throws IllegalStateException if the CUDA runtime library is not loaded
     * @throws EngineException if a CUDA API call fails
     */
    public static MemoryUsage getGpuMemory(Device device) {
        if (!device.isGpu()) {
            throw new IllegalArgumentException("Only GPU device is allowed.");
        }

        if (isForkMode()) {
            // expected child output: computeCapability,total,used
            String[] ret = execute(device.getDeviceId());
            if (ret.length != 3) {
                // anything else is an error message printed by the child
                throw new IllegalArgumentException(ret[0]);
            }
            long total = Long.parseLong(ret[1]);
            long used = Long.parseLong(ret[2]);
            return new MemoryUsage(-1, used, used, total);
        }

        if (LIB == null) {
            throw new IllegalStateException("No GPU device detected.");
        }

        int[] currentDevice = new int[1];
        checkCall(LIB.cudaGetDevice(currentDevice));
        checkCall(LIB.cudaSetDevice(device.getDeviceId()));

        long[] free = new long[1];
        long[] total = new long[1];

        // Switch back to the previous device before checking either result, so that a failed
        // memory query cannot leave the queried device selected.
        int memInfoResult = LIB.cudaMemGetInfo(free, total);
        int restoreResult = LIB.cudaSetDevice(currentDevice[0]);
        checkCall(memInfoResult);
        checkCall(restoreResult);

        long committed = total[0] - free[0];
        return new MemoryUsage(-1, committed, committed, total[0]);
    }

    /**
     * The main entrypoint to get CUDA information with command line.
     *
     * <p>Without arguments a single line {@code gpuCount,cudaVersion[,computeCapability...]} is
     * printed ({@code 0,-1} when no GPU is found). With a device id as first argument the line is
     * {@code computeCapability,totalMemory,usedMemory}, or {@code Invalid device: <arg>} when the
     * id is not a valid device.
     *
     * @param args the command line arguments.
     */
    @SuppressWarnings("PMD.SystemPrintln")
    public static void main(String[] args) {
        // stdout is parsed by the parent process; keep it free of log output
        logging = false;
        int gpuCount = getGpuCount();
        if (args.length == 0) {
            if (gpuCount <= 0) {
                System.out.println("0,-1");
                return;
            }
            int cudaVersion = getCudaVersion();
            StringBuilder sb = new StringBuilder();
            sb.append(gpuCount).append(',').append(cudaVersion);
            for (int i = 0; i < gpuCount; ++i) {
                sb.append(',').append(getComputeCapability(i));
            }
            System.out.println(sb);
            return;
        }
        try {
            int deviceId = Integer.parseInt(args[0]);
            if (deviceId < 0 || deviceId >= gpuCount) {
                System.out.println("Invalid device: " + deviceId);
                return;
            }
            MemoryUsage mem = getGpuMemory(Device.gpu(deviceId));
            String cc = getComputeCapability(deviceId);
            System.out.println(cc + ',' + mem.getMax() + ',' + mem.getUsed());
        } catch (NumberFormatException e) {
            System.out.println("Invalid device: " + args[0]);
        }
    }

    /**
     * Returns whether CUDA information is collected from a forked JVM.
     *
     * @return {@code true} if the {@code ai.djl.util.cuda.fork} system property is {@code true}
     */
    private static boolean isForkMode() {
        return Boolean.getBoolean(FORK_PROPERTY);
    }

    /**
     * Returns the summary printed by the child JVM, launching it on first use.
     *
     * @return the comma separated fields of the child output
     */
    @SuppressWarnings("PMD.NonThreadSafeSingleton")
    private static String[] getGpuInfo() {
        if (gpuInfo == null) {
            gpuInfo = execute(-1); // NOPMD
        }
        return gpuInfo;
    }

    /**
     * Loads the CUDA runtime library through JNA.
     *
     * <p>On Windows the directories of {@code CUDA_PATH\bin} (when set) and {@code PATH} are
     * scanned for a file named {@code cudart64_<digits>.dll} and the first match is loaded by its
     * base name. On other systems {@code cudart} is loaded by name.
     *
     * @return the loaded library, or {@code null} in fork mode or when loading fails
     */
    private static CudaLibrary loadLibrary() {
        try {
            if (isForkMode()) {
                return null;
            }
            if (System.getProperty("os.name").startsWith("Win")) {
                String path = Utils.getenv("PATH");
                if (path == null) {
                    return null;
                }
                Pattern p = Pattern.compile("cudart64_\\d+\\.dll");
                String cudaPath = Utils.getenv("CUDA_PATH");
                String[] searchPath;
                if (cudaPath == null) {
                    searchPath = path.split(";");
                } else {
                    // CUDA_PATH takes precedence over PATH
                    searchPath = (cudaPath + "\\bin\\;" + path).split(";");
                }
                for (String item : searchPath) {
                    File dir = new File(item);
                    // listFiles returns null when the entry is not a readable directory
                    File[] files = dir.listFiles(n -> p.matcher(n.getName()).matches());
                    if (files != null && files.length > 0) {
                        String fileName = files[0].getName();
                        // strip the ".dll" extension
                        String cudaRt = fileName.substring(0, fileName.length() - 4);
                        if (logging) {
                            logger.debug("Found cudart: {}", files[0].getAbsolutePath());
                        }
                        return Native.load(cudaRt, CudaLibrary.class);
                    }
                }
                if (logging) {
                    logger.debug("No cudart library found in path.");
                }
                return null;
            }
            return Native.load("cudart", CudaLibrary.class);
        } catch (UnsatisfiedLinkError e) {
            if (logging) {
                logger.debug("cudart library not found.");
                logger.trace("", e);
            }
        } catch (LinkageError e) {
            if (logging) {
                logger.warn("You have a conflict version of JNA in the classpath.");
                logger.debug("", e);
            }
        } catch (SecurityException e) {
            if (logging) {
                logger.warn("Access denied during loading cudart library.");
                logger.trace("", e);
            }
        }
        return null;
    }

    /**
     * Runs {@link #main(String[])} in a child JVM and returns its output split on commas.
     *
     * <p>The child is started with the {@code java} executable of the current {@code java.home}
     * and the current class path. Its standard error is merged into its standard output.
     *
     * @param deviceId the device id passed to the child, or a negative value to pass no argument
     * @return the trimmed output of the child split on {@code ','}
     * @throws IllegalArgumentException if the child cannot be started or its output cannot be read
     */
    private static String[] execute(int deviceId) {
        try {
            String javaHome = System.getProperty("java.home");
            String classPath = System.getProperty("java.class.path");
            String os = System.getProperty("os.name");
            List<String> cmd = new ArrayList<>(5);
            if (os.startsWith("Win")) {
                cmd.add(javaHome + "\\bin\\java.exe");
            } else {
                cmd.add(javaHome + "/bin/java");
            }
            cmd.add("-cp");
            cmd.add(classPath);
            cmd.add("ai.djl.util.cuda.CudaUtils");
            if (deviceId >= 0) {
                cmd.add(String.valueOf(deviceId));
            }
            Process ps = new ProcessBuilder(cmd).redirectErrorStream(true).start();
            try (InputStream is = ps.getInputStream()) {
                String line = Utils.toString(is).trim();
                return line.split(",");
            }
        } catch (IOException e) {
            throw new IllegalArgumentException("Failed get GPU information", e);
        }
    }

    /**
     * Verifies the result code of a CUDA API call.
     *
     * @param ret the value returned by the CUDA API call
     * @throws IllegalStateException if the CUDA runtime library is not loaded
     * @throws EngineException if {@code ret} is not {@code 0}
     */
    private static void checkCall(int ret) {
        if (LIB == null) {
            throw new IllegalStateException("No cuda library is loaded.");
        }
        if (ret != 0) {
            throw new EngineException(
                    "CUDA API call failed: " + LIB.cudaGetErrorString(ret) + " (" + ret + ')');
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy