Source code for brian2cuda.utils.gputools

"""
Tools to get information about available GPUs.
"""

import os
import subprocess
import shutil
import shlex
import re
import distutils

from brian2.core.preferences import prefs, PreferenceError
from brian2.codegen.cpp_prefs import get_compiler_and_args
from brian2.utils.logger import get_logger
from brian2cuda.utils.logger import report_issue_message

logger = get_logger("brian2.devices.cuda_standalone")

# To list all GPUs: nvidia-smi -L

# Some code here is adapted from
# https://github.com/cupy/cupy/blob/e6f8d91ffae7ee241ed235ddbeb725c04f593c33/cupy/_environment.py


# To minimize running external commands (`nvidia-smi`, `nvcc`, `deviceQuery`), their
# results are cached in the global dictionaries defined below. Each command runs at
# most once; whenever a value is needed again, the cached result is reused.
_cuda_installation = {
    "cuda_path": None,
    "nvcc_path": None,
    "runtime_version": None,
}

_gpu_selection = {
    "available_gpus": None,
    "selected_gpu_id": None,
    "selected_gpu_compute_capability": None,
}
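
# Hedged usage sketch (hypothetical session, assuming a CUDA installation in
# /usr/local/cuda): the public getters below memoize into these dictionaries,
# so only the first call runs an external command.
#
#     >>> from brian2cuda.utils.gputools import get_cuda_path
#     >>> get_cuda_path()  # first call: runs detection and caches the result
#     '/usr/local/cuda'
#     >>> get_cuda_path()  # later calls: return the cached value, no subprocess
#     '/usr/local/cuda'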


def get_cuda_path():
    """
    Detect the path to the CUDA installation (e.g. '/usr/local/cuda').

    This takes into account the user-defined environment variable `CUDA_PATH` and the
    preference `prefs.devices.cuda_standalone.cuda_backend.cuda_path`.
    """
    # If cuda_path was already detected, reuse the global variable
    global _cuda_installation
    if _cuda_installation["cuda_path"] is None:
        cuda_path, detected_from = _get_cuda_path()
        _check_cuda_path(cuda_path, detected_from)
        _cuda_installation["cuda_path"] = cuda_path
    return _cuda_installation["cuda_path"]


def get_nvcc_path():
    """Return the path to the `nvcc` binary."""
    # If nvcc_path was already detected, reuse the global variable
    global _cuda_installation
    if _cuda_installation["nvcc_path"] is None:
        _cuda_installation["nvcc_path"] = _get_nvcc_path()
    return _cuda_installation["nvcc_path"]


def get_cuda_runtime_version():
    """Return the CUDA runtime version (as float, e.g. `11.2`)."""
    # If runtime_version was already detected, reuse the global variable
    global _cuda_installation
    if _cuda_installation["runtime_version"] is None:
        _cuda_installation["runtime_version"] = _get_cuda_runtime_version()
    return _cuda_installation["runtime_version"]


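# Illustrative example for the version detection performed by
# `_get_cuda_runtime_version` below (hypothetical `nvcc --version` output):
# the line "Cuda compilation tools, release 11.2, V11.2.67" is parsed down to
# the release field and returned as the float 11.2.

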
def get_cuda_installation():
    """Return a new dictionary of CUDA installation variables."""
    cuda_installation = {
        "cuda_path": get_cuda_path(),
        "nvcc_path": get_nvcc_path(),
        "runtime_version": get_cuda_runtime_version(),
    }
    global _cuda_installation
    _assert_keys_equal(cuda_installation, _cuda_installation)
    return cuda_installation


def get_gpu_selection():
    """Return a new dictionary of the selected GPU variables."""
    gpu_id, compute_capability = select_gpu()
    gpu_selection = {
        "available_gpus": get_available_gpus(),
        "selected_gpu_id": gpu_id,
        "selected_gpu_compute_capability": compute_capability,
    }
    global _gpu_selection
    _assert_keys_equal(gpu_selection, _gpu_selection)
    return gpu_selection


def get_available_gpus():
    """
    Return a list of names of available GPUs, sorted by GPU ID as reported by
    `nvidia-smi`.
    """
    global _gpu_selection
    if _gpu_selection["available_gpus"] is None:
        _gpu_selection["available_gpus"] = _get_available_gpus()
    return _gpu_selection["available_gpus"]


def select_gpu():
    """
    Select the GPU for the simulation, based on the user preference
    `prefs.devices.cuda_standalone.cuda_backend.gpu_id` or (if not provided) pick the
    GPU with the highest compute capability.

    Returns a tuple `(gpu_id, compute_capability)` of type `(int, float)`.
    """
    global _gpu_selection
    if _gpu_selection["selected_gpu_id"] is None:
        assert _gpu_selection["selected_gpu_compute_capability"] is None
        gpu_id, compute_capability = _select_gpu()
        _gpu_selection["selected_gpu_id"] = gpu_id
        _gpu_selection["selected_gpu_compute_capability"] = compute_capability
    return (
        _gpu_selection["selected_gpu_id"],
        _gpu_selection["selected_gpu_compute_capability"],
    )


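# Hypothetical session (values depend on the machine; assumes one detectable
# GPU with compute capability 6.1):
#
#     >>> select_gpu()  # first call runs detection and caches the result
#     (0, 6.1)
#     >>> select_gpu()  # repeated calls return the cached selection
#     (0, 6.1)

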
def reset_cuda_installation():
    """
    Reset the detected CUDA installation. The installation will be detected again the
    next time it is needed.
    """
    global _cuda_installation
    for key in _cuda_installation.keys():
        _cuda_installation[key] = None


def reset_gpu_selection():
    """Reset the selected GPU. A new GPU will be selected the next time it is needed."""
    global _gpu_selection
    for key in _gpu_selection.keys():
        _gpu_selection[key] = None


def restore_cuda_installation(cuda_installation):
    """Set the global CUDA installation dictionary to `cuda_installation`."""
    global _cuda_installation
    if sorted(_cuda_installation.keys()) != sorted(cuda_installation.keys()):
        raise KeyError(
            "`cuda_installation` has to have the following keys: {}. Got instead: "
            "{}".format(
                sorted(_cuda_installation.keys()), sorted(cuda_installation.keys())
            )
        )
    _cuda_installation.update(cuda_installation)


def restore_gpu_selection(gpu_selection):
    """Set the global GPU selection dictionary to `gpu_selection`."""
    global _gpu_selection
    if sorted(_gpu_selection.keys()) != sorted(gpu_selection.keys()):
        raise KeyError(
            "`gpu_selection` has to have the following keys: {}. Got instead: "
            "{}".format(sorted(_gpu_selection.keys()), sorted(gpu_selection.keys()))
        )
    _gpu_selection.update(gpu_selection)


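# Hedged sketch of the reset/restore round trip, e.g. to hand a detected
# installation to another process without re-running the external commands
# (the dictionary contents are hypothetical):
#
#     >>> installation = get_cuda_installation()   # runs detection once
#     >>> reset_cuda_installation()                # clear the cached values
#     >>> restore_cuda_installation(installation)  # restore without detection
#     >>> get_cuda_path() == installation["cuda_path"]
#     True

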
def _assert_keys_equal(dict1, dict2):
    keys1 = sorted(dict1.keys())
    keys2 = sorted(dict2.keys())
    assert keys1 == keys2, f"{keys1} != {keys2}"


def _get_cuda_path():
    # Use preference if set
    cuda_path_pref = prefs.devices.cuda_standalone.cuda_backend.cuda_path
    if cuda_path_pref is not None:
        logger.info(
            f"CUDA installation directory given via preference "
            f"`prefs.devices.cuda_standalone.cuda_backend.cuda_path={cuda_path_pref}`"
        )
        # Allow home directory as `~` in path
        cuda_path_pref = os.path.expanduser(cuda_path_pref)
        return (cuda_path_pref, "pref")

    # Use environment variable if set
    cuda_path = os.environ.get("CUDA_PATH", "")  # Nvidia default on Windows
    if os.path.exists(cuda_path):
        logger.info(
            f"CUDA installation directory given via environment variable "
            f"`CUDA_PATH={cuda_path}`"
        )
        return (cuda_path, "env")

    # Use the nvcc location if the `nvcc` binary is in PATH
    nvcc_path = shutil.which("nvcc")
    if nvcc_path is not None:
        cuda_path_nvcc = os.path.dirname(os.path.dirname(nvcc_path))
        logger.info(
            f"CUDA installation directory detected via location of `nvcc` binary: "
            f"{cuda_path_nvcc}"
        )
        return (cuda_path_nvcc, "nvcc")

    # Use standard location /usr/local/cuda
    if os.path.exists("/usr/local/cuda"):
        cuda_path_usr = "/usr/local/cuda"
        logger.info(
            f"CUDA installation directory found in standard location: {cuda_path_usr}"
        )
        return (cuda_path_usr, "default")

    # Use standard location /opt/cuda
    if os.path.exists("/opt/cuda"):
        cuda_path_opt = "/opt/cuda"
        logger.info(
            f"CUDA installation directory found in standard location: {cuda_path_opt}"
        )
        return (cuda_path_opt, "default")

    # Raise error if the CUDA path was not found
    raise RuntimeError(
        "Couldn't find the CUDA installation. Please set the preference "
        "`prefs.devices.cuda_standalone.cuda_backend.cuda_path` or the environment "
        "variable `CUDA_PATH` to point to your CUDA installation directory (this "
        "should be the directory where `./bin/nvcc` is located, e.g. `/usr/local/cuda`)"
    )


def _check_cuda_path(cuda_path, detected_from):
    # Trigger nvcc path detection now to raise an error if it isn't found
    nvcc_path = _get_nvcc_path(cuda_path=cuda_path)
    if not os.path.exists(nvcc_path):
        # If we detected the cuda_path based on the nvcc binary, this should not happen
        assert detected_from != "nvcc", report_issue_message
        msg = f"Couldn't find `nvcc` binary in {nvcc_path}."
        if detected_from == "pref":
            msg += (
                " Are you sure your "
                "`prefs.devices.cuda_standalone.cuda_backend.cuda_path` preference "
                "is correct?"
            )
        elif detected_from == "env":
            msg += " Are you sure your `CUDA_PATH` environment variable is correct?"
        if prefs.devices.cuda_standalone.cuda_backend.detect_cuda:
            raise RuntimeError(msg)
        else:
            logger.warn(msg)


def _get_nvcc_path(cuda_path=None):
    """
    Get the nvcc path from the CUDA installation path (path/to/cuda/bin/nvcc).
    """
    # TODO: Check if NVCC is specific to cupy and if we want to support it?
    # If so, make sure cuda_path and nvcc_path fit together, see:
    # https://github.com/cupy/cupy/blob/cb29c07ccbae346841adb3c8bfa33aba463e2588/install/build.py#L65-L70
    # nvcc = os.environ.get("NVCC", None)
    # if nvcc:
    #     return distutils.util.split_quoted(nvcc)
    if cuda_path is None:
        cuda_path = get_cuda_path()

    compiler, _ = get_compiler_and_args()
    if compiler == "msvc":
        # Windows
        nvcc_bin = "bin/nvcc.exe"
    else:
        # Unix
        nvcc_bin = "bin/nvcc"

    nvcc_path = os.path.join(cuda_path, nvcc_bin)
    return nvcc_path


def _get_cuda_runtime_version():
    """
    Get the CUDA runtime version.
    """
    version_pref = prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version
    if version_pref is not None:
        # CUDA runtime version set via preference
        return version_pref

    # Get the runtime version from `nvcc --version`
    try:
        nvcc_path = get_nvcc_path()
    except RuntimeError as error:
        raise RuntimeError(
            "Couldn't detect CUDA runtime version. You can specify it via "
            "`prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version`"
        ) from error
    nvcc_output = _run_command_with_output(nvcc_path, "--version")
    nvcc_lines = nvcc_output.split("\n")
    # version_line example: "Cuda compilation tools, release 11.2, V11.2.67"
    version_line = nvcc_lines[3]
    assert version_line.startswith("Cuda compilation tools, release")
    # release_str example: "release 11.2"
    release_str = version_line.split(", ")[1]
    # runtime_version_str example: "11.2"
    runtime_version_str = release_str.split(" ")[1]
    # Return the version as float
    return float(runtime_version_str)


def _select_gpu():
    gpu_id = prefs.devices.cuda_standalone.cuda_backend.gpu_id
    compute_capability = prefs.devices.cuda_standalone.cuda_backend.compute_capability
    gpu_list = None
    if prefs.devices.cuda_standalone.cuda_backend.detect_gpus:
        if gpu_id is None:
            gpu_id, compute_capability = get_best_gpu()
        else:
            compute_capability = get_compute_capability(gpu_id)
        gpu_list = get_available_gpus()
    else:
        logger.info(
            "Automatic detection of GPU names and compute capabilities is disabled, "
            "using manual preferences"
        )
        if gpu_id is None or compute_capability is None:
            raise PreferenceError(
                "Got `prefs.devices.cuda_standalone.cuda_backend.detect_gpus == "
                "False`. Without GPU detection, you need to set "
                "`prefs.devices.cuda_standalone.cuda_backend.gpu_id` and "
                "`prefs.devices.cuda_standalone.cuda_backend.compute_capability` "
                "(got `{prefs.devices.cuda_standalone.cuda_backend.gpu_id}` and "
                "`{prefs.devices.cuda_standalone.cuda_backend.compute_capability}`)"
                ".".format(prefs=prefs)
            )

    gpu_name = ""
    if gpu_list is not None:
        gpu_name = f" ({gpu_list[gpu_id]})"
    logger.info(f"Compiling device code for GPU {gpu_id}{gpu_name}")

    return gpu_id, compute_capability


def _run_command_with_output(command, *args):
    """
    Return the stdout from `command` run in a subprocess and produce a meaningful
    error message if it fails.

    If `args` is empty, `command` can be a string with multiple arguments (e.g.
    `ls -l -a`). If `args` are given, `command` has to be just the binary (e.g. `ls`)
    and each `args` item needs to be a single argument.

    Examples
    --------
    >>> _run_command_with_output("ls -a -l")
    >>> _run_command_with_output("ls", "-a", "-l")
    """
    if not args:
        command_split = shlex.split(command)
    else:
        command_split = [command] + list(args)
    try:
        output = subprocess.check_output(command_split, encoding="UTF-8")
    except subprocess.CalledProcessError as err:
        raise RuntimeError(
            f"Running `{command_split[0]}` failed with error code {err.returncode}: "
            f"{err.output}"
        ) from err
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Binary not found: `{command_split[0]}`") from err
    return output


def _get_available_gpus():
    """
    Detect available GPUs and return a list of their names, where the list index
    corresponds to the GPU ID.
    """
    if not prefs.devices.cuda_standalone.cuda_backend.detect_gpus:
        logger.debug("GPU detection is disabled, can't get available GPUs.")
        return None

    command = "nvidia-smi -L"
    try:
        gpu_info_lines = _run_command_with_output(command).split("\n")
    except (RuntimeError, FileNotFoundError) as excepted_error:
        new_error = RuntimeError(
            f"Running `{command}` failed. This typically means that you have no "
            f"NVIDIA driver installed. Are you sure there is an NVIDIA GPU on this "
            f"machine?"
            # "If `nvidia-smi` is not available on your system, you can disable "
            # "automatic detection of GPU name and compute capability by setting "
            # "`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False`"
        )
        raise new_error from excepted_error

    if gpu_info_lines and gpu_info_lines[0].startswith("No devices found"):
        raise RuntimeError(
            "`nvidia-smi` couldn't find any GPUs on your system. Are you sure you "
            "have a GPU? If you are trying to generate the CUDA standalone code on a "
            "system without a GPU, you have to set "
            "`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False` "
        )

    all_gpu_list = []
    if gpu_info_lines is not None:
        for i, gpu_info in enumerate(gpu_info_lines):
            if gpu_info == "":
                # the last list item is empty
                continue
            # `gpu_info` example:
            # "GPU 0: GeForce MX150 (UUID: GPU-8abe566f-c211-11c1-7b73-8103bfd30198)"
            # Remove the UUID part
            gpu_info = gpu_info.split(" (UUID")[0]
            # Split the ID and NAME parts
            try:
                id_str, gpu_name = gpu_info.split(": ", 1)
            except ValueError as err:
                raise AssertionError(
                    f"gpu_info: '{gpu_info}', gpu_info_lines: '{gpu_info_lines}', "
                    f"err: '{err}'"
                ) from err
            assert id_str.startswith("GPU ")
            gpu_id = id_str[len("GPU "):]
            assert int(gpu_id) == i
            all_gpu_list.append(gpu_name)

    visible_gpu_list = all_gpu_list
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        visible_gpu_list = []
        cuda_visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
        for id_str in cuda_visible_devices:
            gpu_id = int(id_str)
            visible_gpu_list.append(all_gpu_list[gpu_id])

    return visible_gpu_list


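# Illustrative example for the `CUDA_VISIBLE_DEVICES` remapping above
# (hypothetical GPU names): with
# all_gpu_list == ["GeForce MX150", "RTX 3090", "Tesla V100"] and
# CUDA_VISIBLE_DEVICES="2,0", the returned list is
# ["Tesla V100", "GeForce MX150"], i.e. visible GPU 0 maps to physical GPU 2.

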
def get_compute_capability(gpu_id):
    """
    Get the compute capability of the GPU with ID `gpu_id`. Returns a float
    (e.g. `6.1`).
    """
    # nvidia-smi allows querying the compute capability only for more recent driver
    # versions (couldn't find the required driver version, sometime around CUDA
    # toolkit 11.6)
    command = "nvidia-smi --query-gpu=compute_cap --format=csv,noheader"
    try:
        compute_capability_list = _run_command_with_output(command).split("\n")
        compute_capability = float(compute_capability_list[gpu_id])
    except RuntimeError as error:
        logger.debug(f"`{command}` failed with RuntimeError: {error}")
        # Use `deviceQuery` for systems with older driver versions
        compute_capability = _get_compute_capability_with_device_query(gpu_id)
    return compute_capability


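# Illustrative parsing example for the `nvidia-smi` query above (hypothetical
# output): `nvidia-smi --query-gpu=compute_cap --format=csv,noheader` prints
# one line per GPU, e.g. "6.1\n7.5\n"; for `gpu_id=1` this returns 7.5.

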
def _get_compute_capability_with_device_query(gpu_id):
    """
    Use the `deviceQuery` binary from the CUDA samples to get the compute capability
    of `gpu_id`.
    """
    gpu_list = get_available_gpus()
    # Use the preference for the `deviceQuery` path if set
    device_query_path = prefs.devices.cuda_standalone.cuda_backend.device_query_path
    if device_query_path is None:
        # Look for it in the demo_suite directory
        cuda_path = get_cuda_path()
        device_query_path = os.path.join(
            cuda_path, "extras", "demo_suite", "deviceQuery"
        )
        if not os.path.exists(device_query_path):
            # Note: If `deviceQuery` is not reliably available on user systems, we
            # could
            # 1. use this GitHub gist to scrape compute capabilities for GPU names
            #    from the NVIDIA website:
            #    https://gist.github.com/huitseeker/b2c79e5b763d58b06b9985de2b3c0d4d
            # 2. add a preference to point to a self-compiled binary?
            raise RuntimeError(
                f"GPU compute capability detection failed. Your NVIDIA driver version "
                f"doesn't support it and your CUDA toolkit installation has no "
                f"`deviceQuery` binary in `{device_query_path}`. You have the "
                f"following options to solve this: 1) update your NVIDIA driver or "
                f"2) manually compile the `deviceQuery` binary from the CUDA samples "
                f"and set "
                f"`prefs.devices.cuda_standalone.cuda_backend.device_query_path` "
                f"accordingly or 3) disable automatic GPU detection via "
                f"`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False`. "
                f"See the Brian2CUDA documentation for more details."
            )
    else:
        logger.info(
            "Path to `deviceQuery` binary set via "
            "`prefs.devices.cuda_standalone.cuda_backend.device_query_path = "
            f"{device_query_path}`"
        )
        # Allow home directory as `~` in path
        device_query_path = os.path.expanduser(device_query_path)
        if not os.path.exists(device_query_path):
            raise RuntimeError(
                f"Couldn't find `{device_query_path}` binary to detect the compute "
                "capability of your GPU. You set it via "
                "`prefs.devices.cuda_standalone.cuda_backend.device_query_path`"
            )

    device_query_output = _run_command_with_output(device_query_path)
    lines = device_query_output.split("\n")
    compute_capability = None
    for i, line in enumerate(lines):
        if line.startswith("Device "):
            # Example line:
            # `Device 0: "GeForce MX150"`
            this_gpu_id = int(line.split(":")[0][len("Device "):])
            if this_gpu_id == gpu_id:
                # Get the GPU name: the word in quotation marks
                gpu_name = re.findall(r'\"(.+?)\"', line)[0]
                # Make sure we got the right GPU here
                assert gpu_list[gpu_id] == gpu_name
                # The compute capability is shown 2 lines after the "Device ..." line
                # Example line:
                # `  CUDA Capability Major/Minor version number:    6.1`
                compute_capability_line = lines[i + 2]
                assert compute_capability_line.strip().startswith(
                    "CUDA Capability Major/Minor version number"
                ), f"Unexpected line parsed: {compute_capability_line}"
                # The last 3 characters are the compute capability (e.g. "6.1")
                major = int(compute_capability_line[-3])
                minor = int(compute_capability_line[-1])
                # Turn into a float
                compute_capability = major + 0.1 * minor
    return compute_capability


def get_best_gpu():
    """
    Get the "best" GPU available. This currently chooses the GPU with the highest
    compute capability and the lowest GPU ID (as reported by `nvidia-smi`).
    """
    gpu_list = get_available_gpus()
    best_gpu_id = 0
    best_compute_capability = 0
    for gpu_id, _gpu_name in enumerate(gpu_list):
        compute_capability = get_compute_capability(gpu_id)
        if compute_capability > best_compute_capability:
            best_compute_capability = compute_capability
            best_gpu_id = gpu_id
    return best_gpu_id, best_compute_capability


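# Illustrative example (hypothetical compute capabilities): with capabilities
# [6.1, 7.5, 7.5] for GPU IDs [0, 1, 2], `get_best_gpu()` returns (1, 7.5);
# the strict `>` comparison keeps the lowest GPU ID among equally capable GPUs.

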
if __name__ == "__main__":
    print(get_best_gpu())