Source code for brian2cuda.utils.gputools

"""
Tools to get information about available GPUs.
"""

import os
import subprocess
import shutil
import shlex
import re
import distutils

from brian2.core.preferences import prefs, PreferenceError
from brian2.codegen.cpp_prefs import get_compiler_and_args
from brian2.utils.logger import get_logger
from brian2cuda.utils.logger import report_issue_message

logger = get_logger("brian2.devices.cuda_standalone")

# To list all GPUs: nvidia-smi -L

# Some code here is adapted from
# https://github.com/cupy/cupy/blob/e6f8d91ffae7ee241ed235ddbeb725c04f593c33/cupy/_environment.py


# To minimize running external commands (`nvidia-smi`, `nvcc`, `deviceQuery`), their
# results are cached in the global dictionaries defined below. Each command runs at
# most once; whenever a value is needed again, the cached result is reused.
_cuda_installation = {
    "cuda_path": None,
    "nvcc_path": None,
    "runtime_version": None,
}

_gpu_selection = {
    "available_gpus": None,
    "selected_gpu_id": None,
    "selected_gpu_compute_capability": None,
}
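
# Hedged usage sketch (hypothetical session, assuming a CUDA installation in
# /usr/local/cuda): the public getters below memoize into these dictionaries,
# so only the first call runs an external command.
#
#     >>> from brian2cuda.utils.gputools import get_cuda_path
#     >>> get_cuda_path()  # first call: runs detection and caches the result
#     '/usr/local/cuda'
#     >>> get_cuda_path()  # later calls: return the cached value, no subprocess
#     '/usr/local/cuda'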


def get_cuda_path():
    """
    Detect the path to the CUDA installation (e.g. '/usr/local/cuda').

    This takes into account the user-defined environment variable `CUDA_PATH` and the
    preference `prefs.devices.cuda_standalone.cuda_backend.cuda_path`.
    """
    # If cuda_path was already detected, reuse the global variable
    global _cuda_installation
    if _cuda_installation["cuda_path"] is None:
        cuda_path, detected_from = _get_cuda_path()
        _check_cuda_path(cuda_path, detected_from)
        _cuda_installation["cuda_path"] = cuda_path
    return _cuda_installation["cuda_path"]


def get_nvcc_path():
    """Return the path to the `nvcc` binary."""
    # If nvcc_path was already detected, reuse the global variable
    global _cuda_installation
    if _cuda_installation["nvcc_path"] is None:
        _cuda_installation["nvcc_path"] = _get_nvcc_path()
    return _cuda_installation["nvcc_path"]


def get_cuda_runtime_version():
    """Return the CUDA runtime version (as float, e.g. `11.2`)."""
    # If runtime_version was already detected, reuse the global variable
    global _cuda_installation
    if _cuda_installation["runtime_version"] is None:
        _cuda_installation["runtime_version"] = _get_cuda_runtime_version()
    return _cuda_installation["runtime_version"]


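# Illustrative example for the version detection performed by
# `_get_cuda_runtime_version` below (hypothetical `nvcc --version` output):
# the line "Cuda compilation tools, release 11.2, V11.2.67" is parsed down to
# the release field and returned as the float 11.2.

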
def get_cuda_installation():
    """Return a new dictionary of CUDA installation variables."""
    cuda_installation = {
        "cuda_path": get_cuda_path(),
        "nvcc_path": get_nvcc_path(),
        "runtime_version": get_cuda_runtime_version(),
    }
    global _cuda_installation
    _assert_keys_equal(cuda_installation, _cuda_installation)
    return cuda_installation


def get_gpu_selection():
    """Return a new dictionary of the selected GPU variables."""
    gpu_id, compute_capability = select_gpu()
    gpu_selection = {
        "available_gpus": get_available_gpus(),
        "selected_gpu_id": gpu_id,
        "selected_gpu_compute_capability": compute_capability,
    }
    global _gpu_selection
    _assert_keys_equal(gpu_selection, _gpu_selection)
    return gpu_selection


def get_available_gpus():
    """
    Return a list of names of available GPUs, sorted by GPU ID as reported by
    `nvidia-smi`.
    """
    global _gpu_selection
    if _gpu_selection["available_gpus"] is None:
        _gpu_selection["available_gpus"] = _get_available_gpus()
    return _gpu_selection["available_gpus"]


def select_gpu():
    """
    Select the GPU for the simulation, based on the user preference
    `prefs.devices.cuda_standalone.cuda_backend.gpu_id` or (if not provided) pick the
    GPU with the highest compute capability.

    Returns a tuple `(gpu_id, compute_capability)` of type `(int, float)`.
    """
    global _gpu_selection
    if _gpu_selection["selected_gpu_id"] is None:
        assert _gpu_selection["selected_gpu_compute_capability"] is None
        gpu_id, compute_capability = _select_gpu()
        _gpu_selection["selected_gpu_id"] = gpu_id
        _gpu_selection["selected_gpu_compute_capability"] = compute_capability
    return (
        _gpu_selection["selected_gpu_id"],
        _gpu_selection["selected_gpu_compute_capability"],
    )


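# Hypothetical session (values depend on the machine; assumes one detectable
# GPU with compute capability 6.1):
#
#     >>> select_gpu()  # first call runs detection and caches the result
#     (0, 6.1)
#     >>> select_gpu()  # repeated calls return the cached selection
#     (0, 6.1)

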
def reset_cuda_installation():
    """
    Reset the detected CUDA installation. The installation will be detected again the
    next time it is needed.
    """
    global _cuda_installation
    for key in _cuda_installation.keys():
        _cuda_installation[key] = None


def reset_gpu_selection():
    """Reset the selected GPU. A new GPU will be selected the next time it is needed."""
    global _gpu_selection
    for key in _gpu_selection.keys():
        _gpu_selection[key] = None


def restore_cuda_installation(cuda_installation):
    """Set the global CUDA installation dictionary to `cuda_installation`."""
    global _cuda_installation
    if sorted(_cuda_installation.keys()) != sorted(cuda_installation.keys()):
        raise KeyError(
            "`cuda_installation` has to have the following keys: {}. Got instead: "
            "{}".format(
                sorted(_cuda_installation.keys()), sorted(cuda_installation.keys())
            )
        )
    _cuda_installation.update(cuda_installation)


def restore_gpu_selection(gpu_selection):
    """Set the global GPU selection dictionary to `gpu_selection`."""
    global _gpu_selection
    if sorted(_gpu_selection.keys()) != sorted(gpu_selection.keys()):
        raise KeyError(
            "`gpu_selection` has to have the following keys: {}. Got instead: "
            "{}".format(sorted(_gpu_selection.keys()), sorted(gpu_selection.keys()))
        )
    _gpu_selection.update(gpu_selection)


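# Hedged sketch of the reset/restore round trip, e.g. to hand a detected
# installation to another process without re-running the external commands
# (the dictionary contents are hypothetical):
#
#     >>> installation = get_cuda_installation()   # runs detection once
#     >>> reset_cuda_installation()                # clear the cached values
#     >>> restore_cuda_installation(installation)  # restore without detection
#     >>> get_cuda_path() == installation["cuda_path"]
#     True

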
def _assert_keys_equal(dict1, dict2):
    keys1 = sorted(dict1.keys())
    keys2 = sorted(dict2.keys())
    assert keys1 == keys2, f"{keys1} != {keys2}"


def _get_cuda_path():
    # Use preference if set
    cuda_path_pref = prefs.devices.cuda_standalone.cuda_backend.cuda_path
    if cuda_path_pref is not None:
        logger.info(
            f"CUDA installation directory given via preference "
            f"`prefs.devices.cuda_standalone.cuda_backend.cuda_path={cuda_path_pref}`"
        )
        # Allow home directory as `~` in path
        cuda_path_pref = os.path.expanduser(cuda_path_pref)
        return (cuda_path_pref, "pref")

    # Use environment variable if set
    cuda_path = os.environ.get("CUDA_PATH", "")  # Nvidia default on Windows
    if os.path.exists(cuda_path):
        logger.info(
            f"CUDA installation directory given via environment variable "
            f"`CUDA_PATH={cuda_path}`"
        )
        return (cuda_path, "env")

    # Use the nvcc location if the `nvcc` binary is in PATH
    nvcc_path = shutil.which("nvcc")
    if nvcc_path is not None:
        cuda_path_nvcc = os.path.dirname(os.path.dirname(nvcc_path))
        logger.info(
            f"CUDA installation directory detected via location of `nvcc` binary: "
            f"{cuda_path_nvcc}"
        )
        return (cuda_path_nvcc, "nvcc")

    # Use standard location /usr/local/cuda
    if os.path.exists("/usr/local/cuda"):
        cuda_path_usr = "/usr/local/cuda"
        logger.info(
            f"CUDA installation directory found in standard location: {cuda_path_usr}"
        )
        return (cuda_path_usr, "default")

    # Use standard location /opt/cuda
    if os.path.exists("/opt/cuda"):
        cuda_path_opt = "/opt/cuda"
        logger.info(
            f"CUDA installation directory found in standard location: {cuda_path_opt}"
        )
        return (cuda_path_opt, "default")

    # Raise error if the CUDA path was not found
    raise RuntimeError(
        "Couldn't find the CUDA installation. Please set the preference "
        "`prefs.devices.cuda_standalone.cuda_backend.cuda_path` or the environment "
        "variable `CUDA_PATH` to point to your CUDA installation directory (this "
        "should be the directory where `./bin/nvcc` is located, e.g. `/usr/local/cuda`)"
    )


def _check_cuda_path(cuda_path, detected_from):
    # Trigger nvcc path detection now to raise an error if it isn't found
    nvcc_path = _get_nvcc_path(cuda_path=cuda_path)
    if not os.path.exists(nvcc_path):
        # If we detected the cuda_path based on the nvcc binary, this should not happen
        assert detected_from != "nvcc", report_issue_message
        msg = f"Couldn't find `nvcc` binary in {nvcc_path}."
        if detected_from == "pref":
            msg += (
                " Are you sure your "
                "`prefs.devices.cuda_standalone.cuda_backend.cuda_path` preference "
                "is correct?"
            )
        elif detected_from == "env":
            msg += " Are you sure your `CUDA_PATH` environment variable is correct?"
        if prefs.devices.cuda_standalone.cuda_backend.detect_cuda:
            raise RuntimeError(msg)
        else:
            logger.warn(msg)


def _get_nvcc_path(cuda_path=None):
    """
    Get the nvcc path from the CUDA installation path (path/to/cuda/bin/nvcc).
    """
    # TODO: Check if NVCC is specific to cupy and if we want to support it?
    # If so, make sure cuda_path and nvcc_path fit together, see:
    # https://github.com/cupy/cupy/blob/cb29c07ccbae346841adb3c8bfa33aba463e2588/install/build.py#L65-L70
    # nvcc = os.environ.get("NVCC", None)
    # if nvcc:
    #     return distutils.util.split_quoted(nvcc)
    if cuda_path is None:
        cuda_path = get_cuda_path()

    compiler, _ = get_compiler_and_args()
    if compiler == "msvc":
        # Windows
        nvcc_bin = "bin/nvcc.exe"
    else:
        # Unix
        nvcc_bin = "bin/nvcc"

    nvcc_path = os.path.join(cuda_path, nvcc_bin)
    return nvcc_path


def _get_cuda_runtime_version():
    """
    Get the CUDA runtime version.
    """
    version_pref = prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version
    if version_pref is not None:
        # CUDA runtime version set via preference
        return version_pref

    # Get the runtime version from `nvcc --version`
    try:
        nvcc_path = get_nvcc_path()
    except RuntimeError as error:
        raise RuntimeError(
            "Couldn't detect CUDA runtime version. You can specify it via "
            "`prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version`"
        ) from error
    nvcc_output = _run_command_with_output(nvcc_path, "--version")
    nvcc_lines = nvcc_output.split("\n")
    # version_line example: "Cuda compilation tools, release 11.2, V11.2.67"
    version_line = nvcc_lines[3]
    assert version_line.startswith("Cuda compilation tools, release")
    # release_str example: "release 11.2"
    release_str = version_line.split(", ")[1]
    # runtime_version_str example: "11.2"
    runtime_version_str = release_str.split(" ")[1]
    # Return the version as float
    return float(runtime_version_str)


def _select_gpu():
    gpu_id = prefs.devices.cuda_standalone.cuda_backend.gpu_id
    compute_capability = prefs.devices.cuda_standalone.cuda_backend.compute_capability
    gpu_list = None
    if prefs.devices.cuda_standalone.cuda_backend.detect_gpus:
        if gpu_id is None:
            gpu_id, compute_capability = get_best_gpu()
        else:
            compute_capability = get_compute_capability(gpu_id)
        gpu_list = get_available_gpus()
    else:
        logger.info(
            "Automatic detection of GPU names and compute capabilities is disabled, "
            "using manual preferences"
        )
        if gpu_id is None or compute_capability is None:
            raise PreferenceError(
                "Got `prefs.devices.cuda_standalone.cuda_backend.detect_gpus == "
                "False`. Without GPU detection, you need to set "
                "`prefs.devices.cuda_standalone.cuda_backend.gpu_id` and "
                "`prefs.devices.cuda_standalone.cuda_backend.compute_capability` "
                "(got `{prefs.devices.cuda_standalone.cuda_backend.gpu_id}` and "
                "`{prefs.devices.cuda_standalone.cuda_backend.compute_capability}`)"
                ".".format(prefs=prefs)
            )

    gpu_name = ""
    if gpu_list is not None:
        gpu_name = f" ({gpu_list[gpu_id]})"
    logger.info(f"Compiling device code for GPU {gpu_id}{gpu_name}")

    return gpu_id, compute_capability


def _run_command_with_output(command, *args):
    """
    Return the stdout from `command` run in a subprocess and produce a meaningful
    error message if it fails.

    If `args` is empty, `command` can be a string with multiple arguments (e.g.
    `ls -l -a`). If `args` are given, `command` has to be just the binary (e.g. `ls`)
    and each `args` item needs to be a single argument.

    Examples
    --------
    >>> _run_command_with_output("ls -a -l")
    >>> _run_command_with_output("ls", "-a", "-l")
    """
    if not args:
        command_split = shlex.split(command)
    else:
        command_split = [command] + list(args)
    try:
        output = subprocess.check_output(command_split, encoding="UTF-8")
    except subprocess.CalledProcessError as err:
        raise RuntimeError(
            f"Running `{command_split[0]}` failed with error code {err.returncode}: "
            f"{err.output}"
        ) from err
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Binary not found: `{command_split[0]}`") from err
    return output


def _get_available_gpus():
    """
    Detect available GPUs and return a list of their names, where the list index
    corresponds to the GPU ID.
    """
    if not prefs.devices.cuda_standalone.cuda_backend.detect_gpus:
        logger.debug("GPU detection is disabled, can't get available GPUs.")
        return None

    command = "nvidia-smi -L"
    try:
        gpu_info_lines = _run_command_with_output(command).split("\n")
    except (RuntimeError, FileNotFoundError) as excepted_error:
        new_error = RuntimeError(
            f"Running `{command}` failed. This typically means that you have no "
            f"NVIDIA driver installed. Are you sure there is an NVIDIA GPU on this "
            f"machine?"
            # "If `nvidia-smi` is not available on your system, you can disable "
            # "automatic detection of GPU name and compute capability by setting "
            # "`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False`"
        )
        raise new_error from excepted_error

    if gpu_info_lines and gpu_info_lines[0].startswith("No devices found"):
        raise RuntimeError(
            "`nvidia-smi` couldn't find any GPUs on your system. Are you sure you "
            "have a GPU? If you are trying to generate the CUDA standalone code on a "
            "system without a GPU, you have to set "
            "`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False` "
        )

    all_gpu_list = []
    if gpu_info_lines is not None:
        for i, gpu_info in enumerate(gpu_info_lines):
            if gpu_info == "":
                # the last list item is empty
                continue
            # `gpu_info` example:
            # "GPU 0: GeForce MX150 (UUID: GPU-8abe566f-c211-11c1-7b73-8103bfd30198)"
            # Remove the UUID part
            gpu_info = gpu_info.split(" (UUID")[0]
            # Split the ID and NAME parts
            try:
                id_str, gpu_name = gpu_info.split(": ", 1)
            except ValueError as err:
                raise AssertionError(
                    f"gpu_info: '{gpu_info}', gpu_info_lines: '{gpu_info_lines}', "
                    f"err: '{err}'"
                ) from err
            assert id_str.startswith("GPU ")
            gpu_id = id_str[len("GPU "):]
            assert int(gpu_id) == i
            all_gpu_list.append(gpu_name)

    visible_gpu_list = all_gpu_list
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        visible_gpu_list = []
        cuda_visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
        for id_str in cuda_visible_devices:
            gpu_id = int(id_str)
            visible_gpu_list.append(all_gpu_list[gpu_id])

    return visible_gpu_list


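# Illustrative example for the `CUDA_VISIBLE_DEVICES` remapping above
# (hypothetical GPU names): with
# all_gpu_list == ["GeForce MX150", "RTX 3090", "Tesla V100"] and
# CUDA_VISIBLE_DEVICES="2,0", the returned list is
# ["Tesla V100", "GeForce MX150"], i.e. visible GPU 0 maps to physical GPU 2.

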
def get_compute_capability(gpu_id):
    """
    Get the compute capability of the GPU with ID `gpu_id`. Returns a float
    (e.g. `6.1`).
    """
    # nvidia-smi allows querying the compute capability only for more recent driver
    # versions (couldn't find the required driver version, sometime around CUDA
    # toolkit 11.6)
    command = "nvidia-smi --query-gpu=compute_cap --format=csv,noheader"
    try:
        compute_capability_list = _run_command_with_output(command).split("\n")
        compute_capability = float(compute_capability_list[gpu_id])
    except RuntimeError as error:
        logger.debug(f"`{command}` failed with RuntimeError: {error}")
        # Use `deviceQuery` for systems with older driver versions
        compute_capability = _get_compute_capability_with_device_query(gpu_id)
    return compute_capability


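# Illustrative parsing example for the `nvidia-smi` query above (hypothetical
# output): `nvidia-smi --query-gpu=compute_cap --format=csv,noheader` prints
# one line per GPU, e.g. "6.1\n7.5\n"; for `gpu_id=1` this returns 7.5.

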
def _get_compute_capability_with_device_query(gpu_id):
    """
    Use the `deviceQuery` binary from the CUDA samples to get the compute capability
    of `gpu_id`.
    """
    gpu_list = get_available_gpus()
    # Use the preference for the `deviceQuery` path if set
    device_query_path = prefs.devices.cuda_standalone.cuda_backend.device_query_path
    if device_query_path is None:
        # Look for it in the demo_suite directory
        cuda_path = get_cuda_path()
        device_query_path = os.path.join(
            cuda_path, "extras", "demo_suite", "deviceQuery"
        )
        if not os.path.exists(device_query_path):
            # Note: If `deviceQuery` is not reliably available on user systems, we
            # could
            # 1. use this GitHub gist to scrape compute capabilities for GPU names
            #    from the NVIDIA website:
            #    https://gist.github.com/huitseeker/b2c79e5b763d58b06b9985de2b3c0d4d
            # 2. add a preference to point to a self-compiled binary?
            raise RuntimeError(
                f"GPU compute capability detection failed. Your NVIDIA driver version "
                f"doesn't support it and your CUDA toolkit installation has no "
                f"`deviceQuery` binary in `{device_query_path}`. You have the "
                f"following options to solve this: 1) update your NVIDIA driver or "
                f"2) manually compile the `deviceQuery` binary from the CUDA samples "
                f"and set "
                f"`prefs.devices.cuda_standalone.cuda_backend.device_query_path` "
                f"accordingly or 3) disable automatic GPU detection via "
                f"`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False`. "
                f"See the Brian2CUDA documentation for more details."
            )
    else:
        logger.info(
            "Path to `deviceQuery` binary set via "
            "`prefs.devices.cuda_standalone.cuda_backend.device_query_path = "
            f"{device_query_path}`"
        )
        # Allow home directory as `~` in path
        device_query_path = os.path.expanduser(device_query_path)
        if not os.path.exists(device_query_path):
            raise RuntimeError(
                f"Couldn't find `{device_query_path}` binary to detect the compute "
                "capability of your GPU. You set it via "
                "`prefs.devices.cuda_standalone.cuda_backend.device_query_path`"
            )

    device_query_output = _run_command_with_output(device_query_path)
    lines = device_query_output.split("\n")
    compute_capability = None
    for i, line in enumerate(lines):
        if line.startswith("Device "):
            # Example line:
            # `Device 0: "GeForce MX150"`
            this_gpu_id = int(line.split(":")[0][len("Device "):])
            if this_gpu_id == gpu_id:
                # Get the GPU name: the word in quotation marks
                gpu_name = re.findall(r'\"(.+?)\"', line)[0]
                # Make sure we got the right GPU here
                assert gpu_list[gpu_id] == gpu_name
                # The compute capability is shown 2 lines after the "Device ..." line
                # Example line:
                # `  CUDA Capability Major/Minor version number:    6.1`
                compute_capability_line = lines[i + 2]
                assert compute_capability_line.strip().startswith(
                    "CUDA Capability Major/Minor version number"
                ), f"Unexpected line parsed: {compute_capability_line}"
                # The last 3 characters are the compute capability (e.g. "6.1")
                major = int(compute_capability_line[-3])
                minor = int(compute_capability_line[-1])
                # Turn into a float
                compute_capability = major + 0.1 * minor
    return compute_capability


def get_best_gpu():
    """
    Get the "best" GPU available. This currently chooses the GPU with the highest
    compute capability and the lowest GPU ID (as reported by `nvidia-smi`).
    """
    gpu_list = get_available_gpus()
    best_gpu_id = 0
    best_compute_capability = 0
    for gpu_id, _gpu_name in enumerate(gpu_list):
        compute_capability = get_compute_capability(gpu_id)
        if compute_capability > best_compute_capability:
            best_compute_capability = compute_capability
            best_gpu_id = gpu_id
    return best_gpu_id, best_compute_capability


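# Illustrative example (hypothetical compute capabilities): with capabilities
# [6.1, 7.5, 7.5] for GPU IDs [0, 1, 2], `get_best_gpu()` returns (1, 7.5);
# the strict `>` comparison keeps the lowest GPU ID among equally capable GPUs.

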
if __name__ == "__main__":
    print(get_best_gpu())