"""
Source code for brian2cuda.utils.gputools

Tools to get information about available GPUs.
"""

import os
import subprocess
import shutil
import shlex
import re
import distutils

from brian2.core.preferences import prefs, PreferenceError
from brian2.codegen.cpp_prefs import get_compiler_and_args
from brian2.utils.logger import get_logger
from brian2cuda.utils.logger import report_issue_message

logger = get_logger("brian2.devices.cuda_standalone")

# To list all GPUs: nvidia-smi -L

# Some code here is adapted from

# To minimize running external commands (`nvidia-smi`, `nvcc`, `deviceQuery`), we define
# these global variables that are computed from the external commands. This way we only
# run them once and whenever they are needed again, we use the global variables defined
# here.
# Cache for the detected CUDA installation. A value of `None` means that the
# corresponding entry has not been detected yet (the `get_*` functions fill it lazily).
_cuda_installation = {
    "cuda_path": None,
    "nvcc_path": None,
    "runtime_version": None,
}

# Cache for the detected GPUs and the GPU selected for the simulation. A value of
# `None` means that the corresponding entry has not been determined yet.
_gpu_selection = {
    "available_gpus": None,
    "selected_gpu_id": None,
    "selected_gpu_compute_capability": None,
}

def get_cuda_path():
    """
    Detect the path to the CUDA installation (e.g. '/usr/local/cuda').

    This takes into account user defined environmental variable `CUDA_PATH` and
    preference `prefs.devices.cuda_standalone.cuda_backend.cuda_path`. The detected
    path is checked once and then stored in the module-level cache, so later calls
    return the cached value.
    """
    cached_path = _cuda_installation["cuda_path"]
    if cached_path is not None:
        # Already detected earlier, no need to run the detection again
        return cached_path

    detected_path, source = _get_cuda_path()
    _check_cuda_path(detected_path, source)
    _cuda_installation["cuda_path"] = detected_path
    return detected_path
def get_nvcc_path():
    """
    Return the path to the `nvcc` binary.

    The path is determined on the first call and stored in the module-level cache.
    """
    cached_nvcc = _cuda_installation["nvcc_path"]
    if cached_nvcc is not None:
        return cached_nvcc

    detected_nvcc = _get_nvcc_path()
    _cuda_installation["nvcc_path"] = detected_nvcc
    return detected_nvcc
def get_cuda_runtime_version():
    """
    Return CUDA runtime version (as float, e.g. `11.2`).

    The version is determined on the first call and stored in the module-level cache.
    """
    cached_version = _cuda_installation["runtime_version"]
    if cached_version is not None:
        return cached_version

    detected_version = _get_cuda_runtime_version()
    _cuda_installation["runtime_version"] = detected_version
    return detected_version
def get_cuda_installation():
    """
    Return new dictionary of cuda installation variables.

    The returned dictionary has the keys 'cuda_path', 'nvcc_path' and
    'runtime_version'. It is a new object, not the module-level cache itself.
    """
    # Query in the same order as the keys are stored
    cuda_path = get_cuda_path()
    nvcc_path = get_nvcc_path()
    runtime_version = get_cuda_runtime_version()
    snapshot = dict(
        cuda_path=cuda_path,
        nvcc_path=nvcc_path,
        runtime_version=runtime_version,
    )
    # Sanity check: the snapshot has to mirror the layout of the global cache
    _assert_keys_equal(snapshot, _cuda_installation)
    return snapshot
def get_gpu_selection():
    """
    Return dictionary of selected gpu variable.

    The returned dictionary has the keys 'available_gpus', 'selected_gpu_id' and
    'selected_gpu_compute_capability'. It is a new object, not the module-level
    cache itself.
    """
    # Select the GPU first, then query the list of available GPUs
    selected_id, selected_capability = select_gpu()
    snapshot = dict(
        available_gpus=get_available_gpus(),
        selected_gpu_id=selected_id,
        selected_gpu_compute_capability=selected_capability,
    )
    # Sanity check: the snapshot has to mirror the layout of the global cache
    _assert_keys_equal(snapshot, _gpu_selection)
    return snapshot
def get_available_gpus():
    """
    Return list of names of available GPUs, sorted by GPU ID as reported in
    `nvidia-smi`.

    The list is stored in the module-level cache. As long as the cached value is
    `None`, the detection is run again on the next call.
    """
    cached_gpus = _gpu_selection["available_gpus"]
    if cached_gpus is not None:
        return cached_gpus

    detected_gpus = _get_available_gpus()
    _gpu_selection["available_gpus"] = detected_gpus
    return detected_gpus
def select_gpu():
    """
    Select GPU for simulation, based on user preference
    `prefs.devices.cuda_standalone.cuda_backend.gpu_id` or (if not provided) pick the
    GPU with highest compute capability.

    Returns tuple of (gpu_id, compute_capability) of type (int, float). The selection
    is stored in the module-level cache and reused by later calls.
    """
    if _gpu_selection["selected_gpu_id"] is None:
        # GPU ID and compute capability are always set together
        assert _gpu_selection["selected_gpu_compute_capability"] is None
        chosen_id, chosen_capability = _select_gpu()
        _gpu_selection.update(
            selected_gpu_id=chosen_id,
            selected_gpu_compute_capability=chosen_capability,
        )

    result_keys = ("selected_gpu_id", "selected_gpu_compute_capability")
    return tuple(_gpu_selection[key] for key in result_keys)
def reset_cuda_installation():
    """
    Reset detected CUDA installation. This will detect the CUDA installation again
    when it is needed.
    """
    # Overwrite all cached values with `None` in place (the dictionary object is kept)
    _cuda_installation.update(dict.fromkeys(_cuda_installation))
def reset_gpu_selection():
    """Reset selected GPU. This will select a new GPU the next time it is needed."""
    # Overwrite all cached values with `None` in place (the dictionary object is kept)
    _gpu_selection.update(dict.fromkeys(_gpu_selection))
def restore_cuda_installation(cuda_installation):
    """
    Set global cuda installation dictionary to `cuda_installation`.

    Parameters
    ----------
    cuda_installation : dict
        Dictionary with exactly the same keys as the module-level cuda installation
        cache. Its values overwrite the cached ones.

    Raises
    ------
    KeyError
        If the keys of `cuda_installation` differ from the keys of the cache.
    """
    required_keys = sorted(_cuda_installation.keys())
    given_keys = sorted(cuda_installation.keys())
    if required_keys != given_keys:
        # Report the required keys first and the passed keys second, matching the
        # wording of the message
        raise KeyError(
            "`cuda_installation` has to have the following keys: {}. Got instead: "
            "{}".format(required_keys, given_keys)
        )
    _cuda_installation.update(cuda_installation)
def restore_gpu_selection(gpu_selection):
    """
    Set global gpu selection dictionary to `gpu_selection`.

    Parameters
    ----------
    gpu_selection : dict
        Dictionary with exactly the same keys as the module-level GPU selection
        cache. Its values overwrite the cached ones.

    Raises
    ------
    KeyError
        If the keys of `gpu_selection` differ from the keys of the cache.
    """
    required_keys = sorted(_gpu_selection.keys())
    given_keys = sorted(gpu_selection.keys())
    if required_keys != given_keys:
        # Report the required keys first and the passed keys second, matching the
        # wording of the message
        raise KeyError(
            "`gpu_selection` has to have the following keys: {}. Got instead: "
            "{}".format(required_keys, given_keys)
        )
    _gpu_selection.update(gpu_selection)
def _assert_keys_equal(dict1, dict2):
    """Assert that `dict1` and `dict2` have the same keys (order is ignored)."""
    keys1 = sorted(dict1.keys())
    keys2 = sorted(dict2.keys())
    assert keys1 == keys2, f"{keys1} != {keys2}"


def _get_cuda_path():
    """
    Detect the CUDA installation directory.

    The following sources are tried in this order: the preference
    `prefs.devices.cuda_standalone.cuda_backend.cuda_path`, the environment variable
    `CUDA_PATH`, the location of an `nvcc` binary found in `PATH`, and the standard
    locations `/usr/local/cuda` and `/opt/cuda`.

    Returns
    -------
    (cuda_path, detected_from) : tuple of str
        The detected path and the source it was detected from (one of `'pref'`,
        `'env'`, `'nvcc'` or `'default'`).

    Raises
    ------
    RuntimeError
        If no CUDA installation could be found.
    """
    # Use preference if set
    cuda_path_pref = prefs.devices.cuda_standalone.cuda_backend.cuda_path
    if cuda_path_pref is not None:
        logger.info(
            f"CUDA installation directory given via preference "
            f"`prefs.devices.cuda_standalone.cuda_backend.cuda_path={cuda_path_pref}`"
        )
        # Allow home directory as `~` in path
        cuda_path_pref = os.path.expanduser(cuda_path_pref)
        return (cuda_path_pref, 'pref')

    # Use environment variable if set
    cuda_path = os.environ.get("CUDA_PATH", "")  # Nvidia default on Windows
    if os.path.exists(cuda_path):
        logger.info(
            "CUDA installation directory given via environment variable `CUDA_PATH={}`"
            "".format(cuda_path)
        )
        return (cuda_path, 'env')

    # Use nvcc path if `nvcc` binary in PATH
    nvcc_path = shutil.which("nvcc")
    if nvcc_path is not None:
        # `nvcc` is located in `<cuda_path>/bin/nvcc`, hence go up two levels
        cuda_path_nvcc = os.path.dirname(os.path.dirname(nvcc_path))
        logger.info(
            "CUDA installation directory detected via location of `nvcc` binary: {}"
            "".format(cuda_path_nvcc)
        )
        return (cuda_path_nvcc, 'nvcc')

    # Use standard locations /usr/local/cuda and /opt/cuda (in this order)
    for cuda_path_default in ("/usr/local/cuda", "/opt/cuda"):
        if os.path.exists(cuda_path_default):
            logger.info(
                "CUDA installation directory found in standard location: "
                f"{cuda_path_default}"
            )
            return (cuda_path_default, 'default')

    # Raise error if cuda path not found
    raise RuntimeError(
        "Couldn't find the CUDA installation. Please set the preference "
        "`prefs.devices.cuda_standalone.cuda_backend.cuda_path` or the environment "
        "variable `CUDA_PATH` to point to your CUDA installation directory (this "
        "should be the directory where `./bin/nvcc` is located, e.g. "
        "`/usr/local/cuda`)"
    )


def _check_cuda_path(cuda_path, detected_from):
    """
    Check that the `nvcc` binary exists inside `cuda_path`.

    Parameters
    ----------
    cuda_path : str
        CUDA installation directory to check.
    detected_from : str
        Source of `cuda_path` as returned by `_get_cuda_path`. Used to give a hint
        about what to fix in the error message.

    Raises
    ------
    RuntimeError
        If the `nvcc` binary doesn't exist and the preference
        `prefs.devices.cuda_standalone.cuda_backend.detect_cuda` is set. Otherwise
        only a warning is logged.
    """
    # Trigger nvcc path detection now to raise an error if it isn't found
    nvcc_path = _get_nvcc_path(cuda_path=cuda_path)
    if os.path.exists(nvcc_path):
        return

    # If we detected the cuda_path based on nvcc binary, this should not happen
    assert detected_from != "nvcc", report_issue_message
    msg = f"Couldn't find `nvcc` binary in {nvcc_path}."
    # `_get_cuda_path` reports the preference as 'pref' ('prefs' is accepted as well
    # to stay compatible with the previously checked value)
    if detected_from in ("pref", "prefs"):
        msg += (
            " Are you sure your "
            "prefs.devices.cuda_standalone.cuda_backend.cuda_path preference "
            "is correct?"
        )
    elif detected_from == "env":
        msg += " Are you sure your CUDA_PATH environment variable is correct?"

    if prefs.devices.cuda_standalone.cuda_backend.detect_cuda:
        raise RuntimeError(msg)
    logger.warn(msg)


def _get_nvcc_path(cuda_path=None):
    """
    Get the nvcc path from the CUDA installation path (path/to/cuda/bin/nvcc)

    Parameters
    ----------
    cuda_path : str, optional
        CUDA installation directory. If not given, `get_cuda_path()` is used.
    """
    # TODO: Check if NVCC is specific to cupy and if we want to support it?
    #       If so, make sure cuda_path and nvcc_path fit together
    #nvcc = os.environ.get("NVCC", None)
    #if nvcc:
    #    return distutils.util.split_quoted(nvcc)
    if cuda_path is None:
        cuda_path = get_cuda_path()

    compiler, _ = get_compiler_and_args()
    if compiler == "msvc":  # Windows
        nvcc_bin = "bin/nvcc.exe"
    else:  # Unix
        nvcc_bin = "bin/nvcc"

    return os.path.join(cuda_path, nvcc_bin)


def _parse_nvcc_release(nvcc_output):
    """
    Extract the release version (as float, e.g. `11.2`) from `nvcc --version` output.

    Raises a `RuntimeError` if the output has no line of the form
    "Cuda compilation tools, release 11.2, V11.2.67".
    """
    # Search for the version line instead of relying on a fixed line number
    match = re.search(
        r"^Cuda compilation tools, release (\d+(?:\.\d+)?)",
        nvcc_output,
        flags=re.MULTILINE,
    )
    if match is None:
        raise RuntimeError(
            "Couldn't parse the CUDA runtime version from the output of "
            f"`nvcc --version`: {nvcc_output!r}. You can specify it via "
            "`prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version`"
        )
    # return version as float
    return float(match.group(1))


def _get_cuda_runtime_version():
    """
    Get CUDA runtime version

    Returns the value of the preference
    `prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version` if it is set.
    Otherwise, the version is parsed from the output of `nvcc --version` and returned
    as float.

    Raises
    ------
    RuntimeError
        If the `nvcc` path can't be determined or its output can't be parsed.
    """
    version_pref = prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version
    if version_pref is not None:
        # CUDA runtime version set via preference
        return version_pref

    # Get runtime version from `nvcc --version`
    try:
        nvcc_path = get_nvcc_path()
    except RuntimeError as error:
        raise RuntimeError(
            "Couldn't detect CUDA runtime version. You can specify it via "
            "`prefs.devices.cuda_standalone.cuda_backend.cuda_runtime_version`"
        ) from error

    nvcc_output = _run_command_with_output(nvcc_path, "--version")
    return _parse_nvcc_release(nvcc_output)


def _select_gpu():
    """
    Determine the GPU ID and compute capability to compile for.

    With `prefs.devices.cuda_standalone.cuda_backend.detect_gpus` enabled, the GPU
    given via the `gpu_id` preference is used (or the one returned by
    `get_best_gpu()` if no `gpu_id` is set) and its compute capability is detected.
    With detection disabled, both the `gpu_id` and the `compute_capability`
    preferences have to be set.

    Returns
    -------
    (gpu_id, compute_capability) : tuple

    Raises
    ------
    PreferenceError
        If GPU detection is disabled and `gpu_id` or `compute_capability` is not set.
    """
    gpu_id = prefs.devices.cuda_standalone.cuda_backend.gpu_id
    compute_capability = prefs.devices.cuda_standalone.cuda_backend.compute_capability
    gpu_list = None
    if prefs.devices.cuda_standalone.cuda_backend.detect_gpus:
        if gpu_id is None:
            gpu_id, compute_capability = get_best_gpu()
        else:
            compute_capability = get_compute_capability(gpu_id)
        gpu_list = get_available_gpus()
    else:
        logger.info(
            "Automatic detection of GPU names and compute capabilities disabled, using "
            "manual preferences"
        )
        if gpu_id is None or compute_capability is None:
            raise PreferenceError(
                "Got `prefs.devices.cuda_standalone.cuda_backend.detect_gpus` == `False`. Without GPU detection, "
                "you need to set `prefs.devices.cuda_standalone.cuda_backend.gpu_id` and "
                "`prefs.devices.cuda_standalone.cuda_backend.compute_capability` (got "
                "`{prefs.devices.cuda_standalone.cuda_backend.gpu_id}` and "
                "`{prefs.devices.cuda_standalone.cuda_backend.compute_capability}`).".format(
                    prefs=prefs
                )
            )

    # The GPU name is only known if GPUs were detected
    gpu_name = ""
    if gpu_list is not None:
        gpu_name = f" ({gpu_list[gpu_id]})"

    logger.info(f"Compiling device code for GPU {gpu_id}{gpu_name}")

    return gpu_id, compute_capability


def _run_command_with_output(command, *args):
    """
    Return the stdout from `command` run in a subprocess and produce meaningful error
    message if it fails. If `args` is empty, `command` can be a string with multiple
    arguments (e.g. `ls -l -a`). If `args` are given, `command` has to be just the
    binary (e.g. `ls`) and each `args` item needs to be a single argument.

    Raises
    ------
    RuntimeError
        If the command exits with a non-zero return code.
    FileNotFoundError
        If the binary can't be found.

    Examples
    --------
    >>> _run_command_with_output("ls -a -l")
    >>> _run_command_with_output("ls", "-a", "-l")
    """
    if not args:
        command_split = shlex.split(command)
    else:
        command_split = [command] + list(args)

    try:
        output = subprocess.check_output(command_split, encoding='UTF-8')
    except subprocess.CalledProcessError as err:
        # Chain the original error to keep its traceback available
        raise RuntimeError(
            "Running `{binary}` failed with error code {err.returncode}: {err.output}"
            "".format(binary=command_split[0], err=err)
        ) from err
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Binary not found: `{command_split[0]}`") from err

    return output


def _parse_gpu_names(gpu_info_lines):
    """
    Parse the lines printed by `nvidia-smi -L` into a list of GPU names, where the
    list index corresponds to the GPU ID. Empty lines are ignored.

    Raises a `RuntimeError` if a line doesn't have the expected format or if the GPU
    IDs are not consecutive and starting at 0.
    """
    gpu_names = []
    for raw_line in gpu_info_lines:
        if raw_line.strip() == "":
            # last list item is empty
            continue
        # `raw_line` example:
        # "GPU 0: GeForce MX150 (UUID: GPU-8abe566f-c211-11c1-7b73-8103bfd30198)"
        # Remove the UUID part
        gpu_info = raw_line.split(" (UUID")[0]
        # Split ID and NAME parts at the first ": "
        id_str, separator, gpu_name = gpu_info.partition(": ")
        # Take all characters after "GPU " to support GPU IDs with multiple digits
        id_digits = id_str[len("GPU "):]
        if not separator or not id_str.startswith("GPU ") or not id_digits.isdigit():
            raise RuntimeError(
                f"Couldn't parse line '{raw_line}' of `nvidia-smi -L` output "
                f"(all lines: '{gpu_info_lines}')"
            )
        if int(id_digits) != len(gpu_names):
            raise RuntimeError(
                f"Unexpected GPU ID {id_digits} in `nvidia-smi -L` output, expected "
                f"{len(gpu_names)} (all lines: '{gpu_info_lines}')"
            )
        gpu_names.append(gpu_name)
    return gpu_names


def _get_available_gpus():
    """
    Detect available GPUs and return a list of their names, where list index
    corresponds to GPU id.

    Returns `None` if GPU detection is disabled via
    `prefs.devices.cuda_standalone.cuda_backend.detect_gpus`. If the environment
    variable `CUDA_VISIBLE_DEVICES` is set, only the GPUs listed there are returned
    (in the listed order).

    Raises
    ------
    RuntimeError
        If `nvidia-smi -L` fails, finds no devices or prints unexpected output.
    """
    if not prefs.devices.cuda_standalone.cuda_backend.detect_gpus:
        logger.debug("GPU detection is disabled, can't get available GPUs.")
        return None

    command = "nvidia-smi -L"
    try:
        gpu_info_lines = _run_command_with_output(command).split("\n")
    except (RuntimeError, FileNotFoundError) as excepted_error:
        new_error = RuntimeError(
            f"Running `{command}` failed. This typically means that you have no "
            f"NVIDIA driver installed. Are you sure there is an NVIDIA GPU on this "
            f"machine?"
            #"If `nvidia-smi` is not available in your system, you can disable "
            #"automatic detection of GPU name and compute capability by setting "
            #"`prefs.devices.cuda_standalone.cuda_backend.detect_gpus` = `False`"
        )
        raise new_error from excepted_error

    if gpu_info_lines and gpu_info_lines[0].startswith("No devices found"):
        raise RuntimeError(
            "`nvidia-smi` couldn't find any GPUs on your system. Are you sure you have "
            "a GPU? If you are trying to generate the CUDA standalone code on a system "
            "without GPU, you have to set "
            "`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False` "
        )

    all_gpu_list = _parse_gpu_names(gpu_info_lines)

    visible_gpu_list = all_gpu_list
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        cuda_visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
        visible_gpu_list = [all_gpu_list[int(id_str)] for id_str in cuda_visible_devices]

    return visible_gpu_list
def get_compute_capability(gpu_id):
    """
    Get compute capability of GPU with ID `gpu_id`. Returns a float (e.g. `6.1`).

    `nvidia-smi` is queried first. If running it raises a `RuntimeError`, the
    `deviceQuery` binary is used instead.
    """
    # nvidia-smi allows querying compute capability only for more recent driver versions
    # (couldn't find the required driver version, sometime around CUDA toolkit 11.6)
    # NOTE(review): `gpu_id` indexes the lines printed by this query, while
    # `_get_available_gpus` re-indexes GPUs according to `CUDA_VISIBLE_DEVICES` --
    # confirm that both indices agree when that variable is set.
    query = "nvidia-smi --query-gpu=compute_cap --format=csv,noheader"
    try:
        per_gpu_values = _run_command_with_output(query).split("\n")
        return float(per_gpu_values[gpu_id])
    except RuntimeError as error:
        logger.debug(f"`{query} failed with RuntimeError: {error}")

    # Use `deviceQuery` for systems with older driver versions
    return _get_compute_capability_with_device_query(gpu_id)
def _parse_device_query_output(device_query_output, gpu_id):
    """
    Find the entry of the GPU with ID `gpu_id` in the output of `deviceQuery`.

    Returns a tuple `(gpu_name, compute_capability)` with the compute capability as
    float, or `None` if the output has no entry for `gpu_id`. Raises a `RuntimeError`
    if the entry exists but has no compute capability line.
    """
    lines = device_query_output.splitlines()
    for i, line in enumerate(lines):
        # example line:
        # `Device 0: "GeForce MX150"`
        header = re.match(r'Device (\d+): "(.+?)"', line)
        if header is None or int(header.group(1)) != gpu_id:
            continue
        # Search the lines belonging to this device (up to the next "Device ..."
        # line) for the compute capability. Example line:
        # `  CUDA Capability Major/Minor version number:    6.1`
        for device_line in lines[i + 1:]:
            if device_line.startswith("Device "):
                break
            capability = re.search(
                r"CUDA Capability Major/Minor version number:\s*(\d+)\.(\d+)",
                device_line,
            )
            if capability is not None:
                major, minor = capability.groups()
                return header.group(2), float(f"{major}.{minor}")
        raise RuntimeError(
            f"Couldn't find the compute capability of GPU {gpu_id} in the output of "
            f"`deviceQuery`: {device_query_output!r}"
        )
    return None


def _get_compute_capability_with_device_query(gpu_id):
    """
    Use `deviceQuery` binary from CUDA samples to get compute capability of `gpu_id`.

    The binary is taken from the preference
    `prefs.devices.cuda_standalone.cuda_backend.device_query_path` if set, otherwise
    from `extras/demo_suite/deviceQuery` inside the CUDA installation directory.

    Returns
    -------
    compute_capability : float
        Compute capability of the GPU (e.g. `6.1`).

    Raises
    ------
    RuntimeError
        If the `deviceQuery` binary doesn't exist, or if its output has no (complete)
        entry for `gpu_id`.
    """
    gpu_list = get_available_gpus()

    # Use preference for `deviceQuery` path if set
    device_query_path = prefs.devices.cuda_standalone.cuda_backend.device_query_path
    if device_query_path is None:
        # Look for it in the demo_suite directory
        cuda_path = get_cuda_path()
        device_query_path = os.path.join(
            cuda_path, "extras", "demo_suite", "deviceQuery"
        )
        if not os.path.exists(device_query_path):
            # Note: If `deviceQuery` is not reliably available on user systems, we
            #       could scrape compute capabilities for GPU names from the nvidia
            #       website.
            raise RuntimeError(
                f"GPU compute capability detection failed. Your NVIDIA driver version "
                f"doesn't support it and your CUDA toolkit installation has no "
                f"`deviceQuery` binary in `{device_query_path}`. You have the following "
                f"options to solve this: 1) update your NVIDIA driver or 2) manually "
                f"compile the `deviceQuery` binary from the CUDA Samples and set "
                f"`prefs.devices.cuda_standalone.cuda_backend.device_query_path` "
                f"accordingly or 3) disable automatic GPU detection via "
                f"`prefs.devices.cuda_standalone.cuda_backend.detect_gpus = False`. See "
                f"Brian2CUDA documentations for more details."
            )
    else:
        logger.info(
            "Path to `deviceQuery` binary set via "
            "`prefs.devices.cuda_standalone.cuda_backend.device_query_path = "
            f"{device_query_path}`"
        )
        # Allow home directory as `~` in path
        device_query_path = os.path.expanduser(device_query_path)
        if not os.path.exists(device_query_path):
            raise RuntimeError(
                f"Couldn't find `{device_query_path}` binary to detect the compute "
                "capability of your GPU. You set it via "
                "`prefs.devices.cuda_standalone.cuda_backend.device_query_path`"
            )

    # `_run_command_with_output` splits a single command string with `shlex.split`.
    # Quote the path such that spaces or backslashes in it survive the splitting.
    device_query_output = _run_command_with_output(shlex.quote(device_query_path))

    device_info = _parse_device_query_output(device_query_output, gpu_id)
    if device_info is None:
        raise RuntimeError(
            f"Couldn't find GPU {gpu_id} in the output of `{device_query_path}`: "
            f"{device_query_output!r}"
        )
    gpu_name, compute_capability = device_info

    # Make sure we got the right GPU here
    assert gpu_list[gpu_id] == gpu_name, f"{gpu_list[gpu_id]} != {gpu_name}"

    return compute_capability
def get_best_gpu():
    """
    Get the "best" GPU available. This currently chooses the GPU with highest
    compute capability and lowest GPU ID (as reported by `nvidia-smi`).

    Returns tuple of (gpu_id, compute_capability).
    """
    available = get_available_gpus()
    winner_id, winner_capability = 0, 0
    for candidate_id, _gpu_name in enumerate(available):
        candidate_capability = get_compute_capability(candidate_id)
        # Strict comparison: for equal capabilities the lower GPU ID is kept
        if candidate_capability > winner_capability:
            winner_id, winner_capability = candidate_id, candidate_capability
    return winner_id, winner_capability
if __name__ == "__main__":
    # When run as a script, print the (gpu_id, compute_capability) tuple of the
    # GPU chosen by `get_best_gpu()`
    best_gpu = get_best_gpu()
    print(best_gpu)
    #a = nvidia_smi()
    #print(a)