// Copyright 2024-2025 Joshua Bakita // Obtain and print the correspondence between TPCs and GPCs for a given GPU. // // Known issues: // - If CUDA cannot see the same number of GPUs as the nvdebug kernel module, // the passed GPU ID may not properly correspond to to an ID an CUDA. This // will cause us to fail to initialize a context on the right device, and // may cause the test to terminate due to no initialized context. This should // only happen if some of the attached GPUs are too old or new for CUDA. #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include "libsmctrl.h" int main(int argc, char** argv) { uint32_t num_gpcs = 0, num_tpcs = 0; uint128_t* masks = NULL; int res, print_width, gpu_id; CUcontext ctx; // Optionally support specifying the GPU ID to query via an argument // Important: This GPU ID must match the ID used by the nvdebug module. See // the documentation on libsmctrl_get_gpc_info() for details. if (argc > 2 || (argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-h")))) { fprintf(stderr, "Usage: %s \n", argv[0]); return 1; } if (argc > 1) gpu_id = atoi(argv[1]); else gpu_id = 0; // Tell CUDA to use PCI device id ordering (to match nvdebug) putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID"); // Allow CUDA to see all devices (to better match nvdebug) unsetenv("CUDA_VISIBLE_DEVICES"); // A CUDA context is required before reading the topology information if ((res = cuInit(0))) { const char* name; cuGetErrorName(res, &name); fprintf(stderr, "%s: Unable to initialize CUDA, error %s\n", program_invocation_name, name); return 1; } if ((res = cuCtxCreate(&ctx, 0, gpu_id))) { const char* name; cuGetErrorName(res, &name); fprintf(stderr, "%s: Unable to create a CUDA context, error %s\n", program_invocation_name, name); return 1; } // Pull topology information from libsmctrl if ((res = libsmctrl_get_gpc_info_ext(&num_gpcs, &masks, gpu_id)) != 0) { error(0, res, "libsmctrl_get_gpc_info() failed"); if (res == ENOENT) fprintf(stderr, "%s: Is the nvdebug kernel module loaded?\n", program_invocation_name); if (res == EIO) fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name); return 1; } printf("%s: GPU%d has %d enabled GPCs.\n", program_invocation_name, gpu_id, num_gpcs); // Determine how wide the print should be (for pretty-printing) print_width = 0; for (int i = 0; i < num_gpcs; i++) { int shift = 0; while (masks[i] >> shift) shift++; if (shift > print_width) print_width = shift; } // Convert the width to a number of octets, rather than number of bits // (Result of integer divison, +1 if it does not evenly divide) print_width = print_width/4 + !!(print_width % 4); for (int i = 0; i < num_gpcs; i++) { // No built-in for 128-bit integers, so split it into two 64-bit ones int num_tpcs_local = __builtin_popcountl(masks[i]) + __builtin_popcountl(masks[i] >> 64); num_tpcs += num_tpcs_local; if (print_width > 16) printf("%s: Mask of %d TPCs associated with GPC %d: 0x%0*lx%016lx\n", program_invocation_name, num_tpcs_local, i, print_width - 16, (uint64_t)(masks[i] >> 64), (uint64_t)masks[i]); else printf("%s: Mask of %d TPCs associated with GPC %d: 0x%0*lx\n", program_invocation_name, num_tpcs_local, i, print_width, (uint64_t)masks[i]); } printf("%s: Total of %u enabled TPCs.\n", program_invocation_name, num_tpcs); return 0; }