1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
// Copyright 2024-2025 Joshua Bakita
// Obtain and print the correspondence between TPCs and GPCs for a given GPU.
//
// Known issues:
// - If CUDA cannot see the same number of GPUs as the nvdebug kernel module,
// the passed GPU ID may not properly correspond to an ID in CUDA. This
// will cause us to fail to initialize a context on the right device, and
// may cause the test to terminate due to no initialized context. This should
// only happen if some of the attached GPUs are too old or new for CUDA.
#define _GNU_SOURCE
#include <cuda.h>
#include <error.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "libsmctrl.h"
int main(int argc, char** argv) {
uint32_t num_gpcs = 0, num_tpcs = 0;
uint128_t* masks = NULL;
int res, print_width, gpu_id;
CUcontext ctx;
// Optionally support specifying the GPU ID to query via an argument
// Important: This GPU ID must match the ID used by the nvdebug module. See
// the documentation on libsmctrl_get_gpc_info() for details.
if (argc > 2 || (argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-h")))) {
fprintf(stderr, "Usage: %s <nvdebug GPU ID>\n", argv[0]);
return 1;
}
if (argc > 1)
gpu_id = atoi(argv[1]);
else
gpu_id = 0;
// Tell CUDA to use PCI device id ordering (to match nvdebug)
putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID");
// Allow CUDA to see all devices (to better match nvdebug)
unsetenv("CUDA_VISIBLE_DEVICES");
// A CUDA context is required before reading the topology information
if ((res = cuInit(0))) {
const char* name;
cuGetErrorName(res, &name);
fprintf(stderr, "%s: Unable to initialize CUDA, error %s\n", program_invocation_name, name);
return 1;
}
if ((res = cuCtxCreate(&ctx, 0, gpu_id))) {
const char* name;
cuGetErrorName(res, &name);
fprintf(stderr, "%s: Unable to create a CUDA context, error %s\n", program_invocation_name, name);
return 1;
}
// Pull topology information from libsmctrl
if ((res = libsmctrl_get_gpc_info_ext(&num_gpcs, &masks, gpu_id)) != 0) {
error(0, res, "libsmctrl_get_gpc_info() failed");
if (res == ENOENT)
fprintf(stderr, "%s: Is the nvdebug kernel module loaded?\n", program_invocation_name);
if (res == EIO)
fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name);
return 1;
}
printf("%s: GPU%d has %d enabled GPCs.\n", program_invocation_name, gpu_id, num_gpcs);
// Determine how wide the print should be (for pretty-printing)
print_width = 0;
for (int i = 0; i < num_gpcs; i++) {
int shift = 0;
while (masks[i] >> shift)
shift++;
if (shift > print_width)
print_width = shift;
}
// Convert the width to a number of octets, rather than number of bits
// (Result of integer divison, +1 if it does not evenly divide)
print_width = print_width/4 + !!(print_width % 4);
for (int i = 0; i < num_gpcs; i++) {
// No built-in for 128-bit integers, so split it into two 64-bit ones
int num_tpcs_local = __builtin_popcountl(masks[i]) + __builtin_popcountl(masks[i] >> 64);
num_tpcs += num_tpcs_local;
if (print_width > 16)
printf("%s: Mask of %d TPCs associated with GPC %d: 0x%0*lx%016lx\n",
program_invocation_name, num_tpcs_local, i, print_width - 16,
(uint64_t)(masks[i] >> 64), (uint64_t)masks[i]);
else
printf("%s: Mask of %d TPCs associated with GPC %d: 0x%0*lx\n",
program_invocation_name, num_tpcs_local, i, print_width,
(uint64_t)masks[i]);
}
printf("%s: Total of %u enabled TPCs.\n", program_invocation_name, num_tpcs);
return 0;
}
|