// Copyright 2024 Joshua Bakita
#include "nvdebug_linux.h"
// Maximum number of LCEs that we will print
#define MAX_LCES 32
/* Which Logical Copy Engine (LCE) maps to a given Physical Copy Engine (PCE)?
@param pce_id PCE index
@return LCE index if a mapping exists, -ENODEV if no mapping exists, and -errno otherwise
*/
int get_lce_for_pce(struct nvdebug_state *g, uint8_t pce_id) {
int res;
// LCEs only exist on Pascal+
if (g->chip_id < NV_CHIP_ID_PASCAL)
return -EOPNOTSUPP;
if (g->chip_id < NV_CHIP_ID_VOLTA) {
uint32_t config = nvdebug_readl(g, NV_LCE_FOR_PCE_GP100);
if (config == -1)
return -EIO;
// On Pascal, each PCE's LCE assignment is 4 bits wide, so two configurations are packed per byte.
res = (config >> (pce_id * 4)) & 0xf;
// 0x7 is the flag value for unconfigured on Pascal
if (res == 0x7)
return -ENODEV;
} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
res = nvdebug_readl(g, NV_LCE_FOR_PCE_GV100(pce_id));
// On the Titan V (GV100), a bogus value of 0xbadf3000 has been observed if the
// GPU has not been used since reset
if (res == -1 || res == 0xbadf3000)
return -EIO;
} else {
// Works through at least Ada
res = nvdebug_readl(g, NV_LCE_FOR_PCE_GA100(pce_id));
if (res == -1)
return -EIO;
}
// At least on Volta through Ampere, 0xf is a flag value for unconfigured.
if (res == 0xf)
return -ENODEV;
return res;
}
/* Which LCE does this GRaphics Copy Engine (GRCE) share a PCE with?
@param grce_id GRCE index
@return LCE index if a shared mapping exists, -ENODEV if no mapping exists, and -errno otherwise
*/
int get_shared_lce_for_grce(struct nvdebug_state *g, uint8_t grce_id) {
int res;
uint32_t config;
// LCEs only exist on Pascal+
if (g->chip_id < NV_CHIP_ID_PASCAL)
return -EOPNOTSUPP;
if (g->chip_id < NV_CHIP_ID_VOLTA) {
if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(0))) == -1)
return -EIO;
// One configuration per byte; the 0x8 bit flags whether this GRCE shares an LCE
if (((config >> (grce_id * 8)) & 0x8) == 0)
return -ENODEV;
// lower 3 bits contain the mapping
res = (config >> (grce_id * 8)) & 0x7;
} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GP100(grce_id))) == -1)
return -EIO;
// Only the lower 4 bits contain the mapping
res = config & 0xf;
if (res == 0xf)
return -ENODEV;
} else {
// Works through at least Ada
if ((config = nvdebug_readl(g, NV_GRCE_FOR_CE_GA100(grce_id))) == -1)
return -EIO;
// Only the lower 4 bits contain the mapping
res = config & 0xf;
if (res == 0xf)
return -ENODEV;
}
return res;
}
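// Describes what a given LCE index maps to: its own Physical Copy Engine (PCE),
// another LCE whose PCE it shares (SHARED_LCE), or nothing at all (INVALID_CE).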
typedef struct {
enum {INVALID_CE, SHARED_LCE, PCE} type;
uint8_t ce;
} lce2pce_entry_t;
/* Which PCE/LCE is each LCE mapped to?
@param lce2pce Array of lce2pce_entry_t to store mappings in
@param lce2pce_len Number of array entries; at least 16 recommended
@return -errno on error, 0 on success.
*/
int get_pces_for_lces(struct nvdebug_state *g, lce2pce_entry_t *lce2pce, int lce2pce_len) {
uint32_t pce_id, grce_id, ce_pce_map;
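// INVALID_CE is the first enumerator (0), so a byte-wise memset marks every entry as unconfigured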
memset(lce2pce, INVALID_CE, lce2pce_len * sizeof(lce2pce_entry_t));
if ((ce_pce_map = nvdebug_readl(g, NV_CE_PCE_MAP)) == -1)
return -EIO;
// Pull configuration for LCEs which directly map to a PCE
for (pce_id = 0; pce_id < NV_CE_PCE_MAP_SIZE; pce_id++) {
int lce;
// Skip reading configuration if PCE is disabled
if (((1 << pce_id) & ce_pce_map) == 0)
continue;
lce = get_lce_for_pce(g, pce_id);
if (lce == -ENODEV)
continue;
if (lce < 0)
return lce;
// Bounds-check the LCE index before using it to index into lce2pce
if (lce >= lce2pce_len)
return -ERANGE;
lce2pce[lce].type = PCE;
lce2pce[lce].ce = pce_id;
}
// Pull configuration for LCEs which share a PCE with another LCE
// GRCE0 is synonymous with LCE0 (GRCE1 and LCE1 likewise)
// Only aware of up to two GRCEs per GPU
for (grce_id = 0; grce_id < NV_GRCE_MAX; grce_id++) {
int shared_lce;
// GRCEs with a PCE already associated do not share with an LCE
if (lce2pce[grce_id].type != INVALID_CE)
continue;
shared_lce = get_shared_lce_for_grce(g, grce_id);
// Each GRCE should be associated with a PCE or shared LCE
if (shared_lce == -ENODEV) {
printk(KERN_WARNING "[nvdebug] GRCE%d unconfigured.\n", grce_id);
continue;
}
if (shared_lce < 0)
return shared_lce;
lce2pce[grce_id].type = SHARED_LCE;
lce2pce[grce_id].ce = shared_lce;
}
return 0;
}
typedef struct {
int idx; // Index of LCE to print
lce2pce_entry_t lce2pce[MAX_LCES]; // MAX_LCES-length table from get_pces_for_lces()
} copy_topology_iter_t;
// The *_seq_* functions in this file follow the patterns in
// device_info_procfs.c. See there for comments on implementation.
static void *copy_topology_file_seq_start(struct seq_file *s, loff_t *pos) {
struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
static copy_topology_iter_t iter;
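// Note: iter is static, so its state is shared across every open of this file,
// consistent with the iterator pattern referenced below from device_info_procfs.c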
int err;
if (*pos == 0)
iter.idx = 0;
if ((err = get_pces_for_lces(g, iter.lce2pce, MAX_LCES)) < 0)
return ERR_PTR(err);
if (iter.idx >= MAX_LCES)
return NULL;
return &iter;
}
static void* copy_topology_file_seq_next(struct seq_file *s, void *iter_raw,
loff_t *pos) {
copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
(*pos)++; // Required by seq interface
if (++iter->idx >= MAX_LCES)
return NULL;
return iter;
}
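// Emits one line per configured copy engine, e.g. (illustrative values):
//   "GRCE0 -> PCE00"  (GRCE directly backed by a PCE)
//   "LCE04 -> PCE02"  (LCE directly backed by a PCE)
//   "GRCE1 -> LCE04"  (GRCE sharing another LCE's PCE)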
static int copy_topology_file_seq_show(struct seq_file *s, void *iter_raw) {
copy_topology_iter_t *iter = (copy_topology_iter_t*)iter_raw;
lce2pce_entry_t entry = iter->lce2pce[iter->idx];
if (entry.type == INVALID_CE)
return 0;
// First half: The LCE/GRCE in question
if (iter->idx >= NV_GRCE_MAX)
seq_printf(s, "LCE%02d -> ", iter->idx);
else
seq_printf(s, "GRCE%d -> ", iter->idx);
// Second half: The PCE/LCE/GRCE that the LCE/GRCE in question is mapped to
if (entry.type == PCE)
seq_printf(s, "PCE%02d\n", entry.ce);
else if (entry.ce >= NV_GRCE_MAX) // Shared LCE
seq_printf(s, "LCE%02d\n", entry.ce);
else // Shared GRCE
seq_printf(s, "GRCE%d\n", entry.ce);
return 0;
}
static void copy_topology_file_seq_stop(struct seq_file *s, void *lce2pce) {
// No cleanup needed
}
static const struct seq_operations copy_topology_file_seq_ops = {
.start = copy_topology_file_seq_start,
.next = copy_topology_file_seq_next,
.show = copy_topology_file_seq_show,
.stop = copy_topology_file_seq_stop,
};
static int copy_topology_file_open(struct inode *inode, struct file *f) {
return seq_open(f, &copy_topology_file_seq_ops);
}
struct file_operations copy_topology_file_ops = {
.open = copy_topology_file_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
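/* This file_operations struct is expected to be registered with procfs elsewhere
 * in the module. Hypothetical example (names illustrative; assumes a kernel old
 * enough that proc_create() still takes a struct file_operations):
 *   proc_create("copy_topology", 0444, parent_dir, &copy_topology_file_ops);
 */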