aboutsummaryrefslogtreecommitdiffstats
path: root/device_info_procfs.c
blob: 168905fb66d3f66d6285632c2221b8ec62a95542 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#include "nvdebug.h"
#include <linux/seq_file.h> // For seq_* functions and types
#include <linux/uaccess.h> // For copy_to_user()

// Generic register printing function, used for PTOP_*_NUM registers (+more)
// Reads the 32-bit register whose offset is stored in this file's PDE_DATA and
// writes it to the user buffer as a "0x"-prefixed hex string plus a newline.
// @param f    File being read from. `data` field is register offset to read.
// @param buf  User buffer for result
// @param size Length of user buffer
// @param off  Requested offset. Updated by number of characters written.
// @return -EFAULT if the result could not be copied to *buf, 0 (EOF) if `size`
//         is smaller than the output buffer or `*off` is non-zero, otherwise
//         the number of bytes written to *buf
// Note: Parent `data` field MUST be the GPU index
static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off) {
	char out[16];
	int chars_written;
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
	// The value is only ever emitted in one piece from offset zero; any other
	// request (short buffer, or a follow-up read) is reported as end-of-file.
	if (size < sizeof(out) || *off != 0)
		return 0;
	// 32 bit register will always take less than 16 characters to print
	chars_written = scnprintf(out, sizeof(out), "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f))));
	// copy_to_user() returns the number of bytes it could NOT copy. Do not
	// claim success (or advance the offset) if the user buffer was bad.
	if (copy_to_user(buf, out, chars_written)) {
		printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
		return -EFAULT;
	}
	*off += chars_written;
	return chars_written;
}

// Prints a subset of the bits of a 32-bit register as a "0x"-prefixed hex
// string plus a newline. The register offset and the bit range are taken from
// the `union reg_range` stored in this file's PDE_DATA.
// @param f    File being read from. `data` field is a `union reg_range`.
// @param buf  User buffer for result
// @param size Length of user buffer
// @param off  Requested offset. Updated by number of characters written.
// @return 0 (EOF) if `size` is smaller than the output buffer or `*off` is
//         non-zero, -EINVAL if the stored bit range cannot be applied to a
//         32-bit register, -EOPNOTSUPP if the register read returns -1,
//         -EFAULT if the result could not be copied to *buf, otherwise the
//         number of bytes written to *buf
// Note: Parent `data` field MUST be the GPU index
static ssize_t nvdebug_reg_range_read(struct file *f, char __user *buf, size_t size, loff_t *off) {
	char out[12];
	int chars_written;
	uint32_t read, mask;
	unsigned int width;
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
	// See comment in nvdebug_entry.c to understand `union reg_range`
	union reg_range range;
	range.raw = (uintptr_t)PDE_DATA(file_inode(f));

	// "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters
	if (size < sizeof(out) || *off != 0)
		return 0;

	// The mask below keeps `stop_bit - start_bit` bits. Reject ranges that
	// would need a shift count outside 0..31 (empty, inverted, or wider than
	// the register), as such shifts are undefined behavior in C.
	width = (unsigned int)range.stop_bit - (unsigned int)range.start_bit;
	if (width == 0 || width > 32 || range.start_bit >= 32)
		return -EINVAL;

	// Print bits `start_bit` to `stop_bit` from 32 bits at address `offset`
	if ((read = nvdebug_readl(g, range.offset)) == -1)
		return -EOPNOTSUPP;
	// Setup `mask` used to throw out unused upper bits
	mask = -1u >> (32 - width);
	// Throw out unused lower bits via a shift, apply the mask, and print
	chars_written = scnprintf(out, sizeof(out), "%#0x\n", (read >> range.start_bit) & mask);
	// copy_to_user() returns the number of bytes it could NOT copy. Do not
	// claim success (or advance the offset) if the user buffer was bad.
	if (copy_to_user(buf, out, chars_written)) {
		printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
		return -EFAULT;
	}
	*off += chars_written;
	return chars_written;
}

// File operations for entries that print one whole 32-bit register.
// Reads are served by nvdebug_reg32_read(); seeks by default_llseek().
struct file_operations nvdebug_read_reg32_file_ops = {
	.llseek = default_llseek,
	.read = nvdebug_reg32_read,
};

// File operations for entries that print only some of the bits of a register.
// Reads are served by nvdebug_reg_range_read(); seeks by default_llseek().
// The PDE_DATA of such an entry must hold a `union reg_range`, not a plain
// `uintptr_t` register offset.
struct file_operations nvdebug_read_reg_range_file_ops = {
	.llseek = default_llseek,
	.read = nvdebug_reg_range_read,
};

// Iterator state used by the device_info seq_file callbacks below.
// `idx` and `length` bound the walk over the table. `type_of_next_entry` and
// `has_next_entry` track which row of a multi-row entry is being visited:
// `has_next_entry` is recorded by the Ampere+ show() function, and next() uses
// it to either advance `type_of_next_entry` or reset it to zero.
typedef struct {
	int idx; // Current index in the device_info table
	int length; // Length of device_info table (including unpopulated entries)
	int type_of_next_entry; // Only used on Ampere+ GPUs
	bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence
} device_info_iter;

//// ==v== PTOP_DEVICE_INFO ==v== ////

// seq_file start() callback: begins a new pass over the device_info table, or
// resumes one already in progress. Prior to 4.19, *pos is unreliable.
// The iterator is kept in static storage, so the same object is handed back on
// every call (and is shared by everything that reads through this callback).
// @param s   Sequence file being read; used to find the GPU index
// @param pos Position in the sequence. Zero means "start from the beginning"
// @return Pointer to the iterator, or NULL to end the sequence
static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) {
	static device_info_iter iter;

	// A zero position marks a fresh sequence: rewind all iterator state
	if (*pos == 0) {
		struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];

		iter.has_next_entry = 0;
		iter.type_of_next_entry = 0;
		iter.idx = 0;
		// Table length is fixed before Ampere, but can vary on Ampere+
		if (g->chip_id < NV_CHIP_ID_AMPERE)
			iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104;
		else
			iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
	}

	// The table is sparse, so an empty entry does not mark its end. Only stop
	// once every possible index has been visited.
	return (iter.idx < iter.length) ? &iter : NULL;
}

// seq_file next() callback: advances the iterator by one table index.
// show() is called on a non-NULL return.
// @param s        Sequence file being read (unused here)
// @param iter_raw Iterator previously returned by start() or next()
// @param pos      Position in the sequence; always incremented by one
// @return `iter_raw` (the address does not change), or NULL once the index has
//         reached the table length
static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw,
				       loff_t *pos) {
	device_info_iter *it = iter_raw;

	// The seq interface requires the position to move on every call
	*pos += 1;

	// The table is sparse, so an empty entry does not mark its end. Only stop
	// once every possible index has been visited.
	it->idx += 1;
	if (it->idx >= it->length)
		return NULL;

	// Ampere device_info rows carry no info_type field, so the row type is
	// inferred by counting rows within an entry: continue counting if the
	// previous row flagged a successor, otherwise restart at zero. This lives
	// here rather than in show() so that show() stays idempotent, as the
	// seq_file interface requires. It has no effect on older GPUs.
	if (it->has_next_entry)
		it->type_of_next_entry = it->type_of_next_entry + 1;
	else
		it->type_of_next_entry = 0;
	return it;
}

// Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error.
// Implementation of this function must be idempotent
static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) {
	device_info_iter *iter = (device_info_iter*)iter_raw;
	ptop_device_info_gk104_t curr_info;
	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
	curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx));
	// Check for read errors
	if (curr_info.raw == -1)
		return -EIO;

	// Parse and print the data
	switch(curr_info.info_type) {
	case INFO_TYPE_DATA:
		// As of early 2022, only the ENUM2 format of this entry exists
		if (curr_info.is_not_enum2)
			break;
		seq_printf(s, "| BAR0 Base %#.8x\n"
			      "|           instance %d\n",
			curr_info.pri_base << 12, curr_info.inst_id);
		if (curr_info.fault_id_is_valid)
			seq_printf(s, "| Fault ID:        %3d\n", curr_info.fault_id);
		break;
	case INFO_TYPE_ENUM:
		if (curr_info.engine_is_valid)
			seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum);
		if (curr_info.runlist_is_valid)
			seq_printf(s, "| Runlist ID:       %2d\n", curr_info.runlist_enum);
		if (curr_info.intr_is_valid)
			seq_printf(s, "| Interrupt ID:     %2d\n", curr_info.intr_enum);
		if (curr_info.reset_is_valid)
			seq_printf(s, "| Reset ID:         %2d\n", curr_info.reset_enum);
		break;
	case INFO_TYPE_ENGINE_TYPE:
		seq_printf(s, "| Engine Type:      %2d (", curr_info.engine_type);
		if (curr_info.engine_type < ENGINE_TYPES_LEN)
			seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
		else
			seq_printf(s, "Unknown Historical)\n");
		break;
	case INFO_TYPE_NOT_VALID:
	default:
		// Device info records are sparse, so skip unset or unknown ones
		return 0;
	}

	// Draw a line between each device entry
	if (!curr_info.has_next_entry)
		seq_printf(s, "+---------------------+\n");
	return 0;
}

// Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error.
// Implementation of this function must be idempotent
static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) {
	device_info_iter *iter = (device_info_iter*)iter_raw;
	ptop_device_info_ga100_t curr_info;
	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
	curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx));
	// Check for read errors
	if (curr_info.raw == -1)
		return -EIO;

	// Update tracking data only used by next(); allows preserving idempotence
	iter->has_next_entry = curr_info.has_next_entry;
	// Silently skip empty entries
	if (curr_info.raw == 0)
		return 0;
	// In nvdebug, an entry is considered invalid if it does not consist of at
	// least two rows. So, if this is the first row of an entry, but another row
	// is not indicated, this entry is invalid and should be skipped.
	if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) {
		printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
		return 0;
	}

	// Parse and print the data
	// Note: The goal of this interface is to present useful information to
	// a human user, NOT to provide a stable format for scripts to parse.
	// Because of this, we favor accurately printing the data in each entry,
	// rather than providing stable (if imperfectly correct) field names
	switch(iter->type_of_next_entry) {
	case 0:
		seq_printf(s, "| Engine Type:     %3d (", curr_info.engine_type);
		if (curr_info.engine_type < ENGINE_TYPES_LEN)
			seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
		else
			seq_printf(s, "Unknown, introduced post-Lovelace)\n");
		seq_printf(s, "|           instance %d\n", curr_info.inst_id);
		seq_printf(s, "| Fault ID:       %4d\n", curr_info.fault_id);
		break;
	case 1:
		seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 8);
		seq_printf(s, "| Reset ID:        %3d\n", curr_info.reset_id);
		seq_printf(s, "| Is Engine:         %1d\n", curr_info.is_engine);

		break;
	case 2:
		seq_printf(s, "| Runlist Eng. ID:   %1d\n", curr_info.rleng_id);
		// Theoretically, we could extract an ID from the runlist RAM
		seq_printf(s, "| RL Base:  %#.8x\n", curr_info.runlist_pri_base << 10);
		break;
	default:
		printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
	}

	// Draw a line between each device entry
	if (!curr_info.has_next_entry)
		seq_printf(s, "+---------------------+\n");
	return 0;
}

// seq_file stop() callback for the device_info file. The iterator handed out
// by start() is a static object rather than an allocation, so there is nothing
// to release here.
static void device_info_file_seq_stop(struct seq_file *s, void *idx) {
	// No cleanup needed
}

static struct seq_operations device_info_file_seq_ops = {
	.start = device_info_file_seq_start,
	.next = device_info_file_seq_next,
	.stop = device_info_file_seq_stop,
};

static int device_info_file_open(struct inode *inode, struct file *f) {
	if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE)
		device_info_file_seq_ops.show = device_info_file_seq_show_ga100;
	else
		device_info_file_seq_ops.show = device_info_file_seq_show_gk104;
	return seq_open(f, &device_info_file_seq_ops);
}

// File operations for the device_info file. Everything except open() is
// delegated to the generic seq_file helpers.
struct file_operations device_info_file_ops = {
	.release = seq_release,
	.llseek = seq_lseek,
	.read = seq_read,
	.open = device_info_file_open,
};