author		Frederic Barrat <fbarrat@linux.vnet.ibm.com>	2018-01-23 06:31:41 -0500
committer	Michael Ellerman <mpe@ellerman.id.au>	2018-01-23 19:42:58 -0500
commit		5ef3166e8a32d78dfa985a323aa45ed485ff663a
tree		e1321e75dc2f802294f94d71aff7509057d01077
parent		2cb3d64b26984703a6bb80e66adcc3727ad37f9f
ocxl: Driver code for 'generic' opencapi devices
Add an ocxl driver to handle generic opencapi devices. Of course, it's not meant to be the only opencapi driver; any device is free to implement its own. But if a host application only needs basic services like attaching to an opencapi adapter, having translation faults handled, or allocating AFU interrupts, it should suffice.

The AFU config space must follow the opencapi specification and use the expected vendor/device ID to be seen by the generic driver.

The driver exposes the device AFUs as a char device in /dev/ocxl/

Note that the driver currently doesn't handle memory attached to the opencapi device.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
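As a rough usage sketch (not part of the patch), a host application could drive this interface as follows. The device name under /dev/ocxl/, the uapi struct layout and the MMIO window size are assumptions here; they come from pci.c and include/uapi/misc/ocxl.h, which are added by this patch but not quoted in full below.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <misc/ocxl.h>		/* uapi header added by this patch */

int attach_afu_context(void)
{
	struct ocxl_ioctl_attach attach;
	void *pp_mmio;
	int fd;

	/* "/dev/ocxl/<afu>" is a placeholder; the real name depends on the AFU */
	fd = open("/dev/ocxl/<afu>", O_RDWR);
	if (fd < 0)
		return -1;

	/* reserved fields must be zero, otherwise the ioctl fails with -EINVAL */
	memset(&attach, 0, sizeof(attach));
	attach.amr = 0;
	if (ioctl(fd, OCXL_IOCTL_ATTACH, &attach) < 0)
		return -1;

	/* map the per-process MMIO area; 4096 is an assumed size, the real
	 * limit is the AFU's per-process MMIO stride */
	pp_mmio = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (pp_mmio == MAP_FAILED)
		return -1;

	return fd;
}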
-rw-r--r--	drivers/misc/ocxl/config.c		712
-rw-r--r--	drivers/misc/ocxl/context.c		230
-rw-r--r--	drivers/misc/ocxl/file.c		398
-rw-r--r--	drivers/misc/ocxl/link.c		603
-rw-r--r--	drivers/misc/ocxl/main.c		33
-rw-r--r--	drivers/misc/ocxl/ocxl_internal.h	193
-rw-r--r--	drivers/misc/ocxl/pasid.c		107
-rw-r--r--	drivers/misc/ocxl/pci.c			585
-rw-r--r--	drivers/misc/ocxl/sysfs.c		142
-rw-r--r--	include/uapi/misc/ocxl.h		40
10 files changed, 3043 insertions(+), 0 deletions(-)
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
new file mode 100644
index 000000000000..ea8cca50ea06
--- /dev/null
+++ b/drivers/misc/ocxl/config.c
@@ -0,0 +1,712 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <linux/pci.h>
4#include <asm/pnv-ocxl.h>
5#include <misc/ocxl-config.h>
6#include "ocxl_internal.h"
7
8#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
9#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
10
11#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0)
12#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0)
13#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0)
14#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0)
15
16#define OCXL_DVSEC_TEMPL_VERSION 0x0
17#define OCXL_DVSEC_TEMPL_NAME 0x4
18#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C
19#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20
20#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28
21#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30
22#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38
23#define OCXL_DVSEC_TEMPL_MEM_SZ 0x3C
24#define OCXL_DVSEC_TEMPL_WWID 0x40
25
26#define OCXL_MAX_AFU_PER_FUNCTION 64
27#define OCXL_TEMPL_LEN 0x58
28#define OCXL_TEMPL_NAME_LEN 24
29#define OCXL_CFG_TIMEOUT 3
30
31static int find_dvsec(struct pci_dev *dev, int dvsec_id)
32{
33 int vsec = 0;
34 u16 vendor, id;
35
36 while ((vsec = pci_find_next_ext_capability(dev, vsec,
37 OCXL_EXT_CAP_ID_DVSEC))) {
38 pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
39 &vendor);
40 pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
41 if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
42 return vsec;
43 }
44 return 0;
45}
46
47static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
48{
49 int vsec = 0;
50 u16 vendor, id;
51 u8 idx;
52
53 while ((vsec = pci_find_next_ext_capability(dev, vsec,
54 OCXL_EXT_CAP_ID_DVSEC))) {
55 pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
56 &vendor);
57 pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
58
59 if (vendor == PCI_VENDOR_ID_IBM &&
60 id == OCXL_DVSEC_AFU_CTRL_ID) {
61 pci_read_config_byte(dev,
62 vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
63 &idx);
64 if (idx == afu_idx)
65 return vsec;
66 }
67 }
68 return 0;
69}
70
71static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
72{
73 u16 val;
74 int pos;
75
76 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
77 if (!pos) {
78 /*
79 * PASID capability is not mandatory, but without it,
80 * the function shouldn't define any AFU
81 */
82 dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
83 fn->max_pasid_log = -1;
84 goto out;
85 }
86 pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
87 fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);
88
89out:
90 dev_dbg(&dev->dev, "PASID capability:\n");
91 dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log);
92 return 0;
93}
94
95static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
96{
97 int pos;
98
99 pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
100 if (!pos && PCI_FUNC(dev->devfn) == 0) {
101 dev_err(&dev->dev, "Can't find TL DVSEC\n");
102 return -ENODEV;
103 }
104 if (pos && PCI_FUNC(dev->devfn) != 0) {
105 dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
106 return -ENODEV;
107 }
108 fn->dvsec_tl_pos = pos;
109 return 0;
110}
111
112static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
113{
114 int pos, afu_present;
115 u32 val;
116
117 pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
118 if (!pos) {
119 dev_err(&dev->dev, "Can't find function DVSEC\n");
120 return -ENODEV;
121 }
122 fn->dvsec_function_pos = pos;
123
124 pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
125 afu_present = EXTRACT_BIT(val, 31);
126 if (!afu_present) {
127 fn->max_afu_index = -1;
128 dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
129 goto out;
130 }
131 fn->max_afu_index = EXTRACT_BITS(val, 24, 29);
132
133out:
134 dev_dbg(&dev->dev, "Function DVSEC:\n");
135 dev_dbg(&dev->dev, " Max AFU index = %d\n", fn->max_afu_index);
136 return 0;
137}
138
139static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
140{
141 int pos;
142
143 if (fn->max_afu_index < 0) {
144 fn->dvsec_afu_info_pos = -1;
145 return 0;
146 }
147
148 pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
149 if (!pos) {
150 dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
151 return -ENODEV;
152 }
153 fn->dvsec_afu_info_pos = pos;
154 return 0;
155}
156
157static int read_dvsec_vendor(struct pci_dev *dev)
158{
159 int pos;
160 u32 cfg, tlx, dlx;
161
162 /*
163 * vendor specific DVSEC is optional
164 *
165 * It's currently only used on function 0 to specify the
166 * version of some logic blocks. Some older images may not
167 * even have it so we ignore any errors
168 */
169 if (PCI_FUNC(dev->devfn) != 0)
170 return 0;
171
172 pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
173 if (!pos)
174 return 0;
175
176 pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
177 pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
178 pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
179
180 dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
181 dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg);
182 dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx);
183 dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx);
184 return 0;
185}
186
187static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
188{
189 if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
190 dev_err(&dev->dev,
191 "AFUs are defined but no PASIDs are requested\n");
192 return -EINVAL;
193 }
194
195 if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
196 dev_err(&dev->dev,
197 "Max AFU index out of architectural limit (%d vs %d)\n",
198 fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
199 return -EINVAL;
200 }
201 return 0;
202}
203
204int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
205{
206 int rc;
207
208 rc = read_pasid(dev, fn);
209 if (rc) {
210 dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
211 return -ENODEV;
212 }
213
214 rc = read_dvsec_tl(dev, fn);
215 if (rc) {
216 dev_err(&dev->dev,
217 "Invalid Transaction Layer DVSEC configuration: %d\n",
218 rc);
219 return -ENODEV;
220 }
221
222 rc = read_dvsec_function(dev, fn);
223 if (rc) {
224 dev_err(&dev->dev,
225 "Invalid Function DVSEC configuration: %d\n", rc);
226 return -ENODEV;
227 }
228
229 rc = read_dvsec_afu_info(dev, fn);
230 if (rc) {
231 dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
232 return -ENODEV;
233 }
234
235 rc = read_dvsec_vendor(dev);
236 if (rc) {
237 dev_err(&dev->dev,
238 "Invalid vendor specific DVSEC configuration: %d\n",
239 rc);
240 return -ENODEV;
241 }
242
243 rc = validate_function(dev, fn);
244 return rc;
245}
246
247static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
248 int offset, u32 *data)
249{
250 u32 val;
251 unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
252 int pos = fn->dvsec_afu_info_pos;
253
254 /* Protect 'data valid' bit */
255 if (EXTRACT_BIT(offset, 31)) {
256 dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
257 return -EINVAL;
258 }
259
260 pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
261 pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
262 while (!EXTRACT_BIT(val, 31)) {
263 if (time_after_eq(jiffies, timeout)) {
264 dev_err(&dev->dev,
265 "Timeout while reading AFU info DVSEC (offset=%d)\n",
266 offset);
267 return -EBUSY;
268 }
269 cpu_relax();
270 pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
271 }
272 pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
273 return 0;
274}
275
276int ocxl_config_check_afu_index(struct pci_dev *dev,
277 struct ocxl_fn_config *fn, int afu_idx)
278{
279 u32 val;
280 int rc, templ_major, templ_minor, len;
281
282 pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
283 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
284 if (rc)
285 return rc;
286
287 /* AFU index map can have holes */
288 if (!val)
289 return 0;
290
291 templ_major = EXTRACT_BITS(val, 8, 15);
292 templ_minor = EXTRACT_BITS(val, 0, 7);
293 dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
294 templ_major, templ_minor);
295
296 len = EXTRACT_BITS(val, 16, 31);
297 if (len != OCXL_TEMPL_LEN) {
298 dev_warn(&dev->dev,
299 "Unexpected template length in AFU information (%#x)\n",
300 len);
301 }
302 return 1;
303}
304
305static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
306 struct ocxl_afu_config *afu)
307{
308 int i, rc;
309 u32 val, *ptr;
310
311 BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
312 for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
313 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
314 if (rc)
315 return rc;
316 ptr = (u32 *) &afu->name[i];
317 *ptr = val;
318 }
319 afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
320 return 0;
321}
322
323static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
324 struct ocxl_afu_config *afu)
325{
326 int rc;
327 u32 val;
328
329 /*
330 * Global MMIO
331 */
332 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
333 if (rc)
334 return rc;
335 afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
336 afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
337
338 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
339 if (rc)
340 return rc;
341 afu->global_mmio_offset += (u64) val << 32;
342
343 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
344 if (rc)
345 return rc;
346 afu->global_mmio_size = val;
347
348 /*
349 * Per-process MMIO
350 */
351 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
352 if (rc)
353 return rc;
354 afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
355 afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
356
357 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
358 if (rc)
359 return rc;
360 afu->pp_mmio_offset += (u64) val << 32;
361
362 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
363 if (rc)
364 return rc;
365 afu->pp_mmio_stride = val;
366
367 return 0;
368}
369
370static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
371{
372 int pos;
373 u8 val8;
374 u16 val16;
375
376 pos = find_dvsec_afu_ctrl(dev, afu->idx);
377 if (!pos) {
378 dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
379 afu->idx);
380 return -ENODEV;
381 }
382 afu->dvsec_afu_control_pos = pos;
383
384 pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
385 afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);
386
387 pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
388 afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
389 return 0;
390}
391
392static bool char_allowed(int c)
393{
394 /*
395 * Permitted Characters : Alphanumeric, hyphen, underscore, comma
396 */
397 if ((c >= 0x30 && c <= 0x39) /* digits */ ||
398 (c >= 0x41 && c <= 0x5A) /* upper case */ ||
399 (c >= 0x61 && c <= 0x7A) /* lower case */ ||
400 c == 0 /* NULL */ ||
401 c == 0x2D /* - */ ||
402 c == 0x5F /* _ */ ||
403 c == 0x2C /* , */)
404 return true;
405 return false;
406}
407
408static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
409{
410 int i;
411
412 if (!afu->name[0]) {
413 dev_err(&dev->dev, "Empty AFU name\n");
414 return -EINVAL;
415 }
416 for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
417 if (!char_allowed(afu->name[i])) {
418 dev_err(&dev->dev,
419 "Invalid character in AFU name\n");
420 return -EINVAL;
421 }
422 }
423
424 if (afu->global_mmio_bar != 0 &&
425 afu->global_mmio_bar != 2 &&
426 afu->global_mmio_bar != 4) {
427 dev_err(&dev->dev, "Invalid global MMIO bar number\n");
428 return -EINVAL;
429 }
430 if (afu->pp_mmio_bar != 0 &&
431 afu->pp_mmio_bar != 2 &&
432 afu->pp_mmio_bar != 4) {
433 dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
434 return -EINVAL;
435 }
436 return 0;
437}
438
439int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
440 struct ocxl_afu_config *afu, u8 afu_idx)
441{
442 int rc;
443 u32 val32;
444
445 /*
446 * First, we need to write the AFU idx for the AFU we want to
447 * access.
448 */
449 WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
450 afu->idx = afu_idx;
451 pci_write_config_byte(dev,
452 fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
453 afu->idx);
454
455 rc = read_afu_name(dev, fn, afu);
456 if (rc)
457 return rc;
458
459 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
460 if (rc)
461 return rc;
462 afu->version_major = EXTRACT_BITS(val32, 24, 31);
463 afu->version_minor = EXTRACT_BITS(val32, 16, 23);
464 afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
465 afu->afum_type = EXTRACT_BITS(val32, 12, 13);
466 afu->profile = EXTRACT_BITS(val32, 0, 7);
467
468 rc = read_afu_mmio(dev, fn, afu);
469 if (rc)
470 return rc;
471
472 rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
473 if (rc)
474 return rc;
475 afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);
476
477 rc = read_afu_control(dev, afu);
478 if (rc)
479 return rc;
480
481 dev_dbg(&dev->dev, "AFU configuration:\n");
482 dev_dbg(&dev->dev, " name = %s\n", afu->name);
483 dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major,
484 afu->version_minor);
485 dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar);
486 dev_dbg(&dev->dev, " global mmio offset = %#llx\n",
487 afu->global_mmio_offset);
488 dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size);
489 dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar);
490 dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset);
491 dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride);
492 dev_dbg(&dev->dev, " mem size (log) = %hhu\n", afu->log_mem_size);
493 dev_dbg(&dev->dev, " pasid supported (log) = %u\n",
494 afu->pasid_supported_log);
495 dev_dbg(&dev->dev, " actag supported = %u\n",
496 afu->actag_supported);
497
498 rc = validate_afu(dev, afu);
499 return rc;
500}
501
502int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
503 u16 *supported)
504{
505 int rc;
506
507 /*
508 * This is really a simple wrapper for the kernel API, to
509 * avoid an external driver using ocxl as a library to call
510 * platform-dependent code
511 */
512 rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
513 if (rc) {
514 dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
515 return rc;
516 }
517 return 0;
518}
519
520void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
521 int actag_count)
522{
523 u16 val;
524
525 val = actag_count & OCXL_DVSEC_ACTAG_MASK;
526 pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);
527
528 val = actag_base & OCXL_DVSEC_ACTAG_MASK;
529 pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
530}
531
532int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
533{
534 return pnv_ocxl_get_pasid_count(dev, count);
535}
536
537void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
538 u32 pasid_count_log)
539{
540 u8 val8;
541 u32 val32;
542
543 val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
544 pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);
545
546 pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
547 &val32);
548 val32 &= ~OCXL_DVSEC_PASID_MASK;
549 val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
550 pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
551 val32);
552}
553
554void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
555{
556 u8 val;
557
558 pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
559 if (enable)
560 val |= 1;
561 else
562 val &= 0xFE;
563 pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
564}
565
566int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
567{
568 u32 val;
569 __be32 *be32ptr;
570 u8 timers;
571 int i, rc;
572 long recv_cap;
573 char *recv_rate;
574
575 /*
576 * Skip on function != 0, as the TL can only be defined on 0
577 */
578 if (PCI_FUNC(dev->devfn) != 0)
579 return 0;
580
581 recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
582 if (!recv_rate)
583 return -ENOMEM;
584 /*
585 * The spec defines 64 templates for messages in the
586 * Transaction Layer (TL).
587 *
588 * The host and device each support a subset, so we need to
589 * configure the transmitters on each side to send only
590 * templates the receiver understands, at a rate the receiver
591 * can process. Per the spec, template 0 must be supported by
592 * everybody. That's the template which has been used by the
593 * host and device so far.
594 *
595 * The sending rate limit must be set before the template is
596 * enabled.
597 */
598
599 /*
600 * Device -> host
601 */
602 rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
603 PNV_OCXL_TL_RATE_BUF_SIZE);
604 if (rc)
605 goto out;
606
607 for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
608 be32ptr = (__be32 *) &recv_rate[i];
609 pci_write_config_dword(dev,
610 tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
611 be32_to_cpu(*be32ptr));
612 }
613 val = recv_cap >> 32;
614 pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
615 val = recv_cap & GENMASK(31, 0);
616 pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
617
618 /*
619 * Host -> device
620 */
621 for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
622 pci_read_config_dword(dev,
623 tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
624 &val);
625 be32ptr = (__be32 *) &recv_rate[i];
626 *be32ptr = cpu_to_be32(val);
627 }
628 pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
629 recv_cap = (long) val << 32;
630 pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
631 recv_cap |= val;
632
633 rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
634 PNV_OCXL_TL_RATE_BUF_SIZE);
635 if (rc)
636 goto out;
637
638 /*
639 * Opencapi commands needing to be retried are classified per
640 * the TL in 2 groups: short and long commands.
641 *
642 * The short back off timer is not used for now. It will be
643 * for opencapi 4.0.
644 *
645 * The long back off timer is typically used when an AFU hits
646 * a page fault but the NPU is already processing one. So the
647 * AFU needs to wait before it can resubmit. Having a value
648 * too low doesn't break anything, but can generate extra
649 * traffic on the link.
650 * We set it to 1.6 us for now. It's shorter than, but in the
651 * same order of magnitude as the time spent to process a page
652 * fault.
653 */
654 timers = 0x2 << 4; /* long timer = 1.6 us */
655 pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
656 timers);
657
658 rc = 0;
659out:
660 kfree(recv_rate);
661 return rc;
662}
663
664int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
665{
666 u32 val;
667 unsigned long timeout;
668
669 pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
670 &val);
671 if (EXTRACT_BIT(val, 20)) {
672 dev_err(&dev->dev,
673 "Can't terminate PASID %#x, previous termination didn't complete\n",
674 pasid);
675 return -EBUSY;
676 }
677
678 val &= ~OCXL_DVSEC_PASID_MASK;
679 val |= pasid & OCXL_DVSEC_PASID_MASK;
680 val |= BIT(20);
681 pci_write_config_dword(dev,
682 afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
683 val);
684
685 timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
686 pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
687 &val);
688 while (EXTRACT_BIT(val, 20)) {
689 if (time_after_eq(jiffies, timeout)) {
690 dev_err(&dev->dev,
691 "Timeout while waiting for AFU to terminate PASID %#x\n",
692 pasid);
693 return -EBUSY;
694 }
695 cpu_relax();
696 pci_read_config_dword(dev,
697 afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
698 &val);
699 }
700 return 0;
701}
702
703void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
704 u32 tag_count)
705{
706 u32 val;
707
708 val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
709 val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
710 pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
711 val);
712}
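As a side note on the helpers above, here is a minimal illustration (with a made-up register value) of how EXTRACT_BITS() decodes the template version word read by ocxl_config_check_afu_index():

	u32 val = 0x00580102;			/* assumed example value */
	int len   = EXTRACT_BITS(val, 16, 31);	/* 0x58, i.e. OCXL_TEMPL_LEN */
	int major = EXTRACT_BITS(val, 8, 15);	/* 1 */
	int minor = EXTRACT_BITS(val, 0, 7);	/* 2 -> template version 1.2 */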
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
new file mode 100644
index 000000000000..b34b836f924c
--- /dev/null
+++ b/drivers/misc/ocxl/context.c
@@ -0,0 +1,230 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <linux/sched/mm.h>
4#include "ocxl_internal.h"
5
6struct ocxl_context *ocxl_context_alloc(void)
7{
8 return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
9}
10
11int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
12 struct address_space *mapping)
13{
14 int pasid;
15
16 ctx->afu = afu;
17 mutex_lock(&afu->contexts_lock);
18 pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
19 afu->pasid_base + afu->pasid_max, GFP_KERNEL);
20 if (pasid < 0) {
21 mutex_unlock(&afu->contexts_lock);
22 return pasid;
23 }
24 afu->pasid_count++;
25 mutex_unlock(&afu->contexts_lock);
26
27 ctx->pasid = pasid;
28 ctx->status = OPENED;
29 mutex_init(&ctx->status_mutex);
30 ctx->mapping = mapping;
31 mutex_init(&ctx->mapping_lock);
32 init_waitqueue_head(&ctx->events_wq);
33 mutex_init(&ctx->xsl_error_lock);
34 /*
35 * Keep a reference on the AFU to make sure it's valid for the
36 * duration of the life of the context
37 */
38 ocxl_afu_get(afu);
39 return 0;
40}
41
42/*
43 * Callback for when a translation fault triggers an error
44 * data: a pointer to the context which triggered the fault
45 * addr: the address that triggered the error
46 * dsisr: the value of the PPC64 dsisr register
47 */
48static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
49{
50 struct ocxl_context *ctx = (struct ocxl_context *) data;
51
52 mutex_lock(&ctx->xsl_error_lock);
53 ctx->xsl_error.addr = addr;
54 ctx->xsl_error.dsisr = dsisr;
55 ctx->xsl_error.count++;
56 mutex_unlock(&ctx->xsl_error_lock);
57
58 wake_up_all(&ctx->events_wq);
59}
60
61int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
62{
63 int rc;
64
65 mutex_lock(&ctx->status_mutex);
66 if (ctx->status != OPENED) {
67 rc = -EIO;
68 goto out;
69 }
70
71 rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
72 current->mm->context.id, 0, amr, current->mm,
73 xsl_fault_error, ctx);
74 if (rc)
75 goto out;
76
77 ctx->status = ATTACHED;
78out:
79 mutex_unlock(&ctx->status_mutex);
80 return rc;
81}
82
83static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
84 u64 offset, struct ocxl_context *ctx)
85{
86 u64 pp_mmio_addr;
87 int pasid_off;
88
89 if (offset >= ctx->afu->config.pp_mmio_stride)
90 return VM_FAULT_SIGBUS;
91
92 mutex_lock(&ctx->status_mutex);
93 if (ctx->status != ATTACHED) {
94 mutex_unlock(&ctx->status_mutex);
95 pr_debug("%s: Context not attached, failing mmio mmap\n",
96 __func__);
97 return VM_FAULT_SIGBUS;
98 }
99
100 pasid_off = ctx->pasid - ctx->afu->pasid_base;
101 pp_mmio_addr = ctx->afu->pp_mmio_start +
102 pasid_off * ctx->afu->config.pp_mmio_stride +
103 offset;
104
105 vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
106 mutex_unlock(&ctx->status_mutex);
107 return VM_FAULT_NOPAGE;
108}
109
110static int ocxl_mmap_fault(struct vm_fault *vmf)
111{
112 struct vm_area_struct *vma = vmf->vma;
113 struct ocxl_context *ctx = vma->vm_file->private_data;
114 u64 offset;
115 int rc;
116
117 offset = vmf->pgoff << PAGE_SHIFT;
118 pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
119 ctx->pasid, vmf->address, offset);
120
121 rc = map_pp_mmio(vma, vmf->address, offset, ctx);
122 return rc;
123}
124
125static const struct vm_operations_struct ocxl_vmops = {
126 .fault = ocxl_mmap_fault,
127};
128
129static int check_mmap_mmio(struct ocxl_context *ctx,
130 struct vm_area_struct *vma)
131{
132 if ((vma_pages(vma) + vma->vm_pgoff) >
133 (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
134 return -EINVAL;
135 return 0;
136}
137
138int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
139{
140 int rc;
141
142 rc = check_mmap_mmio(ctx, vma);
143 if (rc)
144 return rc;
145
146 vma->vm_flags |= VM_IO | VM_PFNMAP;
147 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
148 vma->vm_ops = &ocxl_vmops;
149 return 0;
150}
151
152int ocxl_context_detach(struct ocxl_context *ctx)
153{
154 struct pci_dev *dev;
155 int afu_control_pos;
156 enum ocxl_context_status status;
157 int rc;
158
159 mutex_lock(&ctx->status_mutex);
160 status = ctx->status;
161 ctx->status = CLOSED;
162 mutex_unlock(&ctx->status_mutex);
163 if (status != ATTACHED)
164 return 0;
165
166 dev = to_pci_dev(ctx->afu->fn->dev.parent);
167 afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;
168
169 mutex_lock(&ctx->afu->afu_control_lock);
170 rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
171 mutex_unlock(&ctx->afu->afu_control_lock);
172 if (rc) {
173 /*
174 * If we timeout waiting for the AFU to terminate the
175 * pasid, then it's dangerous to clean up the Process
176 * Element entry in the SPA, as it may be referenced
177 * in the future by the AFU. In which case, we would
178 * checkstop because of an invalid PE access (FIR
179 * register 2, bit 42). So leave the PE
180 * defined. Caller shouldn't free the context so that
181 * PASID remains allocated.
182 *
183 * A link reset will be required to cleanup the AFU
184 * and the SPA.
185 */
186 if (rc == -EBUSY)
187 return rc;
188 }
189 rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
190 if (rc) {
191 dev_warn(&ctx->afu->dev,
192 "Couldn't remove PE entry cleanly: %d\n", rc);
193 }
194 return 0;
195}
196
197void ocxl_context_detach_all(struct ocxl_afu *afu)
198{
199 struct ocxl_context *ctx;
200 int tmp;
201
202 mutex_lock(&afu->contexts_lock);
203 idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
204 ocxl_context_detach(ctx);
205 /*
206 * We are force detaching - remove any active mmio
207 * mappings so userspace cannot interfere with the
208 * card if it comes back. Easiest way to exercise
209 * this is to unbind and rebind the driver via sysfs
210 * while it is in use.
211 */
212 mutex_lock(&ctx->mapping_lock);
213 if (ctx->mapping)
214 unmap_mapping_range(ctx->mapping, 0, 0, 1);
215 mutex_unlock(&ctx->mapping_lock);
216 }
217 mutex_unlock(&afu->contexts_lock);
218}
219
220void ocxl_context_free(struct ocxl_context *ctx)
221{
222 mutex_lock(&ctx->afu->contexts_lock);
223 ctx->afu->pasid_count--;
224 idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
225 mutex_unlock(&ctx->afu->contexts_lock);
226
227 /* reference to the AFU taken in ocxl_context_init */
228 ocxl_afu_put(ctx->afu);
229 kfree(ctx);
230}
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
new file mode 100644
index 000000000000..6f0befda6a8a
--- /dev/null
+++ b/drivers/misc/ocxl/file.c
@@ -0,0 +1,398 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <linux/fs.h>
4#include <linux/poll.h>
5#include <linux/sched/signal.h>
6#include <linux/uaccess.h>
7#include <uapi/misc/ocxl.h>
8#include "ocxl_internal.h"
9
10
11#define OCXL_NUM_MINORS 256 /* Total to reserve */
12
13static dev_t ocxl_dev;
14static struct class *ocxl_class;
15static struct mutex minors_idr_lock;
16static struct idr minors_idr;
17
18static struct ocxl_afu *find_and_get_afu(dev_t devno)
19{
20 struct ocxl_afu *afu;
21 int afu_minor;
22
23 afu_minor = MINOR(devno);
24 /*
25 * We don't declare an RCU critical section here, as our AFU
26 * is protected by a reference counter on the device. By the time the
27 * minor number of a device is removed from the idr, the ref count of
28 * the device is already at 0, so no user API will access that AFU and
29 * this function can't return it.
30 */
31 afu = idr_find(&minors_idr, afu_minor);
32 if (afu)
33 ocxl_afu_get(afu);
34 return afu;
35}
36
37static int allocate_afu_minor(struct ocxl_afu *afu)
38{
39 int minor;
40
41 mutex_lock(&minors_idr_lock);
42 minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
43 mutex_unlock(&minors_idr_lock);
44 return minor;
45}
46
47static void free_afu_minor(struct ocxl_afu *afu)
48{
49 mutex_lock(&minors_idr_lock);
50 idr_remove(&minors_idr, MINOR(afu->dev.devt));
51 mutex_unlock(&minors_idr_lock);
52}
53
54static int afu_open(struct inode *inode, struct file *file)
55{
56 struct ocxl_afu *afu;
57 struct ocxl_context *ctx;
58 int rc;
59
60 pr_debug("%s for device %x\n", __func__, inode->i_rdev);
61
62 afu = find_and_get_afu(inode->i_rdev);
63 if (!afu)
64 return -ENODEV;
65
66 ctx = ocxl_context_alloc();
67 if (!ctx) {
68 rc = -ENOMEM;
69 goto put_afu;
70 }
71
72 rc = ocxl_context_init(ctx, afu, inode->i_mapping);
73 if (rc)
74 goto put_afu;
75 file->private_data = ctx;
76 ocxl_afu_put(afu);
77 return 0;
78
79put_afu:
80 ocxl_afu_put(afu);
81 return rc;
82}
83
84static long afu_ioctl_attach(struct ocxl_context *ctx,
85 struct ocxl_ioctl_attach __user *uarg)
86{
87 struct ocxl_ioctl_attach arg;
88 u64 amr = 0;
89 int rc;
90
91 pr_debug("%s for context %d\n", __func__, ctx->pasid);
92
93 if (copy_from_user(&arg, uarg, sizeof(arg)))
94 return -EFAULT;
95
96 /* Make sure reserved fields are not set for forward compatibility */
97 if (arg.reserved1 || arg.reserved2 || arg.reserved3)
98 return -EINVAL;
99
100 amr = arg.amr & mfspr(SPRN_UAMOR);
101 rc = ocxl_context_attach(ctx, amr);
102 return rc;
103}
104
105#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \
106 "UNKNOWN")
107
108static long afu_ioctl(struct file *file, unsigned int cmd,
109 unsigned long args)
110{
111 struct ocxl_context *ctx = file->private_data;
112 long rc;
113
114 pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
115 CMD_STR(cmd));
116
117 if (ctx->status == CLOSED)
118 return -EIO;
119
120 switch (cmd) {
121 case OCXL_IOCTL_ATTACH:
122 rc = afu_ioctl_attach(ctx,
123 (struct ocxl_ioctl_attach __user *) args);
124 break;
125
126 default:
127 rc = -EINVAL;
128 }
129 return rc;
130}
131
132static long afu_compat_ioctl(struct file *file, unsigned int cmd,
133 unsigned long args)
134{
135 return afu_ioctl(file, cmd, args);
136}
137
138static int afu_mmap(struct file *file, struct vm_area_struct *vma)
139{
140 struct ocxl_context *ctx = file->private_data;
141
142 pr_debug("%s for context %d\n", __func__, ctx->pasid);
143 return ocxl_context_mmap(ctx, vma);
144}
145
146static bool has_xsl_error(struct ocxl_context *ctx)
147{
148 bool ret;
149
150 mutex_lock(&ctx->xsl_error_lock);
151 ret = !!ctx->xsl_error.addr;
152 mutex_unlock(&ctx->xsl_error_lock);
153
154 return ret;
155}
156
157/*
158 * Are there any events pending on the AFU
159 * ctx: The AFU context
160 * Returns: true if there are events pending
161 */
162static bool afu_events_pending(struct ocxl_context *ctx)
163{
164 if (has_xsl_error(ctx))
165 return true;
166 return false;
167}
168
169static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
170{
171 struct ocxl_context *ctx = file->private_data;
172 unsigned int mask = 0;
173 bool closed;
174
175 pr_debug("%s for context %d\n", __func__, ctx->pasid);
176
177 poll_wait(file, &ctx->events_wq, wait);
178
179 mutex_lock(&ctx->status_mutex);
180 closed = (ctx->status == CLOSED);
181 mutex_unlock(&ctx->status_mutex);
182
183 if (afu_events_pending(ctx))
184 mask = POLLIN | POLLRDNORM;
185 else if (closed)
186 mask = POLLERR;
187
188 return mask;
189}
190
191/*
192 * Populate the supplied buffer with a single XSL error
193 * ctx: The AFU context to report the error from
194 * header: the event header to populate
195 * buf: The buffer to write the body into (should be at least
196 * AFU_EVENT_BODY_XSL_ERROR_SIZE)
197 * Return: the amount of buffer that was populated
198 */
199static ssize_t append_xsl_error(struct ocxl_context *ctx,
200 struct ocxl_kernel_event_header *header,
201 char __user *buf)
202{
203 struct ocxl_kernel_event_xsl_fault_error body;
204
205 memset(&body, 0, sizeof(body));
206
207 mutex_lock(&ctx->xsl_error_lock);
208 if (!ctx->xsl_error.addr) {
209 mutex_unlock(&ctx->xsl_error_lock);
210 return 0;
211 }
212
213 body.addr = ctx->xsl_error.addr;
214 body.dsisr = ctx->xsl_error.dsisr;
215 body.count = ctx->xsl_error.count;
216
217 ctx->xsl_error.addr = 0;
218 ctx->xsl_error.dsisr = 0;
219 ctx->xsl_error.count = 0;
220
221 mutex_unlock(&ctx->xsl_error_lock);
222
223 header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR;
224
225 if (copy_to_user(buf, &body, sizeof(body)))
226 return -EFAULT;
227
228 return sizeof(body);
229}
230
231#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error)
232
233/*
234 * Reports events on the AFU
235 * Format:
236 * Header (struct ocxl_kernel_event_header)
237 * Body (struct ocxl_kernel_event_*)
238 * Header...
239 */
240static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
241 loff_t *off)
242{
243 struct ocxl_context *ctx = file->private_data;
244 struct ocxl_kernel_event_header header;
245 ssize_t rc;
246 ssize_t used = 0;
247 DEFINE_WAIT(event_wait);
248
249 memset(&header, 0, sizeof(header));
250
251 /* Require offset to be 0 */
252 if (*off != 0)
253 return -EINVAL;
254
255 if (count < (sizeof(struct ocxl_kernel_event_header) +
256 AFU_EVENT_BODY_MAX_SIZE))
257 return -EINVAL;
258
259 for (;;) {
260 prepare_to_wait(&ctx->events_wq, &event_wait,
261 TASK_INTERRUPTIBLE);
262
263 if (afu_events_pending(ctx))
264 break;
265
266 if (ctx->status == CLOSED)
267 break;
268
269 if (file->f_flags & O_NONBLOCK) {
270 finish_wait(&ctx->events_wq, &event_wait);
271 return -EAGAIN;
272 }
273
274 if (signal_pending(current)) {
275 finish_wait(&ctx->events_wq, &event_wait);
276 return -ERESTARTSYS;
277 }
278
279 schedule();
280 }
281
282 finish_wait(&ctx->events_wq, &event_wait);
283
284 if (has_xsl_error(ctx)) {
285 used = append_xsl_error(ctx, &header, buf + sizeof(header));
286 if (used < 0)
287 return used;
288 }
289
290 if (!afu_events_pending(ctx))
291 header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST;
292
293 if (copy_to_user(buf, &header, sizeof(header)))
294 return -EFAULT;
295
296 used += sizeof(header);
297
298 rc = (ssize_t) used;
299 return rc;
300}
301
302static int afu_release(struct inode *inode, struct file *file)
303{
304 struct ocxl_context *ctx = file->private_data;
305 int rc;
306
307 pr_debug("%s for device %x\n", __func__, inode->i_rdev);
308 rc = ocxl_context_detach(ctx);
309 mutex_lock(&ctx->mapping_lock);
310 ctx->mapping = NULL;
311 mutex_unlock(&ctx->mapping_lock);
312 wake_up_all(&ctx->events_wq);
313 if (rc != -EBUSY)
314 ocxl_context_free(ctx);
315 return 0;
316}
317
318static const struct file_operations ocxl_afu_fops = {
319 .owner = THIS_MODULE,
320 .open = afu_open,
321 .unlocked_ioctl = afu_ioctl,
322 .compat_ioctl = afu_compat_ioctl,
323 .mmap = afu_mmap,
324 .poll = afu_poll,
325 .read = afu_read,
326 .release = afu_release,
327};
328
329int ocxl_create_cdev(struct ocxl_afu *afu)
330{
331 int rc;
332
333 cdev_init(&afu->cdev, &ocxl_afu_fops);
334 rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
335 if (rc) {
336 dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
337 return rc;
338 }
339 return 0;
340}
341
342void ocxl_destroy_cdev(struct ocxl_afu *afu)
343{
344 cdev_del(&afu->cdev);
345}
346
347int ocxl_register_afu(struct ocxl_afu *afu)
348{
349 int minor;
350
351 minor = allocate_afu_minor(afu);
352 if (minor < 0)
353 return minor;
354 afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
355 afu->dev.class = ocxl_class;
356 return device_register(&afu->dev);
357}
358
359void ocxl_unregister_afu(struct ocxl_afu *afu)
360{
361 free_afu_minor(afu);
362}
363
364static char *ocxl_devnode(struct device *dev, umode_t *mode)
365{
366 return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
367}
368
369int ocxl_file_init(void)
370{
371 int rc;
372
373 mutex_init(&minors_idr_lock);
374 idr_init(&minors_idr);
375
376 rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
377 if (rc) {
378 pr_err("Unable to allocate ocxl major number: %d\n", rc);
379 return rc;
380 }
381
382 ocxl_class = class_create(THIS_MODULE, "ocxl");
383 if (IS_ERR(ocxl_class)) {
384 pr_err("Unable to create ocxl class\n");
385 unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
386 return PTR_ERR(ocxl_class);
387 }
388
389 ocxl_class->devnode = ocxl_devnode;
390 return 0;
391}
392
393void ocxl_file_exit(void)
394{
395 class_destroy(ocxl_class);
396 unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
397 idr_destroy(&minors_idr);
398}
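For reference, a small sketch of how userspace could parse the event stream produced by afu_read() above: each read() returns an ocxl_kernel_event_header immediately followed by the event body, and OCXL_KERNEL_EVENT_FLAG_LAST marks the last pending event. The struct and constant names come from the uapi header added by this patch; the buffer size is only indicative.

#include <string.h>
#include <unistd.h>
#include <misc/ocxl.h>

/* fd is an open, attached ocxl context; returns 1 if more events are pending */
static int read_one_event(int fd)
{
	char buf[4096];	/* must hold the header plus the largest body */
	struct ocxl_kernel_event_header header;
	struct ocxl_kernel_event_xsl_fault_error xsl;
	ssize_t used;

	used = read(fd, buf, sizeof(buf));	/* blocks unless O_NONBLOCK is set */
	if (used < (ssize_t) sizeof(header))
		return -1;

	memcpy(&header, buf, sizeof(header));
	if (header.type == OCXL_AFU_EVENT_XSL_FAULT_ERROR) {
		memcpy(&xsl, buf + sizeof(header), sizeof(xsl));
		/* xsl.addr, xsl.dsisr and xsl.count describe the translation fault */
	}
	return (header.flags & OCXL_KERNEL_EVENT_FLAG_LAST) ? 0 : 1;
}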
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
new file mode 100644
index 000000000000..64d7a98c904a
--- /dev/null
+++ b/drivers/misc/ocxl/link.c
@@ -0,0 +1,603 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <linux/sched/mm.h>
4#include <linux/mutex.h>
5#include <linux/mmu_context.h>
6#include <asm/copro.h>
7#include <asm/pnv-ocxl.h>
8#include "ocxl_internal.h"
9
10
11#define SPA_PASID_BITS 15
12#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1)
13#define SPA_PE_MASK SPA_PASID_MAX
14#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 Mb */
15
16#define SPA_CFG_SF (1ull << (63-0))
17#define SPA_CFG_TA (1ull << (63-1))
18#define SPA_CFG_HV (1ull << (63-3))
19#define SPA_CFG_UV (1ull << (63-4))
20#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */
21#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */
22#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */
23#define SPA_CFG_PR (1ull << (63-49))
24#define SPA_CFG_TC (1ull << (63-54))
25#define SPA_CFG_DR (1ull << (63-59))
26
27#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */
28#define SPA_XSL_S (1ull << (63-38)) /* Store operation */
29
30#define SPA_PE_VALID 0x80000000
31
32
33struct pe_data {
34 struct mm_struct *mm;
35 /* callback to trigger when a translation fault occurs */
36 void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
37 /* opaque pointer to be passed to the above callback */
38 void *xsl_err_data;
39 struct rcu_head rcu;
40};
41
42struct spa {
43 struct ocxl_process_element *spa_mem;
44 int spa_order;
45 struct mutex spa_lock;
46 struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
47 char *irq_name;
48 int virq;
49 void __iomem *reg_dsisr;
50 void __iomem *reg_dar;
51 void __iomem *reg_tfc;
52 void __iomem *reg_pe_handle;
53 /*
54 * The following field is used by the memory fault
55 * interrupt handler. We can only have one interrupt at a
56 * time. The NPU won't raise another interrupt until the
57 * previous one has been ack'd by writing to the TFC register
58 */
59 struct xsl_fault {
60 struct work_struct fault_work;
61 u64 pe;
62 u64 dsisr;
63 u64 dar;
64 struct pe_data pe_data;
65 } xsl_fault;
66};
67
68/*
69 * An opencapi link can be used by several PCI functions. We have
70 * one link per device slot.
71 *
72 * A linked list of opencapi links should suffice, as there's a
73 * limited number of opencapi slots on a system and lookup is only
74 * done when the device is probed
75 */
76struct link {
77 struct list_head list;
78 struct kref ref;
79 int domain;
80 int bus;
81 int dev;
82 atomic_t irq_available;
83 struct spa *spa;
84 void *platform_data;
85};
86static struct list_head links_list = LIST_HEAD_INIT(links_list);
87static DEFINE_MUTEX(links_list_lock);
88
89enum xsl_response {
90 CONTINUE,
91 ADDRESS_ERROR,
92 RESTART,
93};
94
95
96static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
97{
98 u64 reg;
99
100 *dsisr = in_be64(spa->reg_dsisr);
101 *dar = in_be64(spa->reg_dar);
102 reg = in_be64(spa->reg_pe_handle);
103 *pe = reg & SPA_PE_MASK;
104}
105
106static void ack_irq(struct spa *spa, enum xsl_response r)
107{
108 u64 reg = 0;
109
110 /* continue is not supported */
111 if (r == RESTART)
112 reg = PPC_BIT(31);
113 else if (r == ADDRESS_ERROR)
114 reg = PPC_BIT(30);
115 else
116 WARN(1, "Invalid irq response %d\n", r);
117
118 if (reg)
119 out_be64(spa->reg_tfc, reg);
120}
121
122static void xsl_fault_handler_bh(struct work_struct *fault_work)
123{
124 unsigned int flt = 0;
125 unsigned long access, flags, inv_flags = 0;
126 enum xsl_response r;
127 struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
128 fault_work);
129 struct spa *spa = container_of(fault, struct spa, xsl_fault);
130
131 int rc;
132
133 /*
134 * We need to release a reference on the mm whenever exiting this
135 * function (taken in the memory fault interrupt handler)
136 */
137 rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
138 &flt);
139 if (rc) {
140 pr_debug("copro_handle_mm_fault failed: %d\n", rc);
141 if (fault->pe_data.xsl_err_cb) {
142 fault->pe_data.xsl_err_cb(
143 fault->pe_data.xsl_err_data,
144 fault->dar, fault->dsisr);
145 }
146 r = ADDRESS_ERROR;
147 goto ack;
148 }
149
150 if (!radix_enabled()) {
151 /*
152 * update_mmu_cache() will not have loaded the hash
153 * since current->trap is not a 0x400 or 0x300, so
154 * just call hash_page_mm() here.
155 */
156 access = _PAGE_PRESENT | _PAGE_READ;
157 if (fault->dsisr & SPA_XSL_S)
158 access |= _PAGE_WRITE;
159
160 if (REGION_ID(fault->dar) != USER_REGION_ID)
161 access |= _PAGE_PRIVILEGED;
162
163 local_irq_save(flags);
164 hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
165 inv_flags);
166 local_irq_restore(flags);
167 }
168 r = RESTART;
169ack:
170 mmdrop(fault->pe_data.mm);
171 ack_irq(spa, r);
172}
173
174static irqreturn_t xsl_fault_handler(int irq, void *data)
175{
176 struct link *link = (struct link *) data;
177 struct spa *spa = link->spa;
178 u64 dsisr, dar, pe_handle;
179 struct pe_data *pe_data;
180 struct ocxl_process_element *pe;
181 int lpid, pid, tid;
182
183 read_irq(spa, &dsisr, &dar, &pe_handle);
184
185 WARN_ON(pe_handle > SPA_PE_MASK);
186 pe = spa->spa_mem + pe_handle;
187 lpid = be32_to_cpu(pe->lpid);
188 pid = be32_to_cpu(pe->pid);
189 tid = be32_to_cpu(pe->tid);
190 /* We could be reading all null values here if the PE is being
191 * removed while an interrupt kicks in. It's not supposed to
192 * happen if the driver notified the AFU to terminate the
193 * PASID, and the AFU waited for pending operations before
194 * acknowledging. But even if it happens, we won't find a
195 * memory context below and will fail silently, so it should be ok.
196 */
197 if (!(dsisr & SPA_XSL_TF)) {
198 WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
199 ack_irq(spa, ADDRESS_ERROR);
200 return IRQ_HANDLED;
201 }
202
203 rcu_read_lock();
204 pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
205 if (!pe_data) {
206 /*
207 * Could only happen if the driver didn't notify the
208 * AFU about PASID termination before removing the PE,
209 * or the AFU didn't wait for all memory access to
210 * have completed.
211 *
212 * Either way, we fail early, but we shouldn't log an
213 * error message, as it is a valid (if unexpected)
214 * scenario
215 */
216 rcu_read_unlock();
217 pr_debug("Unknown mm context for xsl interrupt\n");
218 ack_irq(spa, ADDRESS_ERROR);
219 return IRQ_HANDLED;
220 }
221 WARN_ON(pe_data->mm->context.id != pid);
222
223 spa->xsl_fault.pe = pe_handle;
224 spa->xsl_fault.dar = dar;
225 spa->xsl_fault.dsisr = dsisr;
226 spa->xsl_fault.pe_data = *pe_data;
227 mmgrab(pe_data->mm); /* mm count is released by bottom half */
228
229 rcu_read_unlock();
230 schedule_work(&spa->xsl_fault.fault_work);
231 return IRQ_HANDLED;
232}
233
234static void unmap_irq_registers(struct spa *spa)
235{
236 pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
237 spa->reg_pe_handle);
238}
239
240static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
241{
242 return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
243 &spa->reg_tfc, &spa->reg_pe_handle);
244}
245
246static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
247{
248 struct spa *spa = link->spa;
249 int rc;
250 int hwirq;
251
252 rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
253 if (rc)
254 return rc;
255
256 rc = map_irq_registers(dev, spa);
257 if (rc)
258 return rc;
259
260 spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
261 link->domain, link->bus, link->dev);
262 if (!spa->irq_name) {
263 unmap_irq_registers(spa);
264 dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
265 return -ENOMEM;
266 }
267 /*
268 * At some point, we'll need to look into allowing a higher
269 * number of interrupts. Could we have an IRQ domain per link?
270 */
271 spa->virq = irq_create_mapping(NULL, hwirq);
272 if (!spa->virq) {
273 kfree(spa->irq_name);
274 unmap_irq_registers(spa);
275 dev_err(&dev->dev,
276 "irq_create_mapping failed for translation interrupt\n");
277 return -EINVAL;
278 }
279
280 dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
281
282 rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
283 link);
284 if (rc) {
285 irq_dispose_mapping(spa->virq);
286 kfree(spa->irq_name);
287 unmap_irq_registers(spa);
288 dev_err(&dev->dev,
289 "request_irq failed for translation interrupt: %d\n",
290 rc);
291 return -EINVAL;
292 }
293 return 0;
294}
295
296static void release_xsl_irq(struct link *link)
297{
298 struct spa *spa = link->spa;
299
300 if (spa->virq) {
301 free_irq(spa->virq, link);
302 irq_dispose_mapping(spa->virq);
303 }
304 kfree(spa->irq_name);
305 unmap_irq_registers(spa);
306}
307
308static int alloc_spa(struct pci_dev *dev, struct link *link)
309{
310 struct spa *spa;
311
312 spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
313 if (!spa)
314 return -ENOMEM;
315
316 mutex_init(&spa->spa_lock);
317 INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
318 INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
319
320 spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
321 spa->spa_mem = (struct ocxl_process_element *)
322 __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
323 if (!spa->spa_mem) {
324 dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
325 kfree(spa);
326 return -ENOMEM;
327 }
328 pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
329 link->dev, spa->spa_mem);
330
331 link->spa = spa;
332 return 0;
333}
334
335static void free_spa(struct link *link)
336{
337 struct spa *spa = link->spa;
338
339 pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
340 link->dev);
341
342 if (spa && spa->spa_mem) {
343 free_pages((unsigned long) spa->spa_mem, spa->spa_order);
344 kfree(spa);
345 link->spa = NULL;
346 }
347}
348
349static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
350{
351 struct link *link;
352 int rc;
353
354 link = kzalloc(sizeof(struct link), GFP_KERNEL);
355 if (!link)
356 return -ENOMEM;
357
358 kref_init(&link->ref);
359 link->domain = pci_domain_nr(dev->bus);
360 link->bus = dev->bus->number;
361 link->dev = PCI_SLOT(dev->devfn);
362 atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
363
364 rc = alloc_spa(dev, link);
365 if (rc)
366 goto err_free;
367
368 rc = setup_xsl_irq(dev, link);
369 if (rc)
370 goto err_spa;
371
372 /* platform specific hook */
373 rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
374 &link->platform_data);
375 if (rc)
376 goto err_xsl_irq;
377
378 *out_link = link;
379 return 0;
380
381err_xsl_irq:
382 release_xsl_irq(link);
383err_spa:
384 free_spa(link);
385err_free:
386 kfree(link);
387 return rc;
388}
389
390static void free_link(struct link *link)
391{
392 release_xsl_irq(link);
393 free_spa(link);
394 kfree(link);
395}
396
397int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
398{
399 int rc = 0;
400 struct link *link;
401
402 mutex_lock(&links_list_lock);
403 list_for_each_entry(link, &links_list, list) {
404 /* The functions of a device all share the same link */
405 if (link->domain == pci_domain_nr(dev->bus) &&
406 link->bus == dev->bus->number &&
407 link->dev == PCI_SLOT(dev->devfn)) {
408 kref_get(&link->ref);
409 *link_handle = link;
410 goto unlock;
411 }
412 }
413 rc = alloc_link(dev, PE_mask, &link);
414 if (rc)
415 goto unlock;
416
417 list_add(&link->list, &links_list);
418 *link_handle = link;
419unlock:
420 mutex_unlock(&links_list_lock);
421 return rc;
422}
423
424static void release_xsl(struct kref *ref)
425{
426 struct link *link = container_of(ref, struct link, ref);
427
428 list_del(&link->list);
429 /* call platform code before releasing data */
430 pnv_ocxl_spa_release(link->platform_data);
431 free_link(link);
432}
433
434void ocxl_link_release(struct pci_dev *dev, void *link_handle)
435{
436 struct link *link = (struct link *) link_handle;
437
438 mutex_lock(&links_list_lock);
439 kref_put(&link->ref, release_xsl);
440 mutex_unlock(&links_list_lock);
441}
442
443static u64 calculate_cfg_state(bool kernel)
444{
445 u64 state;
446
447 state = SPA_CFG_DR;
448 if (mfspr(SPRN_LPCR) & LPCR_TC)
449 state |= SPA_CFG_TC;
450 if (radix_enabled())
451 state |= SPA_CFG_XLAT_ror;
452 else
453 state |= SPA_CFG_XLAT_hpt;
454 state |= SPA_CFG_HV;
455 if (kernel) {
456 if (mfmsr() & MSR_SF)
457 state |= SPA_CFG_SF;
458 } else {
459 state |= SPA_CFG_PR;
460 if (!test_tsk_thread_flag(current, TIF_32BIT))
461 state |= SPA_CFG_SF;
462 }
463 return state;
464}
465
466int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
467 u64 amr, struct mm_struct *mm,
468 void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
469 void *xsl_err_data)
470{
471 struct link *link = (struct link *) link_handle;
472 struct spa *spa = link->spa;
473 struct ocxl_process_element *pe;
474 int pe_handle, rc = 0;
475 struct pe_data *pe_data;
476
477 BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
478 if (pasid > SPA_PASID_MAX)
479 return -EINVAL;
480
481 mutex_lock(&spa->spa_lock);
482 pe_handle = pasid & SPA_PE_MASK;
483 pe = spa->spa_mem + pe_handle;
484
485 if (pe->software_state) {
486 rc = -EBUSY;
487 goto unlock;
488 }
489
490 pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
491 if (!pe_data) {
492 rc = -ENOMEM;
493 goto unlock;
494 }
495
496 pe_data->mm = mm;
497 pe_data->xsl_err_cb = xsl_err_cb;
498 pe_data->xsl_err_data = xsl_err_data;
499
500 memset(pe, 0, sizeof(struct ocxl_process_element));
501 pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
502 pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
503 pe->pid = cpu_to_be32(pidr);
504 pe->tid = cpu_to_be32(tidr);
505 pe->amr = cpu_to_be64(amr);
506 pe->software_state = cpu_to_be32(SPA_PE_VALID);
507
508 mm_context_add_copro(mm);
509 /*
510 * Barrier is to make sure PE is visible in the SPA before it
511 * is used by the device. It also helps with the global TLBI
512 * invalidation
513 */
514 mb();
515 radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
516
517 /*
518 * The mm must stay valid for as long as the device uses it. We
519 * lower the count when the context is removed from the SPA.
520 *
521 * We grab mm_count (and not mm_users), as we don't want to
522 * end up in a circular dependency if a process mmaps its
523 * mmio, therefore incrementing the file ref count when
524 * calling mmap(), and forgets to unmap before exiting. In
525 * that scenario, when the kernel handles the death of the
526 * process, the file is not cleaned because unmap was not
527 * called, and the mm wouldn't be freed because we would still
528 * have a reference on mm_users. Incrementing mm_count solves
529 * the problem.
530 */
531 mmgrab(mm);
532unlock:
533 mutex_unlock(&spa->spa_lock);
534 return rc;
535}
536
537int ocxl_link_remove_pe(void *link_handle, int pasid)
538{
539 struct link *link = (struct link *) link_handle;
540 struct spa *spa = link->spa;
541 struct ocxl_process_element *pe;
542 struct pe_data *pe_data;
543 int pe_handle, rc;
544
545 if (pasid > SPA_PASID_MAX)
546 return -EINVAL;
547
548 /*
549 * About synchronization with our memory fault handler:
550 *
551 * Before removing the PE, the driver is supposed to have
552 * notified the AFU, which should have cleaned up and made
553 * sure the PASID is no longer in use, including pending
554 * interrupts. However, there's no way to be sure...
555 *
556 * We clear the PE and remove the context from our radix
557 * tree. From that point on, any new interrupt for that
558 * context will fail silently, which is ok. As mentioned
559 * above, that's not expected, but it could happen if the
560 * driver or AFU didn't do the right thing.
561 *
562 * There could still be a bottom half running, but we don't
563 * need to wait/flush, as it is managing a reference count on
564 * the mm it reads from the radix tree.
565 */
566 pe_handle = pasid & SPA_PE_MASK;
567 pe = spa->spa_mem + pe_handle;
568
569 mutex_lock(&spa->spa_lock);
570
571 if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
572 rc = -EINVAL;
573 goto unlock;
574 }
575
576 memset(pe, 0, sizeof(struct ocxl_process_element));
577 /*
578 * The barrier makes sure the PE is removed from the SPA
579 * before we clear the NPU context cache below, so that the
580 * old PE cannot be reloaded erroneously.
581 */
582 mb();
583
584 /*
585 * hook to platform code
586 * On powerpc, the entry needs to be cleared from the context
587 * cache of the NPU.
588 */
589 rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
590 WARN_ON(rc);
591
592 pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
593 if (!pe_data) {
594 WARN(1, "Couldn't find pe data when removing PE\n");
595 } else {
596 mm_context_remove_copro(pe_data->mm);
597 mmdrop(pe_data->mm);
598 kfree_rcu(pe_data, rcu);
599 }
600unlock:
601 mutex_unlock(&spa->spa_lock);
602 return rc;
603}
diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c
new file mode 100644
index 000000000000..7210d9e059be
--- /dev/null
+++ b/drivers/misc/ocxl/main.c
@@ -0,0 +1,33 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <linux/module.h>
4#include <linux/pci.h>
5#include "ocxl_internal.h"
6
7static int __init init_ocxl(void)
8{
9 int rc = 0;
10
11 rc = ocxl_file_init();
12 if (rc)
13 return rc;
14
15 rc = pci_register_driver(&ocxl_pci_driver);
16 if (rc) {
17 ocxl_file_exit();
18 return rc;
19 }
20 return 0;
21}
22
23static void exit_ocxl(void)
24{
25 pci_unregister_driver(&ocxl_pci_driver);
26 ocxl_file_exit();
27}
28
29module_init(init_ocxl);
30module_exit(exit_ocxl);
31
32MODULE_DESCRIPTION("Open Coherent Accelerator");
33MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
new file mode 100644
index 000000000000..04fc160c7bd5
--- /dev/null
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -0,0 +1,193 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#ifndef _OCXL_INTERNAL_H_
4#define _OCXL_INTERNAL_H_
5
6#include <linux/pci.h>
7#include <linux/cdev.h>
8#include <linux/list.h>
9
10#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */
11#define MAX_IRQ_PER_LINK 2000
12#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK
13
14#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
15#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)
16
17extern struct pci_driver ocxl_pci_driver;
18
19/*
20 * The following 2 structures are a fairly generic way of representing
21 * the configuration data for a function and AFU, as read from the
22 * configuration space.
23 */
24struct ocxl_afu_config {
25 u8 idx;
26 int dvsec_afu_control_pos;
27 char name[OCXL_AFU_NAME_SZ];
28 u8 version_major;
29 u8 version_minor;
30 u8 afuc_type;
31 u8 afum_type;
32 u8 profile;
33 u8 global_mmio_bar;
34 u64 global_mmio_offset;
35 u32 global_mmio_size;
36 u8 pp_mmio_bar;
37 u64 pp_mmio_offset;
38 u32 pp_mmio_stride;
39 u8 log_mem_size;
40 u8 pasid_supported_log;
41 u16 actag_supported;
42};
43
44struct ocxl_fn_config {
45 int dvsec_tl_pos;
46 int dvsec_function_pos;
47 int dvsec_afu_info_pos;
48 s8 max_pasid_log;
49 s8 max_afu_index;
50};
51
52struct ocxl_fn {
53 struct device dev;
54 int bar_used[3];
55 struct ocxl_fn_config config;
56 struct list_head afu_list;
57 int pasid_base;
58 int actag_base;
59 int actag_enabled;
60 int actag_supported;
61 struct list_head pasid_list;
62 struct list_head actag_list;
63 void *link;
64};
65
66struct ocxl_afu {
67 struct ocxl_fn *fn;
68 struct list_head list;
69 struct device dev;
70 struct cdev cdev;
71 struct ocxl_afu_config config;
72 int pasid_base;
73 int pasid_count; /* opened contexts */
74 int pasid_max; /* maximum number of contexts */
75 int actag_base;
76 int actag_enabled;
77 struct mutex contexts_lock;
78 struct idr contexts_idr;
79 struct mutex afu_control_lock;
80 u64 global_mmio_start;
81 u64 irq_base_offset;
82 void __iomem *global_mmio_ptr;
83 u64 pp_mmio_start;
84 struct bin_attribute attr_global_mmio;
85};
86
87enum ocxl_context_status {
88 CLOSED,
89 OPENED,
90 ATTACHED,
91};
92
93// Contains metadata about a translation fault
94struct ocxl_xsl_error {
95 u64 addr; // The address that triggered the fault
96	u64 dsisr; // The value of the DSISR register
97 u64 count; // The number of times this fault has been triggered
98};
99
100struct ocxl_context {
101 struct ocxl_afu *afu;
102 int pasid;
103 struct mutex status_mutex;
104 enum ocxl_context_status status;
105 struct address_space *mapping;
106 struct mutex mapping_lock;
107 wait_queue_head_t events_wq;
108 struct mutex xsl_error_lock;
109 struct ocxl_xsl_error xsl_error;
110 struct mutex irq_lock;
111 struct idr irq_idr;
112};
113
114struct ocxl_process_element {
115 __be64 config_state;
116 __be32 reserved1[11];
117 __be32 lpid;
118 __be32 tid;
119 __be32 pid;
120 __be32 reserved2[10];
121 __be64 amr;
122 __be32 reserved3[3];
123 __be32 software_state;
124};
125
126
127extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
128extern void ocxl_afu_put(struct ocxl_afu *afu);
129
130extern int ocxl_create_cdev(struct ocxl_afu *afu);
131extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
132extern int ocxl_register_afu(struct ocxl_afu *afu);
133extern void ocxl_unregister_afu(struct ocxl_afu *afu);
134
135extern int ocxl_file_init(void);
136extern void ocxl_file_exit(void);
137
138extern int ocxl_config_read_function(struct pci_dev *dev,
139 struct ocxl_fn_config *fn);
140
141extern int ocxl_config_check_afu_index(struct pci_dev *dev,
142 struct ocxl_fn_config *fn, int afu_idx);
143extern int ocxl_config_read_afu(struct pci_dev *dev,
144 struct ocxl_fn_config *fn,
145 struct ocxl_afu_config *afu,
146 u8 afu_idx);
147extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
148extern void ocxl_config_set_afu_pasid(struct pci_dev *dev,
149 int afu_control,
150 int pasid_base, u32 pasid_count_log);
151extern int ocxl_config_get_actag_info(struct pci_dev *dev,
152 u16 *base, u16 *enabled, u16 *supported);
153extern void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec,
154 u32 tag_first, u32 tag_count);
155extern void ocxl_config_set_afu_actag(struct pci_dev *dev, int afu_control,
156 int actag_base, int actag_count);
157extern void ocxl_config_set_afu_state(struct pci_dev *dev, int afu_control,
158 int enable);
159extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
160extern int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control,
161 int pasid);
162
163extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask,
164 void **link_handle);
165extern void ocxl_link_release(struct pci_dev *dev, void *link_handle);
166extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
167 u64 amr, struct mm_struct *mm,
168 void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
169 void *xsl_err_data);
170extern int ocxl_link_remove_pe(void *link_handle, int pasid);
171extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq,
172 u64 *addr);
173extern void ocxl_link_free_irq(void *link_handle, int hw_irq);
174
175extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
176extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
177extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
178extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
179
180extern struct ocxl_context *ocxl_context_alloc(void);
181extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
182 struct address_space *mapping);
183extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
184extern int ocxl_context_mmap(struct ocxl_context *ctx,
185 struct vm_area_struct *vma);
186extern int ocxl_context_detach(struct ocxl_context *ctx);
187extern void ocxl_context_detach_all(struct ocxl_afu *afu);
188extern void ocxl_context_free(struct ocxl_context *ctx);
189
190extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
191extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
192
193#endif /* _OCXL_INTERNAL_H_ */
diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
new file mode 100644
index 000000000000..d14cb56e6920
--- /dev/null
+++ b/drivers/misc/ocxl/pasid.c
@@ -0,0 +1,107 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include "ocxl_internal.h"
4
5
6struct id_range {
7 struct list_head list;
8 u32 start;
9 u32 end;
10};
11
12#ifdef DEBUG
13static void dump_list(struct list_head *head, char *type_str)
14{
15 struct id_range *cur;
16
17 pr_debug("%s ranges allocated:\n", type_str);
18 list_for_each_entry(cur, head, list) {
19 pr_debug("Range %d->%d\n", cur->start, cur->end);
20 }
21}
22#endif
23
24static int range_alloc(struct list_head *head, u32 size, int max_id,
25 char *type_str)
26{
27 struct list_head *pos;
28 struct id_range *cur, *new;
29 int rc, last_end;
30
31 new = kmalloc(sizeof(struct id_range), GFP_KERNEL);
32 if (!new)
33 return -ENOMEM;
34
35 pos = head;
36 last_end = -1;
37 list_for_each_entry(cur, head, list) {
38 if ((cur->start - last_end) > size)
39 break;
40 last_end = cur->end;
41 pos = &cur->list;
42 }
43
44 new->start = last_end + 1;
45 new->end = new->start + size - 1;
46
47 if (new->end > max_id) {
48 kfree(new);
49 rc = -ENOSPC;
50 } else {
51 list_add(&new->list, pos);
52 rc = new->start;
53 }
54
55#ifdef DEBUG
56 dump_list(head, type_str);
57#endif
58 return rc;
59}
60
61static void range_free(struct list_head *head, u32 start, u32 size,
62 char *type_str)
63{
64 bool found = false;
65 struct id_range *cur, *tmp;
66
67 list_for_each_entry_safe(cur, tmp, head, list) {
68 if (cur->start == start && cur->end == (start + size - 1)) {
69 found = true;
70 list_del(&cur->list);
71 kfree(cur);
72 break;
73 }
74 }
75 WARN_ON(!found);
76#ifdef DEBUG
77 dump_list(head, type_str);
78#endif
79}
80
81int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
82{
83 int max_pasid;
84
85 if (fn->config.max_pasid_log < 0)
86 return -ENOSPC;
87 max_pasid = 1 << fn->config.max_pasid_log;
88 return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid");
89}
90
91void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
92{
93 return range_free(&fn->pasid_list, start, size, "afu pasid");
94}
95
96int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size)
97{
98 int max_actag;
99
100 max_actag = fn->actag_enabled;
101 return range_alloc(&fn->actag_list, size, max_actag, "afu actag");
102}
103
104void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
105{
106 return range_free(&fn->actag_list, start, size, "afu actag");
107}
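The allocator above is a first-fit search over an ordered list of allocated ranges: it walks the list looking for a gap large enough, inserts the new range there, and fails with -ENOSPC if the range would exceed the PASID or actag limit. As a rough standalone analogue (user-space C with a plain bitmap instead of the list_head based ranges, for illustration only), the following shows how consecutive allocations pack and how a freed range is reused:

#include <stdio.h>

#define MAX_ID 16			/* pretend the function allows 16 ids */

static int used[MAX_ID];		/* 1 if the id is allocated */

static int range_alloc(int size)	/* first id of a free run, or -1 */
{
	for (int start = 0; start + size <= MAX_ID; start++) {
		int busy = 0;

		for (int i = start; i < start + size; i++)
			busy |= used[i];
		if (!busy) {
			for (int i = start; i < start + size; i++)
				used[i] = 1;
			return start;
		}
	}
	return -1;			/* analogue of -ENOSPC */
}

static void range_free(int start, int size)
{
	for (int i = start; i < start + size; i++)
		used[i] = 0;
}

int main(void)
{
	int a = range_alloc(4);			/* 0: ids 0..3  */
	int b = range_alloc(8);			/* 4: ids 4..11 */

	range_free(a, 4);			/* ids 0..3 free again */
	printf("%d %d %d\n", a, b, range_alloc(2));	/* "0 4 0": gap reused */
	return 0;
}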
diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c
new file mode 100644
index 000000000000..0051d9ec76cc
--- /dev/null
+++ b/drivers/misc/ocxl/pci.c
@@ -0,0 +1,585 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <linux/module.h>
4#include <linux/pci.h>
5#include <linux/idr.h>
6#include <asm/pnv-ocxl.h>
7#include "ocxl_internal.h"
8
9/*
10 * Any opencapi device which wants to use this 'generic' driver should
11 * use the 0x062B device ID. Vendors should define the subsystem
12 * vendor/device ID to help differentiate devices.
13 */
14static const struct pci_device_id ocxl_pci_tbl[] = {
15 { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), },
16 { }
17};
18MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);
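/*
 * Hypothetical example (IDs made up): a vendor-specific driver wanting
 * to claim only its own opencapi devices could match on the subsystem
 * IDs instead, e.g.
 *
 *	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_IBM, 0x062B, 0x1014, 0x0666), },
 */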
19
20
21static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
22{
23 return (get_device(&fn->dev) == NULL) ? NULL : fn;
24}
25
26static void ocxl_fn_put(struct ocxl_fn *fn)
27{
28 put_device(&fn->dev);
29}
30
31struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
32{
33 return (get_device(&afu->dev) == NULL) ? NULL : afu;
34}
35
36void ocxl_afu_put(struct ocxl_afu *afu)
37{
38 put_device(&afu->dev);
39}
40
41static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
42{
43 struct ocxl_afu *afu;
44
45 afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
46 if (!afu)
47 return NULL;
48
49 mutex_init(&afu->contexts_lock);
50 mutex_init(&afu->afu_control_lock);
51 idr_init(&afu->contexts_idr);
52 afu->fn = fn;
53 ocxl_fn_get(fn);
54 return afu;
55}
56
57static void free_afu(struct ocxl_afu *afu)
58{
59 idr_destroy(&afu->contexts_idr);
60 ocxl_fn_put(afu->fn);
61 kfree(afu);
62}
63
64static void free_afu_dev(struct device *dev)
65{
66 struct ocxl_afu *afu = to_ocxl_afu(dev);
67
68 ocxl_unregister_afu(afu);
69 free_afu(afu);
70}
71
72static int set_afu_device(struct ocxl_afu *afu, const char *location)
73{
74 struct ocxl_fn *fn = afu->fn;
75 int rc;
76
77 afu->dev.parent = &fn->dev;
78 afu->dev.release = free_afu_dev;
79 rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
80 afu->config.idx);
81 return rc;
82}
83
84static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
85{
86 struct ocxl_fn *fn = afu->fn;
87 int actag_count, actag_offset;
88
89 /*
90 * If the function was granted fewer actags than it supports,
91 * each AFU's share is scaled down proportionally.
92 */
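	/*
	 * Worked example (numbers made up): an AFU supporting 16 actags
	 * on a function that was granted 32 of the 64 actags it supports
	 * ends up with 16 * 32 / 64 = 8 actags.
	 */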
93 actag_count = afu->config.actag_supported *
94 fn->actag_enabled / fn->actag_supported;
95 actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
96 if (actag_offset < 0) {
97 dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
98 actag_count, actag_offset);
99 return actag_offset;
100 }
101 afu->actag_base = fn->actag_base + actag_offset;
102 afu->actag_enabled = actag_count;
103
104 ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
105 afu->actag_base, afu->actag_enabled);
106 dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
107 afu->actag_base, afu->actag_enabled);
108 return 0;
109}
110
111static void reclaim_afu_actag(struct ocxl_afu *afu)
112{
113 struct ocxl_fn *fn = afu->fn;
114 int start_offset, size;
115
116 start_offset = afu->actag_base - fn->actag_base;
117 size = afu->actag_enabled;
118 ocxl_actag_afu_free(afu->fn, start_offset, size);
119}
120
121static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
122{
123 struct ocxl_fn *fn = afu->fn;
124 int pasid_count, pasid_offset;
125
126 /*
127 * We only support the case where the function configuration
128 * requested enough PASIDs to cover all AFUs.
129 */
130 pasid_count = 1 << afu->config.pasid_supported_log;
131 pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
132 if (pasid_offset < 0) {
133 dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
134 pasid_count, pasid_offset);
135 return pasid_offset;
136 }
137 afu->pasid_base = fn->pasid_base + pasid_offset;
138 afu->pasid_count = 0;
139 afu->pasid_max = pasid_count;
140
141 ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
142 afu->pasid_base,
143 afu->config.pasid_supported_log);
144 dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
145 afu->pasid_base, pasid_count);
146 return 0;
147}
148
149static void reclaim_afu_pasid(struct ocxl_afu *afu)
150{
151 struct ocxl_fn *fn = afu->fn;
152 int start_offset, size;
153
154 start_offset = afu->pasid_base - fn->pasid_base;
155 size = 1 << afu->config.pasid_supported_log;
156 ocxl_pasid_afu_free(afu->fn, start_offset, size);
157}
158
159static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
160{
161 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
162 int rc, idx;
163
164 if (bar != 0 && bar != 2 && bar != 4)
165 return -EINVAL;
166
167 idx = bar >> 1;
168 if (fn->bar_used[idx]++ == 0) {
169 rc = pci_request_region(dev, bar, "ocxl");
170 if (rc)
171 return rc;
172 }
173 return 0;
174}
175
176static void release_fn_bar(struct ocxl_fn *fn, int bar)
177{
178 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
179 int idx;
180
181 if (bar != 0 && bar != 2 && bar != 4)
182 return;
183
184 idx = bar >> 1;
185 if (--fn->bar_used[idx] == 0)
186 pci_release_region(dev, bar);
187 WARN_ON(fn->bar_used[idx] < 0);
188}
189
190static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
191{
192 int rc;
193
194 rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
195 if (rc)
196 return rc;
197
198 rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
199 if (rc) {
200 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
201 return rc;
202 }
203
204 afu->global_mmio_start =
205 pci_resource_start(dev, afu->config.global_mmio_bar) +
206 afu->config.global_mmio_offset;
207 afu->pp_mmio_start =
208 pci_resource_start(dev, afu->config.pp_mmio_bar) +
209 afu->config.pp_mmio_offset;
210
211 afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
212 afu->config.global_mmio_size);
213 if (!afu->global_mmio_ptr) {
214 release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
215 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
216 dev_err(&dev->dev, "Error mapping global mmio area\n");
217 return -ENOMEM;
218 }
219
220 /*
221 * Leave an empty page between the per-process mmio area and
222 * the AFU interrupt mappings
223 */
224 afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
225 return 0;
226}
227
228static void unmap_mmio_areas(struct ocxl_afu *afu)
229{
230 if (afu->global_mmio_ptr) {
231 iounmap(afu->global_mmio_ptr);
232 afu->global_mmio_ptr = NULL;
233 }
234 afu->global_mmio_start = 0;
235 afu->pp_mmio_start = 0;
236 release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
237 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
238}
239
240static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
241{
242 int rc;
243
244 rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
245 if (rc)
246 return rc;
247
248 rc = set_afu_device(afu, dev_name(&dev->dev));
249 if (rc)
250 return rc;
251
252 rc = assign_afu_actag(afu, dev);
253 if (rc)
254 return rc;
255
256 rc = assign_afu_pasid(afu, dev);
257 if (rc) {
258 reclaim_afu_actag(afu);
259 return rc;
260 }
261
262 rc = map_mmio_areas(afu, dev);
263 if (rc) {
264 reclaim_afu_pasid(afu);
265 reclaim_afu_actag(afu);
266 return rc;
267 }
268 return 0;
269}
270
271static void deconfigure_afu(struct ocxl_afu *afu)
272{
273 unmap_mmio_areas(afu);
274 reclaim_afu_pasid(afu);
275 reclaim_afu_actag(afu);
276}
277
278static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
279{
280 int rc;
281
282 ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
283 /*
284 * Char device creation is the last step: processes can call into
285 * the driver as soon as it exists, so all other inits must be finished.
286 */
287 rc = ocxl_create_cdev(afu);
288 if (rc)
289 return rc;
290 return 0;
291}
292
293static void deactivate_afu(struct ocxl_afu *afu)
294{
295 struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
296
297 ocxl_destroy_cdev(afu);
298 ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
299}
300
301static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
302{
303 int rc;
304 struct ocxl_afu *afu;
305
306 afu = alloc_afu(fn);
307 if (!afu)
308 return -ENOMEM;
309
310 rc = configure_afu(afu, afu_idx, dev);
311 if (rc) {
312 free_afu(afu);
313 return rc;
314 }
315
316 rc = ocxl_register_afu(afu);
317 if (rc)
318 goto err;
319
320 rc = ocxl_sysfs_add_afu(afu);
321 if (rc)
322 goto err;
323
324 rc = activate_afu(dev, afu);
325 if (rc)
326 goto err_sys;
327
328 list_add_tail(&afu->list, &fn->afu_list);
329 return 0;
330
331err_sys:
332 ocxl_sysfs_remove_afu(afu);
333err:
334 deconfigure_afu(afu);
335 device_unregister(&afu->dev);
336 return rc;
337}
338
339static void remove_afu(struct ocxl_afu *afu)
340{
341 list_del(&afu->list);
342 ocxl_context_detach_all(afu);
343 deactivate_afu(afu);
344 ocxl_sysfs_remove_afu(afu);
345 deconfigure_afu(afu);
346 device_unregister(&afu->dev);
347}
348
349static struct ocxl_fn *alloc_function(struct pci_dev *dev)
350{
351 struct ocxl_fn *fn;
352
353 fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
354 if (!fn)
355 return NULL;
356
357 INIT_LIST_HEAD(&fn->afu_list);
358 INIT_LIST_HEAD(&fn->pasid_list);
359 INIT_LIST_HEAD(&fn->actag_list);
360 return fn;
361}
362
363static void free_function(struct ocxl_fn *fn)
364{
365 WARN_ON(!list_empty(&fn->afu_list));
366 WARN_ON(!list_empty(&fn->pasid_list));
367 kfree(fn);
368}
369
370static void free_function_dev(struct device *dev)
371{
372 struct ocxl_fn *fn = to_ocxl_function(dev);
373
374 free_function(fn);
375}
376
377static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
378{
379 int rc;
380
381 fn->dev.parent = &dev->dev;
382 fn->dev.release = free_function_dev;
383 rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
384 if (rc)
385 return rc;
386 pci_set_drvdata(dev, fn);
387 return 0;
388}
389
390static int assign_function_actag(struct ocxl_fn *fn)
391{
392 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
393 u16 base, enabled, supported;
394 int rc;
395
396 rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
397 if (rc)
398 return rc;
399
400 fn->actag_base = base;
401 fn->actag_enabled = enabled;
402 fn->actag_supported = supported;
403
404 ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
405 fn->actag_base, fn->actag_enabled);
406 dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
407 fn->actag_base, fn->actag_enabled);
408 return 0;
409}
410
411static int set_function_pasid(struct ocxl_fn *fn)
412{
413 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
414 int rc, desired_count, max_count;
415
416 /* A function may not require any PASID */
417 if (fn->config.max_pasid_log < 0)
418 return 0;
419
420 rc = ocxl_config_get_pasid_info(dev, &max_count);
421 if (rc)
422 return rc;
423
424 desired_count = 1 << fn->config.max_pasid_log;
425
426 if (desired_count > max_count) {
427 dev_err(&fn->dev,
428 "Function requires more PASIDs than are available (%d vs. %d)\n",
429 desired_count, max_count);
430 return -ENOSPC;
431 }
432
433 fn->pasid_base = 0;
434 return 0;
435}
436
437static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
438{
439 int rc;
440
441 rc = pci_enable_device(dev);
442 if (rc) {
443 dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
444 return rc;
445 }
446
447 /*
448 * Once it has been confirmed to work on our hardware, we
449 * should reset the function, to force the adapter to restart
450 * from scratch.
451 * A function reset would also reset all its AFUs.
452 *
453 * Some hints for implementation:
454 *
455 * - there's no status bit to know when the reset is done. We
456 * should try reading the config space to know when it's
457 * done.
458 * - probably something like:
459 * Reset
460 * wait 100ms
461 * issue config read
462 * allow device up to 1 sec to return success on config
463 * read before declaring it broken
464 *
465 * Some shared logic on the card (CFG, TLX) won't be reset, so
466 * there's no guarantee that it will be enough.
467 */
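	/*
	 * Hypothetical sketch of that sequence (illustration only, not
	 * implemented by this patch). pci_read_config_word(), msleep()
	 * and the jiffies helpers are existing kernel APIs; the reset
	 * trigger itself is left as a placeholder.
	 */
#if 0
	{
		unsigned long timeout = jiffies + msecs_to_jiffies(1000);
		u16 val = 0xffff;

		/* trigger the function reset here */
		msleep(100);
		do {
			pci_read_config_word(dev, PCI_VENDOR_ID, &val);
			if (val != 0xffff)
				break;	/* config space answers: reset done */
			msleep(10);
		} while (time_before(jiffies, timeout));
		if (val == 0xffff)
			return -EIO;	/* device never came back */
	}
#endif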
468 rc = ocxl_config_read_function(dev, &fn->config);
469 if (rc)
470 return rc;
471
472 rc = set_function_device(fn, dev);
473 if (rc)
474 return rc;
475
476 rc = assign_function_actag(fn);
477 if (rc)
478 return rc;
479
480 rc = set_function_pasid(fn);
481 if (rc)
482 return rc;
483
484 rc = ocxl_link_setup(dev, 0, &fn->link);
485 if (rc)
486 return rc;
487
488 rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
489 if (rc) {
490 ocxl_link_release(dev, fn->link);
491 return rc;
492 }
493 return 0;
494}
495
496static void deconfigure_function(struct ocxl_fn *fn)
497{
498 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
499
500 ocxl_link_release(dev, fn->link);
501 pci_disable_device(dev);
502}
503
504static struct ocxl_fn *init_function(struct pci_dev *dev)
505{
506 struct ocxl_fn *fn;
507 int rc;
508
509 fn = alloc_function(dev);
510 if (!fn)
511 return ERR_PTR(-ENOMEM);
512
513 rc = configure_function(fn, dev);
514 if (rc) {
515 free_function(fn);
516 return ERR_PTR(rc);
517 }
518
519 rc = device_register(&fn->dev);
520 if (rc) {
521 deconfigure_function(fn);
522 device_unregister(&fn->dev);
523 return ERR_PTR(rc);
524 }
525 return fn;
526}
527
528static void remove_function(struct ocxl_fn *fn)
529{
530 deconfigure_function(fn);
531 device_unregister(&fn->dev);
532}
533
534static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
535{
536 int rc, afu_count = 0;
537 u8 afu;
538 struct ocxl_fn *fn;
539
540 if (!radix_enabled()) {
541 dev_err(&dev->dev, "Unsupported memory model (hash)\n");
542 return -ENODEV;
543 }
544
545 fn = init_function(dev);
546 if (IS_ERR(fn)) {
547 dev_err(&dev->dev, "function init failed: %li\n",
548 PTR_ERR(fn));
549 return PTR_ERR(fn);
550 }
551
552 for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
553 rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
554 if (rc > 0) {
555 rc = init_afu(dev, fn, afu);
556 if (rc) {
557 dev_err(&dev->dev,
558 "Can't initialize AFU index %d\n", afu);
559 continue;
560 }
561 afu_count++;
562 }
563 }
564 dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
565 return 0;
566}
567
568static void ocxl_remove(struct pci_dev *dev)
569{
570 struct ocxl_afu *afu, *tmp;
571 struct ocxl_fn *fn = pci_get_drvdata(dev);
572
573 list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
574 remove_afu(afu);
575 }
576 remove_function(fn);
577}
578
579struct pci_driver ocxl_pci_driver = {
580 .name = "ocxl",
581 .id_table = ocxl_pci_tbl,
582 .probe = ocxl_probe,
583 .remove = ocxl_remove,
584 .shutdown = ocxl_remove,
585};
diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
new file mode 100644
index 000000000000..d9753a1db14b
--- /dev/null
+++ b/drivers/misc/ocxl/sysfs.c
@@ -0,0 +1,142 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <linux/sysfs.h>
4#include "ocxl_internal.h"
5
6static ssize_t global_mmio_size_show(struct device *device,
7 struct device_attribute *attr,
8 char *buf)
9{
10 struct ocxl_afu *afu = to_ocxl_afu(device);
11
12 return scnprintf(buf, PAGE_SIZE, "%d\n",
13 afu->config.global_mmio_size);
14}
15
16static ssize_t pp_mmio_size_show(struct device *device,
17 struct device_attribute *attr,
18 char *buf)
19{
20 struct ocxl_afu *afu = to_ocxl_afu(device);
21
22 return scnprintf(buf, PAGE_SIZE, "%d\n",
23 afu->config.pp_mmio_stride);
24}
25
26static ssize_t afu_version_show(struct device *device,
27 struct device_attribute *attr,
28 char *buf)
29{
30 struct ocxl_afu *afu = to_ocxl_afu(device);
31
32 return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
33 afu->config.version_major,
34 afu->config.version_minor);
35}
36
37static ssize_t contexts_show(struct device *device,
38 struct device_attribute *attr,
39 char *buf)
40{
41 struct ocxl_afu *afu = to_ocxl_afu(device);
42
43 return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
44 afu->pasid_count, afu->pasid_max);
45}
46
47static struct device_attribute afu_attrs[] = {
48 __ATTR_RO(global_mmio_size),
49 __ATTR_RO(pp_mmio_size),
50 __ATTR_RO(afu_version),
51 __ATTR_RO(contexts),
52};
53
54static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
55 struct bin_attribute *bin_attr, char *buf,
56 loff_t off, size_t count)
57{
58 struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
59
60 if (count == 0 || off < 0 ||
61 off >= afu->config.global_mmio_size)
62 return 0;
63 memcpy_fromio(buf, afu->global_mmio_ptr + off, count);
64 return count;
65}
66
67static int global_mmio_fault(struct vm_fault *vmf)
68{
69 struct vm_area_struct *vma = vmf->vma;
70 struct ocxl_afu *afu = vma->vm_private_data;
71 unsigned long offset;
72
73 if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT))
74 return VM_FAULT_SIGBUS;
75
76 offset = vmf->pgoff;
77 offset += (afu->global_mmio_start >> PAGE_SHIFT);
78 vm_insert_pfn(vma, vmf->address, offset);
79 return VM_FAULT_NOPAGE;
80}
81
82static const struct vm_operations_struct global_mmio_vmops = {
83 .fault = global_mmio_fault,
84};
85
86static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
87 struct bin_attribute *bin_attr,
88 struct vm_area_struct *vma)
89{
90 struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
91
92 if ((vma_pages(vma) + vma->vm_pgoff) >
93 (afu->config.global_mmio_size >> PAGE_SHIFT))
94 return -EINVAL;
95
96 vma->vm_flags |= VM_IO | VM_PFNMAP;
97 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
98 vma->vm_ops = &global_mmio_vmops;
99 vma->vm_private_data = afu;
100 return 0;
101}
102
103int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
104{
105 int i, rc;
106
107 for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
108 rc = device_create_file(&afu->dev, &afu_attrs[i]);
109 if (rc)
110 goto err;
111 }
112
113 sysfs_attr_init(&afu->attr_global_mmio.attr);
114 afu->attr_global_mmio.attr.name = "global_mmio_area";
115 afu->attr_global_mmio.attr.mode = 0600;
116 afu->attr_global_mmio.size = afu->config.global_mmio_size;
117 afu->attr_global_mmio.read = global_mmio_read;
118 afu->attr_global_mmio.mmap = global_mmio_mmap;
119 rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio);
120 if (rc) {
121 dev_err(&afu->dev,
122 "Unable to create global mmio attr for afu: %d\n",
123 rc);
124 goto err;
125 }
126
127 return 0;
128
129err:
130 for (i--; i >= 0; i--)
131 device_remove_file(&afu->dev, &afu_attrs[i]);
132 return rc;
133}
134
135void ocxl_sysfs_remove_afu(struct ocxl_afu *afu)
136{
137 int i;
138
139 for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
140 device_remove_file(&afu->dev, &afu_attrs[i]);
141 device_remove_bin_file(&afu->dev, &afu->attr_global_mmio);
142}
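Besides the read() path, the bin attribute above can be mmapped to get direct, non-cached access to the AFU's global MMIO window. A minimal user-space sketch follows (illustration only); the sysfs path is a placeholder, since the actual device name is set when the AFU is registered in file.c, outside this file:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder path: substitute the actual AFU device directory */
	const char *path = "/sys/.../<afu device>/global_mmio_area";
	void *p;
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("first 8 bytes: 0x%016llx\n",
	       (unsigned long long)*(volatile uint64_t *)p);
	munmap(p, 4096);
	close(fd);
	return 0;
}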
diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
new file mode 100644
index 000000000000..a37e34edf52f
--- /dev/null
+++ b/include/uapi/misc/ocxl.h
@@ -0,0 +1,40 @@
1/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2/* Copyright 2017 IBM Corp. */
3#ifndef _UAPI_MISC_OCXL_H
4#define _UAPI_MISC_OCXL_H
5
6#include <linux/types.h>
7#include <linux/ioctl.h>
8
9enum ocxl_event_type {
10 OCXL_AFU_EVENT_XSL_FAULT_ERROR = 0,
11};
12
13#define OCXL_KERNEL_EVENT_FLAG_LAST 0x0001 /* This is the last event pending */
14
15struct ocxl_kernel_event_header {
16 __u16 type;
17 __u16 flags;
18 __u32 reserved;
19};
20
21struct ocxl_kernel_event_xsl_fault_error {
22 __u64 addr;
23 __u64 dsisr;
24 __u64 count;
25 __u64 reserved;
26};
27
28struct ocxl_ioctl_attach {
29 __u64 amr;
30 __u64 reserved1;
31 __u64 reserved2;
32 __u64 reserved3;
33};
34
35/* ioctl numbers */
36#define OCXL_MAGIC 0xCA
37/* AFU devices */
38#define OCXL_IOCTL_ATTACH _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach)
39
40#endif /* _UAPI_MISC_OCXL_H */
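To close the loop with the char device mentioned in the commit message, here is a minimal user-space attach sketch (illustration only). The device node name is a placeholder: pci.c names AFU devices "<afu name>.<pci location>.<afu index>" and file.c exposes them under /dev/ocxl/. The reserved fields of struct ocxl_ioctl_attach are simply zeroed here.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <misc/ocxl.h>		/* the header above, once installed */

int main(void)
{
	struct ocxl_ioctl_attach attach;
	/* Placeholder node name: /dev/ocxl/<afu name>.<location>.<index> */
	int fd = open("/dev/ocxl/<afu name>.<location>.<index>", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&attach, 0, sizeof(attach));
	attach.amr = 0;		/* no authority mask restrictions requested */
	if (ioctl(fd, OCXL_IOCTL_ATTACH, &attach) < 0) {
		perror("OCXL_IOCTL_ATTACH");
		return 1;
	}

	/*
	 * The calling process is now attached: the AFU can use its PASID
	 * and translation faults on its address space will be handled by
	 * the link code in link.c.
	 */
	close(fd);
	return 0;
}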