diff options
-rw-r--r-- | Documentation/vfio.txt | 87 | ||||
-rw-r--r-- | drivers/vfio/Makefile | 1 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 18 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c | 17 | ||||
-rw-r--r-- | drivers/vfio/vfio_spapr_eeh.c | 87 | ||||
-rw-r--r-- | include/linux/vfio.h | 23 | ||||
-rw-r--r-- | include/uapi/linux/vfio.h | 34 |
7 files changed, 259 insertions, 8 deletions
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index b9ca02370d46..96978eced341 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt | |||
@@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides | |||
305 | an excellent performance which has limitations such as inability to do | 305 | an excellent performance which has limitations such as inability to do |
306 | locked pages accounting in real time. | 306 | locked pages accounting in real time. |
307 | 307 | ||
308 | So 3 additional ioctls have been added: | 308 | 4) According to the sPAPR specification, a Partitionable Endpoint (PE) is an I/O |
309 | subtree that can be treated as a unit for the purposes of partitioning and | ||
310 | error recovery. A PE may be a single or multi-function IOA (IO Adapter), a | ||
311 | function of a multi-function IOA, or multiple IOAs (possibly including switch | ||
312 | and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors | ||
313 | and recover from them via EEH RTAS services, which work on the basis of | ||
314 | additional ioctl commands. | ||
315 | |||
316 | So 4 additional ioctls have been added: | ||
309 | 317 | ||
310 | VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start | 318 | VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start |
311 | of the DMA window on the PCI bus. | 319 | of the DMA window on the PCI bus. |
@@ -316,9 +324,12 @@ So 3 additional ioctls have been added: | |||
316 | 324 | ||
317 | VFIO_IOMMU_DISABLE - disables the container. | 325 | VFIO_IOMMU_DISABLE - disables the container. |
318 | 326 | ||
327 | VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery. | ||
319 | 328 | ||
320 | The code flow from the example above should be slightly changed: | 329 | The code flow from the example above should be slightly changed: |
321 | 330 | ||
331 | struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 }; | ||
332 | |||
322 | ..... | 333 | ..... |
323 | /* Add the group to the container */ | 334 | /* Add the group to the container */ |
324 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | 335 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); |
@@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed: | |||
342 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; | 353 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; |
343 | 354 | ||
344 | /* Check here if .iova/.size are within DMA window from spapr_iommu_info */ | 355 | /* Check here if .iova/.size are within DMA window from spapr_iommu_info */ |
345 | |||
346 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); | 356 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); |
347 | ..... | 357 | |
358 | /* Get a file descriptor for the device */ | ||
359 | device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); | ||
360 | |||
361 | .... | ||
362 | |||
363 | /* Gratuitous device reset and go... */ | ||
364 | ioctl(device, VFIO_DEVICE_RESET); | ||
365 | |||
366 | /* Make sure EEH is supported */ | ||
367 | ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH); | ||
368 | |||
369 | /* Enable the EEH functionality on the device */ | ||
370 | pe_op.op = VFIO_EEH_PE_ENABLE; | ||
371 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
372 | |||
373 | /* You are advised to create an additional data structure to represent | ||
374 | * the PE, and to put child devices belonging to the same IOMMU group | ||
375 | * into the PE instance for later reference. | ||
376 | */ | ||
377 | |||
378 | /* Check the PE's state and make sure it's in functional state */ | ||
379 | pe_op.op = VFIO_EEH_PE_GET_STATE; | ||
380 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
381 | |||
382 | /* Save device state using pci_save_state(). | ||
383 | * EEH should be enabled on the specified device. | ||
384 | */ | ||
385 | |||
386 | .... | ||
387 | |||
388 | /* When 0xFF's are returned from reading PCI config space or IO BARs | ||
389 | * of the PCI device, check the PE's state to see if it has been | ||
390 | * frozen. | ||
391 | */ | ||
392 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
393 | |||
394 | /* Wait for pending PCI transactions to complete, and do not | ||
395 | * produce any more PCI traffic from/to the affected PE until | ||
396 | * recovery is finished. | ||
397 | */ | ||
398 | |||
399 | /* Enable IO for the affected PE and collect logs. Usually, the | ||
400 | * standard part of PCI config space and the AER registers are dumped | ||
401 | * as logs for further analysis. | ||
402 | */ | ||
403 | pe_op.op = VFIO_EEH_PE_UNFREEZE_IO; | ||
404 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
405 | |||
406 | /* | ||
407 | * Issue PE reset: hot or fundamental reset. Usually, hot reset | ||
408 | * is enough. However, the firmware of some PCI adapters would | ||
409 | * require fundamental reset. | ||
410 | */ | ||
411 | pe_op.op = VFIO_EEH_PE_RESET_HOT; | ||
412 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
413 | pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE; | ||
414 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
415 | |||
416 | /* Configure the PCI bridges for the affected PE */ | ||
417 | pe_op.op = VFIO_EEH_PE_CONFIGURE; | ||
418 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
419 | |||
420 | /* Restore the state we saved at initialization time. pci_restore_state() | ||
421 | * is good enough as an example. | ||
422 | */ | ||
423 | |||
424 | /* Hopefully, the error has been recovered successfully. Now you can | ||
425 | * resume PCI traffic to/from the affected PE. | ||
426 | */ | ||
427 | |||
428 | .... | ||
348 | 429 | ||
349 | ------------------------------------------------------------------------------- | 430 | ------------------------------------------------------------------------------- |
350 | 431 | ||
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 72bfabc8629e..50e30bc75e85 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | obj-$(CONFIG_VFIO) += vfio.o | 1 | obj-$(CONFIG_VFIO) += vfio.o |
2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o | 2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o |
3 | obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o | 3 | obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o |
4 | obj-$(CONFIG_EEH) += vfio_spapr_eeh.o | ||
4 | obj-$(CONFIG_VFIO_PCI) += pci/ | 5 | obj-$(CONFIG_VFIO_PCI) += pci/ |
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 010e0f8b8e4f..e2ee80f36e3e 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c | |||
@@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data) | |||
157 | { | 157 | { |
158 | struct vfio_pci_device *vdev = device_data; | 158 | struct vfio_pci_device *vdev = device_data; |
159 | 159 | ||
160 | if (atomic_dec_and_test(&vdev->refcnt)) | 160 | if (atomic_dec_and_test(&vdev->refcnt)) { |
161 | vfio_spapr_pci_eeh_release(vdev->pdev); | ||
161 | vfio_pci_disable(vdev); | 162 | vfio_pci_disable(vdev); |
163 | } | ||
162 | 164 | ||
163 | module_put(THIS_MODULE); | 165 | module_put(THIS_MODULE); |
164 | } | 166 | } |
@@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data) | |||
166 | static int vfio_pci_open(void *device_data) | 168 | static int vfio_pci_open(void *device_data) |
167 | { | 169 | { |
168 | struct vfio_pci_device *vdev = device_data; | 170 | struct vfio_pci_device *vdev = device_data; |
171 | int ret; | ||
169 | 172 | ||
170 | if (!try_module_get(THIS_MODULE)) | 173 | if (!try_module_get(THIS_MODULE)) |
171 | return -ENODEV; | 174 | return -ENODEV; |
172 | 175 | ||
173 | if (atomic_inc_return(&vdev->refcnt) == 1) { | 176 | if (atomic_inc_return(&vdev->refcnt) == 1) { |
174 | int ret = vfio_pci_enable(vdev); | 177 | ret = vfio_pci_enable(vdev); |
178 | if (ret) | ||
179 | goto error; | ||
180 | |||
181 | ret = vfio_spapr_pci_eeh_open(vdev->pdev); | ||
175 | if (ret) { | 182 | if (ret) { |
176 | module_put(THIS_MODULE); | 183 | vfio_pci_disable(vdev); |
177 | return ret; | 184 | goto error; |
178 | } | 185 | } |
179 | } | 186 | } |
180 | 187 | ||
181 | return 0; | 188 | return 0; |
189 | error: | ||
190 | module_put(THIS_MODULE); | ||
191 | return ret; | ||
182 | } | 192 | } |
183 | 193 | ||
184 | static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) | 194 | static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) |
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index a84788ba662c..730b4ef3e0cc 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c | |||
@@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data, | |||
156 | 156 | ||
157 | switch (cmd) { | 157 | switch (cmd) { |
158 | case VFIO_CHECK_EXTENSION: | 158 | case VFIO_CHECK_EXTENSION: |
159 | return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; | 159 | switch (arg) { |
160 | case VFIO_SPAPR_TCE_IOMMU: | ||
161 | ret = 1; | ||
162 | break; | ||
163 | default: | ||
164 | ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg); | ||
165 | break; | ||
166 | } | ||
167 | |||
168 | return (ret < 0) ? 0 : ret; | ||
160 | 169 | ||
161 | case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { | 170 | case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { |
162 | struct vfio_iommu_spapr_tce_info info; | 171 | struct vfio_iommu_spapr_tce_info info; |
@@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data, | |||
283 | tce_iommu_disable(container); | 292 | tce_iommu_disable(container); |
284 | mutex_unlock(&container->lock); | 293 | mutex_unlock(&container->lock); |
285 | return 0; | 294 | return 0; |
295 | case VFIO_EEH_PE_OP: | ||
296 | if (!container->tbl || !container->tbl->it_group) | ||
297 | return -ENODEV; | ||
298 | |||
299 | return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, | ||
300 | cmd, arg); | ||
286 | } | 301 | } |
287 | 302 | ||
288 | return -ENOTTY; | 303 | return -ENOTTY; |
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c new file mode 100644 index 000000000000..f834b4ce1431 --- /dev/null +++ b/drivers/vfio/vfio_spapr_eeh.c | |||
@@ -0,0 +1,87 @@ | |||
1 | /* | ||
2 | * EEH functionality support for VFIO devices. The feature is only | ||
3 | * available on sPAPR compatible platforms. | ||
4 | * | ||
5 | * Copyright Gavin Shan, IBM Corporation 2014. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/uaccess.h> | ||
13 | #include <linux/vfio.h> | ||
14 | #include <asm/eeh.h> | ||
15 | |||
16 | /* We might build address mapping here for "fast" path later */ | ||
17 | int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) | ||
18 | { | ||
19 | return eeh_dev_open(pdev); | ||
20 | } | ||
21 | |||
22 | void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) | ||
23 | { | ||
24 | eeh_dev_release(pdev); | ||
25 | } | ||
26 | |||
27 | long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, | ||
28 | unsigned int cmd, unsigned long arg) | ||
29 | { | ||
30 | struct eeh_pe *pe; | ||
31 | struct vfio_eeh_pe_op op; | ||
32 | unsigned long minsz; | ||
33 | long ret = -EINVAL; | ||
34 | |||
35 | switch (cmd) { | ||
36 | case VFIO_CHECK_EXTENSION: | ||
37 | if (arg == VFIO_EEH) | ||
38 | ret = eeh_enabled() ? 1 : 0; | ||
39 | else | ||
40 | ret = 0; | ||
41 | break; | ||
42 | case VFIO_EEH_PE_OP: | ||
43 | pe = eeh_iommu_group_to_pe(group); | ||
44 | if (!pe) | ||
45 | return -ENODEV; | ||
46 | |||
47 | minsz = offsetofend(struct vfio_eeh_pe_op, op); | ||
48 | if (copy_from_user(&op, (void __user *)arg, minsz)) | ||
49 | return -EFAULT; | ||
50 | if (op.argsz < minsz || op.flags) | ||
51 | return -EINVAL; | ||
52 | |||
53 | switch (op.op) { | ||
54 | case VFIO_EEH_PE_DISABLE: | ||
55 | ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE); | ||
56 | break; | ||
57 | case VFIO_EEH_PE_ENABLE: | ||
58 | ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE); | ||
59 | break; | ||
60 | case VFIO_EEH_PE_UNFREEZE_IO: | ||
61 | ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); | ||
62 | break; | ||
63 | case VFIO_EEH_PE_UNFREEZE_DMA: | ||
64 | ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); | ||
65 | break; | ||
66 | case VFIO_EEH_PE_GET_STATE: | ||
67 | ret = eeh_pe_get_state(pe); | ||
68 | break; | ||
69 | case VFIO_EEH_PE_RESET_DEACTIVATE: | ||
70 | ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); | ||
71 | break; | ||
72 | case VFIO_EEH_PE_RESET_HOT: | ||
73 | ret = eeh_pe_reset(pe, EEH_RESET_HOT); | ||
74 | break; | ||
75 | case VFIO_EEH_PE_RESET_FUNDAMENTAL: | ||
76 | ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL); | ||
77 | break; | ||
78 | case VFIO_EEH_PE_CONFIGURE: | ||
79 | ret = eeh_pe_configure(pe); | ||
80 | break; | ||
81 | default: | ||
82 | ret = -EINVAL; | ||
83 | } | ||
84 | } | ||
85 | |||
86 | return ret; | ||
87 | } | ||
diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8ec980b5e3af..25a0fbd4b998 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h | |||
@@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); | |||
98 | extern long vfio_external_check_extension(struct vfio_group *group, | 98 | extern long vfio_external_check_extension(struct vfio_group *group, |
99 | unsigned long arg); | 99 | unsigned long arg); |
100 | 100 | ||
101 | #ifdef CONFIG_EEH | ||
102 | extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); | ||
103 | extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); | ||
104 | extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, | ||
105 | unsigned int cmd, | ||
106 | unsigned long arg); | ||
107 | #else | ||
108 | static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) | ||
109 | { | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) | ||
114 | { | ||
115 | } | ||
116 | |||
117 | static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, | ||
118 | unsigned int cmd, | ||
119 | unsigned long arg) | ||
120 | { | ||
121 | return -ENOTTY; | ||
122 | } | ||
123 | #endif /* CONFIG_EEH */ | ||
101 | #endif /* VFIO_H */ | 124 | #endif /* VFIO_H */ |
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index cb9023d4f063..6612974c64bf 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h | |||
@@ -30,6 +30,9 @@ | |||
30 | */ | 30 | */ |
31 | #define VFIO_DMA_CC_IOMMU 4 | 31 | #define VFIO_DMA_CC_IOMMU 4 |
32 | 32 | ||
33 | /* Check if EEH is supported */ | ||
34 | #define VFIO_EEH 5 | ||
35 | |||
33 | /* | 36 | /* |
34 | * The IOCTL interface is designed for extensibility by embedding the | 37 | * The IOCTL interface is designed for extensibility by embedding the |
35 | * structure length (argsz) and flags into structures passed between | 38 | * structure length (argsz) and flags into structures passed between |
@@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info { | |||
455 | 458 | ||
456 | #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) | 459 | #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) |
457 | 460 | ||
461 | /* | ||
462 | * EEH PE operation struct provides ways to: | ||
463 | * - enable/disable EEH functionality; | ||
464 | * - unfreeze IO/DMA for frozen PE; | ||
465 | * - read PE state; | ||
466 | * - reset PE; | ||
467 | * - configure PE. | ||
468 | */ | ||
469 | struct vfio_eeh_pe_op { | ||
470 | __u32 argsz; | ||
471 | __u32 flags; | ||
472 | __u32 op; | ||
473 | }; | ||
474 | |||
475 | #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ | ||
476 | #define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */ | ||
477 | #define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */ | ||
478 | #define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */ | ||
479 | #define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */ | ||
480 | #define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */ | ||
481 | #define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */ | ||
482 | #define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */ | ||
483 | #define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ | ||
484 | #define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */ | ||
485 | #define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */ | ||
486 | #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ | ||
487 | #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ | ||
488 | #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ | ||
489 | |||
490 | #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) | ||
491 | |||
458 | /* ***************************************************************** */ | 492 | /* ***************************************************************** */ |
459 | 493 | ||
460 | #endif /* _UAPIVFIO_H */ | 494 | #endif /* _UAPIVFIO_H */ |