about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Gavin Shan <gwshan@linux.vnet.ibm.com> 2014-06-09 21:41:57 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-08-05 01:28:48 -0400
commit: 1b69be5e8afc634f39ad695a6ab6aad0cf0975c7 (patch)
tree: f87cc5eae17311b309c7498ce063eb33cc028fc3
parent: 212d16cdca2d0f7708c9c1d284a845c22bfc90c4 (diff)
drivers/vfio: EEH support for VFIO PCI device
The patch adds new IOCTL commands for sPAPR VFIO container device to
support EEH functionality for PCI devices, which have been passed
through from host to somebody else via VFIO.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- Documentation/vfio.txt 87
-rw-r--r-- drivers/vfio/Makefile 1
-rw-r--r-- drivers/vfio/pci/vfio_pci.c 18
-rw-r--r-- drivers/vfio/vfio_iommu_spapr_tce.c 17
-rw-r--r-- drivers/vfio/vfio_spapr_eeh.c 87
-rw-r--r-- include/linux/vfio.h 23
-rw-r--r-- include/uapi/linux/vfio.h 34
7 files changed, 259 insertions, 8 deletions
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index b9ca02370d46..96978eced341 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides
305an excellent performance which has limitations such as inability to do 305an excellent performance which has limitations such as inability to do
306locked pages accounting in real time. 306locked pages accounting in real time.
307 307
308So 3 additional ioctls have been added: 3084) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
309subtree that can be treated as a unit for the purposes of partitioning and
310error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
311function of a multi-function IOA, or multiple IOAs (possibly including switch
312and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
313and recover from them via EEH RTAS services, which works on the basis of
314additional ioctl commands.
315
316So 4 additional ioctls have been added:
309 317
310 VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start 318 VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
311 of the DMA window on the PCI bus. 319 of the DMA window on the PCI bus.
@@ -316,9 +324,12 @@ So 3 additional ioctls have been added:
316 324
317 VFIO_IOMMU_DISABLE - disables the container. 325 VFIO_IOMMU_DISABLE - disables the container.
318 326
327 VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
319 328
320The code flow from the example above should be slightly changed: 329The code flow from the example above should be slightly changed:
321 330
331 struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
332
322 ..... 333 .....
323 /* Add the group to the container */ 334 /* Add the group to the container */
324 ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); 335 ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
@@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed:
342 dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; 353 dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
343 354
344 /* Check here is .iova/.size are within DMA window from spapr_iommu_info */ 355 /* Check here is .iova/.size are within DMA window from spapr_iommu_info */
345
346 ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); 356 ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
347 ..... 357
358 /* Get a file descriptor for the device */
359 device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
360
361 ....
362
363 /* Gratuitous device reset and go... */
364 ioctl(device, VFIO_DEVICE_RESET);
365
366 /* Make sure EEH is supported */
367 ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
368
369 /* Enable the EEH functionality on the device */
370 pe_op.op = VFIO_EEH_PE_ENABLE;
371 ioctl(container, VFIO_EEH_PE_OP, &pe_op);
372
373 /* You're suggested to create additional data struct to represent
374 * PE, and put child devices belonging to same IOMMU group to the
375 * PE instance for later reference.
376 */
377
378 /* Check the PE's state and make sure it's in functional state */
379 pe_op.op = VFIO_EEH_PE_GET_STATE;
380 ioctl(container, VFIO_EEH_PE_OP, &pe_op);
381
382 /* Save device state using pci_save_state().
383 * EEH should be enabled on the specified device.
384 */
385
386 ....
387
388 /* When 0xFF's returned from reading PCI config space or IO BARs
389 * of the PCI device. Check the PE's state to see if that has been
390 * frozen.
391 */
392 ioctl(container, VFIO_EEH_PE_OP, &pe_op);
393
394 /* Waiting for pending PCI transactions to be completed and don't
395 * produce any more PCI traffic from/to the affected PE until
396 * recovery is finished.
397 */
398
399 /* Enable IO for the affected PE and collect logs. Usually, the
400 * standard part of PCI config space, AER registers are dumped
401 * as logs for further analysis.
402 */
403 pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
404 ioctl(container, VFIO_EEH_PE_OP, &pe_op);
405
406 /*
407 * Issue PE reset: hot or fundamental reset. Usually, hot reset
408 * is enough. However, the firmware of some PCI adapters would
409 * require fundamental reset.
410 */
411 pe_op.op = VFIO_EEH_PE_RESET_HOT;
412 ioctl(container, VFIO_EEH_PE_OP, &pe_op);
413 pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
414 ioctl(container, VFIO_EEH_PE_OP, &pe_op);
415
416 /* Configure the PCI bridges for the affected PE */
417 pe_op.op = VFIO_EEH_PE_CONFIGURE;
418 ioctl(container, VFIO_EEH_PE_OP, &pe_op);
419
420 /* Restored state we saved at initialization time. pci_restore_state()
421 * is good enough as an example.
422 */
423
424 /* Hopefully, error is recovered successfully. Now, you can resume to
425 * start PCI traffic to/from the affected PE.
426 */
427
428 ....
348 429
349------------------------------------------------------------------------------- 430-------------------------------------------------------------------------------
350 431
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 72bfabc8629e..50e30bc75e85 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,4 +1,5 @@
1obj-$(CONFIG_VFIO) += vfio.o 1obj-$(CONFIG_VFIO) += vfio.o
2obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o 2obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
3obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o 3obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
4obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
4obj-$(CONFIG_VFIO_PCI) += pci/ 5obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 010e0f8b8e4f..e2ee80f36e3e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data)
157{ 157{
158 struct vfio_pci_device *vdev = device_data; 158 struct vfio_pci_device *vdev = device_data;
159 159
160 if (atomic_dec_and_test(&vdev->refcnt)) 160 if (atomic_dec_and_test(&vdev->refcnt)) {
161 vfio_spapr_pci_eeh_release(vdev->pdev);
161 vfio_pci_disable(vdev); 162 vfio_pci_disable(vdev);
163 }
162 164
163 module_put(THIS_MODULE); 165 module_put(THIS_MODULE);
164} 166}
@@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data)
166static int vfio_pci_open(void *device_data) 168static int vfio_pci_open(void *device_data)
167{ 169{
168 struct vfio_pci_device *vdev = device_data; 170 struct vfio_pci_device *vdev = device_data;
171 int ret;
169 172
170 if (!try_module_get(THIS_MODULE)) 173 if (!try_module_get(THIS_MODULE))
171 return -ENODEV; 174 return -ENODEV;
172 175
173 if (atomic_inc_return(&vdev->refcnt) == 1) { 176 if (atomic_inc_return(&vdev->refcnt) == 1) {
174 int ret = vfio_pci_enable(vdev); 177 ret = vfio_pci_enable(vdev);
178 if (ret)
179 goto error;
180
181 ret = vfio_spapr_pci_eeh_open(vdev->pdev);
175 if (ret) { 182 if (ret) {
176 module_put(THIS_MODULE); 183 vfio_pci_disable(vdev);
177 return ret; 184 goto error;
178 } 185 }
179 } 186 }
180 187
181 return 0; 188 return 0;
189error:
190 module_put(THIS_MODULE);
191 return ret;
182} 192}
183 193
184static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) 194static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index a84788ba662c..730b4ef3e0cc 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data,
156 156
157 switch (cmd) { 157 switch (cmd) {
158 case VFIO_CHECK_EXTENSION: 158 case VFIO_CHECK_EXTENSION:
159 return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; 159 switch (arg) {
160 case VFIO_SPAPR_TCE_IOMMU:
161 ret = 1;
162 break;
163 default:
164 ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
165 break;
166 }
167
168 return (ret < 0) ? 0 : ret;
160 169
161 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { 170 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
162 struct vfio_iommu_spapr_tce_info info; 171 struct vfio_iommu_spapr_tce_info info;
@@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data,
283 tce_iommu_disable(container); 292 tce_iommu_disable(container);
284 mutex_unlock(&container->lock); 293 mutex_unlock(&container->lock);
285 return 0; 294 return 0;
295 case VFIO_EEH_PE_OP:
296 if (!container->tbl || !container->tbl->it_group)
297 return -ENODEV;
298
299 return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
300 cmd, arg);
286 } 301 }
287 302
288 return -ENOTTY; 303 return -ENOTTY;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
new file mode 100644
index 000000000000..f834b4ce1431
--- /dev/null
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -0,0 +1,87 @@
1/*
2 * EEH functionality support for VFIO devices. The feature is only
3 * available on sPAPR compatible platforms.
4 *
5 * Copyright Gavin Shan, IBM Corporation 2014.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/uaccess.h>
13#include <linux/vfio.h>
14#include <asm/eeh.h>
15
16/* We might build address mapping here for "fast" path later */
17int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
18{
19 return eeh_dev_open(pdev);
20}
21
22void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
23{
24 eeh_dev_release(pdev);
25}
26
27long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
28 unsigned int cmd, unsigned long arg)
29{
30 struct eeh_pe *pe;
31 struct vfio_eeh_pe_op op;
32 unsigned long minsz;
33 long ret = -EINVAL;
34
35 switch (cmd) {
36 case VFIO_CHECK_EXTENSION:
37 if (arg == VFIO_EEH)
38 ret = eeh_enabled() ? 1 : 0;
39 else
40 ret = 0;
41 break;
42 case VFIO_EEH_PE_OP:
43 pe = eeh_iommu_group_to_pe(group);
44 if (!pe)
45 return -ENODEV;
46
47 minsz = offsetofend(struct vfio_eeh_pe_op, op);
48 if (copy_from_user(&op, (void __user *)arg, minsz))
49 return -EFAULT;
50 if (op.argsz < minsz || op.flags)
51 return -EINVAL;
52
53 switch (op.op) {
54 case VFIO_EEH_PE_DISABLE:
55 ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE);
56 break;
57 case VFIO_EEH_PE_ENABLE:
58 ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE);
59 break;
60 case VFIO_EEH_PE_UNFREEZE_IO:
61 ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
62 break;
63 case VFIO_EEH_PE_UNFREEZE_DMA:
64 ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
65 break;
66 case VFIO_EEH_PE_GET_STATE:
67 ret = eeh_pe_get_state(pe);
68 break;
69 case VFIO_EEH_PE_RESET_DEACTIVATE:
70 ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
71 break;
72 case VFIO_EEH_PE_RESET_HOT:
73 ret = eeh_pe_reset(pe, EEH_RESET_HOT);
74 break;
75 case VFIO_EEH_PE_RESET_FUNDAMENTAL:
76 ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
77 break;
78 case VFIO_EEH_PE_CONFIGURE:
79 ret = eeh_pe_configure(pe);
80 break;
81 default:
82 ret = -EINVAL;
83 }
84 }
85
86 return ret;
87}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8ec980b5e3af..25a0fbd4b998 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
98extern long vfio_external_check_extension(struct vfio_group *group, 98extern long vfio_external_check_extension(struct vfio_group *group,
99 unsigned long arg); 99 unsigned long arg);
100 100
101#ifdef CONFIG_EEH
102extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
103extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
104extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
105 unsigned int cmd,
106 unsigned long arg);
107#else
108static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
109{
110 return 0;
111}
112
113static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
114{
115}
116
117static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
118 unsigned int cmd,
119 unsigned long arg)
120{
121 return -ENOTTY;
122}
123#endif /* CONFIG_EEH */
101#endif /* VFIO_H */ 124#endif /* VFIO_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..6612974c64bf 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -30,6 +30,9 @@
30 */ 30 */
31#define VFIO_DMA_CC_IOMMU 4 31#define VFIO_DMA_CC_IOMMU 4
32 32
33/* Check if EEH is supported */
34#define VFIO_EEH 5
35
33/* 36/*
34 * The IOCTL interface is designed for extensibility by embedding the 37 * The IOCTL interface is designed for extensibility by embedding the
35 * structure length (argsz) and flags into structures passed between 38 * structure length (argsz) and flags into structures passed between
@@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info {
455 458
456#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) 459#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
457 460
461/*
462 * EEH PE operation struct provides ways to:
463 * - enable/disable EEH functionality;
464 * - unfreeze IO/DMA for frozen PE;
465 * - read PE state;
466 * - reset PE;
467 * - configure PE.
468 */
469struct vfio_eeh_pe_op {
470 __u32 argsz;
471 __u32 flags;
472 __u32 op;
473};
474
475#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */
476#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */
477#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */
478#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */
479#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */
480#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */
481#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */
482#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */
483#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */
484#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */
485#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */
486#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */
487#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */
488#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */
489
490#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21)
491
458/* ***************************************************************** */ 492/* ***************************************************************** */
459 493
460#endif /* _UAPIVFIO_H */ 494#endif /* _UAPIVFIO_H */