aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vfio/pci
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-05-02 17:02:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-05-02 17:02:32 -0400
commit 0b2e3b6bb4a415379f16e38fc92db42379be47a1 (patch)
tree ac6af620793ecd5e4b1d5523e0f431d2d8a5ef66 /drivers/vfio/pci
parent e95893004104054d49406fd108fefa3ddc054366 (diff)
parent 664e9386bd05dbdfecfb28d6cf2fde983aabc65c (diff)
Merge tag 'vfio-for-v3.10' of git://github.com/awilliam/linux-vfio
Pull vfio updates from Alex Williamson:
 "Changes include extension to support PCI AER notification to userspace, byte granularity of PCI config space and access to unarchitected PCI config space, better protection around IOMMU driver accesses, default file mode fix, and a few misc cleanups."

* tag 'vfio-for-v3.10' of git://github.com/awilliam/linux-vfio:
  vfio: Set container device mode
  vfio: Use down_reads to protect iommu disconnects
  vfio: Convert container->group_lock to rwsem
  PCI/VFIO: use pcie_flags_reg instead of access PCI-E Capabilities Register
  vfio-pci: Enable raw access to unassigned config space
  vfio-pci: Use byte granularity in config map
  vfio: make local function vfio_pci_intx_unmask_handler() static
  VFIO-AER: Vfio-pci driver changes for supporting AER
  VFIO: Wrapper for getting reference to vfio_device
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r-- drivers/vfio/pci/vfio_pci.c 44
-rw-r--r-- drivers/vfio/pci/vfio_pci_config.c 172
-rw-r--r-- drivers/vfio/pci/vfio_pci_intrs.c 67
-rw-r--r-- drivers/vfio/pci/vfio_pci_private.h 1
4 files changed, 204 insertions, 80 deletions
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 09d2e3ffd6fc..ac3725440d64 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -201,7 +201,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
201 201
202 return (flags & PCI_MSIX_FLAGS_QSIZE) + 1; 202 return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
203 } 203 }
204 } 204 } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
205 if (pci_is_pcie(vdev->pdev))
206 return 1;
205 207
206 return 0; 208 return 0;
207} 209}
@@ -317,6 +319,17 @@ static long vfio_pci_ioctl(void *device_data,
317 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS) 319 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
318 return -EINVAL; 320 return -EINVAL;
319 321
322 switch (info.index) {
323 case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
324 break;
325 case VFIO_PCI_ERR_IRQ_INDEX:
326 if (pci_is_pcie(vdev->pdev))
327 break;
328 /* pass thru to return error */
329 default:
330 return -EINVAL;
331 }
332
320 info.flags = VFIO_IRQ_INFO_EVENTFD; 333 info.flags = VFIO_IRQ_INFO_EVENTFD;
321 334
322 info.count = vfio_pci_get_irq_count(vdev, info.index); 335 info.count = vfio_pci_get_irq_count(vdev, info.index);
@@ -552,11 +565,40 @@ static void vfio_pci_remove(struct pci_dev *pdev)
552 kfree(vdev); 565 kfree(vdev);
553} 566}
554 567
568static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
569 pci_channel_state_t state)
570{
571 struct vfio_pci_device *vdev;
572 struct vfio_device *device;
573
574 device = vfio_device_get_from_dev(&pdev->dev);
575 if (device == NULL)
576 return PCI_ERS_RESULT_DISCONNECT;
577
578 vdev = vfio_device_data(device);
579 if (vdev == NULL) {
580 vfio_device_put(device);
581 return PCI_ERS_RESULT_DISCONNECT;
582 }
583
584 if (vdev->err_trigger)
585 eventfd_signal(vdev->err_trigger, 1);
586
587 vfio_device_put(device);
588
589 return PCI_ERS_RESULT_CAN_RECOVER;
590}
591
592static struct pci_error_handlers vfio_err_handlers = {
593 .error_detected = vfio_pci_aer_err_detected,
594};
595
555static struct pci_driver vfio_pci_driver = { 596static struct pci_driver vfio_pci_driver = {
556 .name = "vfio-pci", 597 .name = "vfio-pci",
557 .id_table = NULL, /* only dynamic ids */ 598 .id_table = NULL, /* only dynamic ids */
558 .probe = vfio_pci_probe, 599 .probe = vfio_pci_probe,
559 .remove = vfio_pci_remove, 600 .remove = vfio_pci_remove,
601 .err_handler = &vfio_err_handlers,
560}; 602};
561 603
562static void __exit vfio_pci_cleanup(void) 604static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index aeb00fc2d3be..affa34745be9 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -274,9 +274,10 @@ static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos,
274 return count; 274 return count;
275} 275}
276 276
277static int vfio_direct_config_write(struct vfio_pci_device *vdev, int pos, 277/* Raw access skips any kind of virtualization */
278 int count, struct perm_bits *perm, 278static int vfio_raw_config_write(struct vfio_pci_device *vdev, int pos,
279 int offset, __le32 val) 279 int count, struct perm_bits *perm,
280 int offset, __le32 val)
280{ 281{
281 int ret; 282 int ret;
282 283
@@ -287,13 +288,36 @@ static int vfio_direct_config_write(struct vfio_pci_device *vdev, int pos,
287 return count; 288 return count;
288} 289}
289 290
290/* Default all regions to read-only, no-virtualization */ 291static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,
292 int count, struct perm_bits *perm,
293 int offset, __le32 *val)
294{
295 int ret;
296
297 ret = vfio_user_config_read(vdev->pdev, pos, val, count);
298 if (ret)
299 return pcibios_err_to_errno(ret);
300
301 return count;
302}
303
304/* Default capability regions to read-only, no-virtualization */
291static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { 305static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = {
292 [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } 306 [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
293}; 307};
294static struct perm_bits ecap_perms[PCI_EXT_CAP_ID_MAX + 1] = { 308static struct perm_bits ecap_perms[PCI_EXT_CAP_ID_MAX + 1] = {
295 [0 ... PCI_EXT_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } 309 [0 ... PCI_EXT_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
296}; 310};
311/*
312 * Default unassigned regions to raw read-write access. Some devices
313 * require this to function as they hide registers between the gaps in
314 * config space (be2net). Like MMIO and I/O port registers, we have
315 * to trust the hardware isolation.
316 */
317static struct perm_bits unassigned_perms = {
318 .readfn = vfio_raw_config_read,
319 .writefn = vfio_raw_config_write
320};
297 321
298static void free_perm_bits(struct perm_bits *perm) 322static void free_perm_bits(struct perm_bits *perm)
299{ 323{
@@ -779,16 +803,16 @@ int __init vfio_pci_init_perm_bits(void)
779 803
780 /* Capabilities */ 804 /* Capabilities */
781 ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]); 805 ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]);
782 cap_perms[PCI_CAP_ID_VPD].writefn = vfio_direct_config_write; 806 cap_perms[PCI_CAP_ID_VPD].writefn = vfio_raw_config_write;
783 ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]); 807 ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]);
784 cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_direct_config_write; 808 cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_raw_config_write;
785 ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]); 809 ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]);
786 ret |= init_pci_cap_af_perm(&cap_perms[PCI_CAP_ID_AF]); 810 ret |= init_pci_cap_af_perm(&cap_perms[PCI_CAP_ID_AF]);
787 811
788 /* Extended capabilities */ 812 /* Extended capabilities */
789 ret |= init_pci_ext_cap_err_perm(&ecap_perms[PCI_EXT_CAP_ID_ERR]); 813 ret |= init_pci_ext_cap_err_perm(&ecap_perms[PCI_EXT_CAP_ID_ERR]);
790 ret |= init_pci_ext_cap_pwr_perm(&ecap_perms[PCI_EXT_CAP_ID_PWR]); 814 ret |= init_pci_ext_cap_pwr_perm(&ecap_perms[PCI_EXT_CAP_ID_PWR]);
791 ecap_perms[PCI_EXT_CAP_ID_VNDR].writefn = vfio_direct_config_write; 815 ecap_perms[PCI_EXT_CAP_ID_VNDR].writefn = vfio_raw_config_write;
792 816
793 if (ret) 817 if (ret)
794 vfio_pci_uninit_perm_bits(); 818 vfio_pci_uninit_perm_bits();
@@ -801,9 +825,6 @@ static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos)
801 u8 cap; 825 u8 cap;
802 int base = (pos >= PCI_CFG_SPACE_SIZE) ? PCI_CFG_SPACE_SIZE : 826 int base = (pos >= PCI_CFG_SPACE_SIZE) ? PCI_CFG_SPACE_SIZE :
803 PCI_STD_HEADER_SIZEOF; 827 PCI_STD_HEADER_SIZEOF;
804 base /= 4;
805 pos /= 4;
806
807 cap = vdev->pci_config_map[pos]; 828 cap = vdev->pci_config_map[pos];
808 829
809 if (cap == PCI_CAP_ID_BASIC) 830 if (cap == PCI_CAP_ID_BASIC)
@@ -813,7 +834,7 @@ static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos)
813 while (pos - 1 >= base && vdev->pci_config_map[pos - 1] == cap) 834 while (pos - 1 >= base && vdev->pci_config_map[pos - 1] == cap)
814 pos--; 835 pos--;
815 836
816 return pos * 4; 837 return pos;
817} 838}
818 839
819static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos, 840static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos,
@@ -1017,13 +1038,9 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
1017 return byte; 1038 return byte;
1018 case PCI_CAP_ID_EXP: 1039 case PCI_CAP_ID_EXP:
1019 /* length based on version */ 1040 /* length based on version */
1020 ret = pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &word);
1021 if (ret)
1022 return pcibios_err_to_errno(ret);
1023
1024 vdev->extended_caps = true; 1041 vdev->extended_caps = true;
1025 1042
1026 if ((word & PCI_EXP_FLAGS_VERS) == 1) 1043 if ((pcie_caps_reg(pdev) & PCI_EXP_FLAGS_VERS) == 1)
1027 return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; 1044 return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1;
1028 else 1045 else
1029 return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2; 1046 return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2;
@@ -1230,8 +1247,8 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
1230 } 1247 }
1231 1248
1232 /* Sanity check, do we overlap other capabilities? */ 1249 /* Sanity check, do we overlap other capabilities? */
1233 for (i = 0; i < len; i += 4) { 1250 for (i = 0; i < len; i++) {
1234 if (likely(map[(pos + i) / 4] == PCI_CAP_ID_INVALID)) 1251 if (likely(map[pos + i] == PCI_CAP_ID_INVALID))
1235 continue; 1252 continue;
1236 1253
1237 pr_warn("%s: %s pci config conflict @0x%x, was cap 0x%x now cap 0x%x\n", 1254 pr_warn("%s: %s pci config conflict @0x%x, was cap 0x%x now cap 0x%x\n",
@@ -1239,7 +1256,7 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
1239 pos + i, map[pos + i], cap); 1256 pos + i, map[pos + i], cap);
1240 } 1257 }
1241 1258
1242 memset(map + (pos / 4), cap, len / 4); 1259 memset(map + pos, cap, len);
1243 ret = vfio_fill_vconfig_bytes(vdev, pos, len); 1260 ret = vfio_fill_vconfig_bytes(vdev, pos, len);
1244 if (ret) 1261 if (ret)
1245 return ret; 1262 return ret;
@@ -1314,8 +1331,8 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
1314 hidden = true; 1331 hidden = true;
1315 } 1332 }
1316 1333
1317 for (i = 0; i < len; i += 4) { 1334 for (i = 0; i < len; i++) {
1318 if (likely(map[(epos + i) / 4] == PCI_CAP_ID_INVALID)) 1335 if (likely(map[epos + i] == PCI_CAP_ID_INVALID))
1319 continue; 1336 continue;
1320 1337
1321 pr_warn("%s: %s pci config conflict @0x%x, was ecap 0x%x now ecap 0x%x\n", 1338 pr_warn("%s: %s pci config conflict @0x%x, was ecap 0x%x now ecap 0x%x\n",
@@ -1330,7 +1347,7 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
1330 */ 1347 */
1331 BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID); 1348 BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID);
1332 1349
1333 memset(map + (epos / 4), ecap, len / 4); 1350 memset(map + epos, ecap, len);
1334 ret = vfio_fill_vconfig_bytes(vdev, epos, len); 1351 ret = vfio_fill_vconfig_bytes(vdev, epos, len);
1335 if (ret) 1352 if (ret)
1336 return ret; 1353 return ret;
@@ -1377,10 +1394,12 @@ int vfio_config_init(struct vfio_pci_device *vdev)
1377 int ret; 1394 int ret;
1378 1395
1379 /* 1396 /*
1380 * Config space, caps and ecaps are all dword aligned, so we can 1397 * Config space, caps and ecaps are all dword aligned, so we could
1381 * use one byte per dword to record the type. 1398 * use one byte per dword to record the type. However, there are
1399 * no requirements on the length of a capability, so the gap between
1400 * capabilities needs byte granularity.
1382 */ 1401 */
1383 map = kmalloc(pdev->cfg_size / 4, GFP_KERNEL); 1402 map = kmalloc(pdev->cfg_size, GFP_KERNEL);
1384 if (!map) 1403 if (!map)
1385 return -ENOMEM; 1404 return -ENOMEM;
1386 1405
@@ -1393,9 +1412,9 @@ int vfio_config_init(struct vfio_pci_device *vdev)
1393 vdev->pci_config_map = map; 1412 vdev->pci_config_map = map;
1394 vdev->vconfig = vconfig; 1413 vdev->vconfig = vconfig;
1395 1414
1396 memset(map, PCI_CAP_ID_BASIC, PCI_STD_HEADER_SIZEOF / 4); 1415 memset(map, PCI_CAP_ID_BASIC, PCI_STD_HEADER_SIZEOF);
1397 memset(map + (PCI_STD_HEADER_SIZEOF / 4), PCI_CAP_ID_INVALID, 1416 memset(map + PCI_STD_HEADER_SIZEOF, PCI_CAP_ID_INVALID,
1398 (pdev->cfg_size - PCI_STD_HEADER_SIZEOF) / 4); 1417 pdev->cfg_size - PCI_STD_HEADER_SIZEOF);
1399 1418
1400 ret = vfio_fill_vconfig_bytes(vdev, 0, PCI_STD_HEADER_SIZEOF); 1419 ret = vfio_fill_vconfig_bytes(vdev, 0, PCI_STD_HEADER_SIZEOF);
1401 if (ret) 1420 if (ret)
@@ -1450,6 +1469,22 @@ void vfio_config_free(struct vfio_pci_device *vdev)
1450 vdev->msi_perm = NULL; 1469 vdev->msi_perm = NULL;
1451} 1470}
1452 1471
1472/*
1473 * Find the remaining number of bytes in a dword that match the given
1474 * position. Stop at either the end of the capability or the dword boundary.
1475 */
1476static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_device *vdev,
1477 loff_t pos)
1478{
1479 u8 cap = vdev->pci_config_map[pos];
1480 size_t i;
1481
1482 for (i = 1; (pos + i) % 4 && vdev->pci_config_map[pos + i] == cap; i++)
1483 /* nop */;
1484
1485 return i;
1486}
1487
1453static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, 1488static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
1454 size_t count, loff_t *ppos, bool iswrite) 1489 size_t count, loff_t *ppos, bool iswrite)
1455{ 1490{
@@ -1458,55 +1493,48 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
1458 __le32 val = 0; 1493 __le32 val = 0;
1459 int cap_start = 0, offset; 1494 int cap_start = 0, offset;
1460 u8 cap_id; 1495 u8 cap_id;
1461 ssize_t ret = count; 1496 ssize_t ret;
1462 1497
1463 if (*ppos < 0 || *ppos + count > pdev->cfg_size) 1498 if (*ppos < 0 || *ppos >= pdev->cfg_size ||
1499 *ppos + count > pdev->cfg_size)
1464 return -EFAULT; 1500 return -EFAULT;
1465 1501
1466 /* 1502 /*
1467 * gcc can't seem to figure out we're a static function, only called 1503 * Chop accesses into aligned chunks containing no more than a
1468 * with count of 1/2/4 and hits copy_from_user_overflow without this. 1504 * single capability. Caller increments to the next chunk.
1469 */ 1505 */
1470 if (count > sizeof(val)) 1506 count = min(count, vfio_pci_cap_remaining_dword(vdev, *ppos));
1471 return -EINVAL; 1507 if (count >= 4 && !(*ppos % 4))
1472 1508 count = 4;
1473 cap_id = vdev->pci_config_map[*ppos / 4]; 1509 else if (count >= 2 && !(*ppos % 2))
1474 1510 count = 2;
1475 if (cap_id == PCI_CAP_ID_INVALID) { 1511 else
1476 if (iswrite) 1512 count = 1;
1477 return ret; /* drop */
1478
1479 /*
1480 * Per PCI spec 3.0, section 6.1, reads from reserved and
1481 * unimplemented registers return 0
1482 */
1483 if (copy_to_user(buf, &val, count))
1484 return -EFAULT;
1485
1486 return ret;
1487 }
1488 1513
1489 /* 1514 ret = count;
1490 * All capabilities are minimum 4 bytes and aligned on dword
1491 * boundaries. Since we don't support unaligned accesses, we're
1492 * only ever accessing a single capability.
1493 */
1494 if (*ppos >= PCI_CFG_SPACE_SIZE) {
1495 WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX);
1496 1515
1497 perm = &ecap_perms[cap_id]; 1516 cap_id = vdev->pci_config_map[*ppos];
1498 cap_start = vfio_find_cap_start(vdev, *ppos);
1499 1517
1518 if (cap_id == PCI_CAP_ID_INVALID) {
1519 perm = &unassigned_perms;
1520 cap_start = *ppos;
1500 } else { 1521 } else {
1501 WARN_ON(cap_id > PCI_CAP_ID_MAX); 1522 if (*ppos >= PCI_CFG_SPACE_SIZE) {
1523 WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX);
1502 1524
1503 perm = &cap_perms[cap_id]; 1525 perm = &ecap_perms[cap_id];
1526 cap_start = vfio_find_cap_start(vdev, *ppos);
1527 } else {
1528 WARN_ON(cap_id > PCI_CAP_ID_MAX);
1504 1529
1505 if (cap_id == PCI_CAP_ID_MSI) 1530 perm = &cap_perms[cap_id];
1506 perm = vdev->msi_perm;
1507 1531
1508 if (cap_id > PCI_CAP_ID_BASIC) 1532 if (cap_id == PCI_CAP_ID_MSI)
1509 cap_start = vfio_find_cap_start(vdev, *ppos); 1533 perm = vdev->msi_perm;
1534
1535 if (cap_id > PCI_CAP_ID_BASIC)
1536 cap_start = vfio_find_cap_start(vdev, *ppos);
1537 }
1510 } 1538 }
1511 1539
1512 WARN_ON(!cap_start && cap_id != PCI_CAP_ID_BASIC); 1540 WARN_ON(!cap_start && cap_id != PCI_CAP_ID_BASIC);
@@ -1546,20 +1574,8 @@ ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf,
1546 1574
1547 pos &= VFIO_PCI_OFFSET_MASK; 1575 pos &= VFIO_PCI_OFFSET_MASK;
1548 1576
1549 /*
1550 * We want to both keep the access size the caller uses as well as
1551 * support reading large chunks of config space in a single call.
1552 * PCI doesn't support unaligned accesses, so we can safely break
1553 * those apart.
1554 */
1555 while (count) { 1577 while (count) {
1556 if (count >= 4 && !(pos % 4)) 1578 ret = vfio_config_do_rw(vdev, buf, count, &pos, iswrite);
1557 ret = vfio_config_do_rw(vdev, buf, 4, &pos, iswrite);
1558 else if (count >= 2 && !(pos % 2))
1559 ret = vfio_config_do_rw(vdev, buf, 2, &pos, iswrite);
1560 else
1561 ret = vfio_config_do_rw(vdev, buf, 1, &pos, iswrite);
1562
1563 if (ret < 0) 1579 if (ret < 0)
1564 return ret; 1580 return ret;
1565 1581
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index a96509187deb..4bc704e1b7c7 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -287,7 +287,8 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
287 * a signal is necessary, which can then be handled via a work queue 287 * a signal is necessary, which can then be handled via a work queue
288 * or directly depending on the caller. 288 * or directly depending on the caller.
289 */ 289 */
290int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, void *unused) 290static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
291 void *unused)
291{ 292{
292 struct pci_dev *pdev = vdev->pdev; 293 struct pci_dev *pdev = vdev->pdev;
293 unsigned long flags; 294 unsigned long flags;
@@ -746,6 +747,63 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
746 return 0; 747 return 0;
747} 748}
748 749
750static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
751 unsigned index, unsigned start,
752 unsigned count, uint32_t flags, void *data)
753{
754 int32_t fd = *(int32_t *)data;
755 struct pci_dev *pdev = vdev->pdev;
756
757 if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
758 !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
759 return -EINVAL;
760
761 /*
762 * device_lock synchronizes setting and checking of
763 * err_trigger. The vfio_pci_aer_err_detected() is also
764 * called with device_lock held.
765 */
766
767 /* DATA_NONE/DATA_BOOL enables loopback testing */
768
769 if (flags & VFIO_IRQ_SET_DATA_NONE) {
770 device_lock(&pdev->dev);
771 if (vdev->err_trigger)
772 eventfd_signal(vdev->err_trigger, 1);
773 device_unlock(&pdev->dev);
774 return 0;
775 } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
776 uint8_t trigger = *(uint8_t *)data;
777 device_lock(&pdev->dev);
778 if (trigger && vdev->err_trigger)
779 eventfd_signal(vdev->err_trigger, 1);
780 device_unlock(&pdev->dev);
781 return 0;
782 }
783
784 /* Handle SET_DATA_EVENTFD */
785
786 if (fd == -1) {
787 device_lock(&pdev->dev);
788 if (vdev->err_trigger)
789 eventfd_ctx_put(vdev->err_trigger);
790 vdev->err_trigger = NULL;
791 device_unlock(&pdev->dev);
792 return 0;
793 } else if (fd >= 0) {
794 struct eventfd_ctx *efdctx;
795 efdctx = eventfd_ctx_fdget(fd);
796 if (IS_ERR(efdctx))
797 return PTR_ERR(efdctx);
798 device_lock(&pdev->dev);
799 if (vdev->err_trigger)
800 eventfd_ctx_put(vdev->err_trigger);
801 vdev->err_trigger = efdctx;
802 device_unlock(&pdev->dev);
803 return 0;
804 } else
805 return -EINVAL;
806}
749int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, 807int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
750 unsigned index, unsigned start, unsigned count, 808 unsigned index, unsigned start, unsigned count,
751 void *data) 809 void *data)
@@ -780,6 +838,13 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
780 break; 838 break;
781 } 839 }
782 break; 840 break;
841 case VFIO_PCI_ERR_IRQ_INDEX:
842 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
843 case VFIO_IRQ_SET_ACTION_TRIGGER:
844 if (pci_is_pcie(vdev->pdev))
845 func = vfio_pci_set_err_trigger;
846 break;
847 }
783 } 848 }
784 849
785 if (!func) 850 if (!func)
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index d7e55d03f49e..9c6d5d0f3b02 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -56,6 +56,7 @@ struct vfio_pci_device {
56 bool has_vga; 56 bool has_vga;
57 struct pci_saved_state *pci_saved_state; 57 struct pci_saved_state *pci_saved_state;
58 atomic_t refcnt; 58 atomic_t refcnt;
59 struct eventfd_ctx *err_trigger;
59}; 60};
60 61
61#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) 62#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)