diff options
Diffstat (limited to 'Documentation/vfio.txt')
| -rw-r--r-- | Documentation/vfio.txt | 87 |
1 files changed, 84 insertions, 3 deletions
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index b9ca02370d46..96978eced341 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt | |||
| @@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides | |||
| 305 | an excellent performance which has limitations such as inability to do | 305 | an excellent performance which has limitations such as inability to do |
| 306 | locked pages accounting in real time. | 306 | locked pages accounting in real time. |
| 307 | 307 | ||
| 308 | So 3 additional ioctls have been added: | 308 | 4) According to the sPAPR specification, a Partitionable Endpoint (PE) is an I/O |
| 309 | subtree that can be treated as a unit for the purposes of partitioning and | ||
| 310 | error recovery. A PE may be a single or multi-function IOA (IO Adapter), a | ||
| 311 | function of a multi-function IOA, or multiple IOAs (possibly including switch | ||
| 312 | and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors | ||
| 313 | and recover from them via EEH RTAS services, which work on the basis of | ||
| 314 | additional ioctl commands. | ||
| 315 | |||
| 316 | So 4 additional ioctls have been added: | ||
| 309 | 317 | ||
| 310 | VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start | 318 | VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start |
| 311 | of the DMA window on the PCI bus. | 319 | of the DMA window on the PCI bus. |
| @@ -316,9 +324,12 @@ So 3 additional ioctls have been added: | |||
| 316 | 324 | ||
| 317 | VFIO_IOMMU_DISABLE - disables the container. | 325 | VFIO_IOMMU_DISABLE - disables the container. |
| 318 | 326 | ||
| 327 | VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery. | ||
| 319 | 328 | ||
| 320 | The code flow from the example above should be slightly changed: | 329 | The code flow from the example above should be slightly changed: |
| 321 | 330 | ||
| 331 | struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 }; | ||
| 332 | |||
| 322 | ..... | 333 | ..... |
| 323 | /* Add the group to the container */ | 334 | /* Add the group to the container */ |
| 324 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | 335 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); |
| @@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed: | |||
| 342 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; | 353 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; |
| 343 | 354 | ||
| 344 | /* Check here if .iova/.size are within DMA window from spapr_iommu_info */ | 355 | /* Check here if .iova/.size are within DMA window from spapr_iommu_info */ |
| 345 | |||
| 346 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); | 356 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); |
| 347 | ..... | 357 | |
| 358 | /* Get a file descriptor for the device */ | ||
| 359 | device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); | ||
| 360 | |||
| 361 | .... | ||
| 362 | |||
| 363 | /* Gratuitous device reset and go... */ | ||
| 364 | ioctl(device, VFIO_DEVICE_RESET); | ||
| 365 | |||
| 366 | /* Make sure EEH is supported */ | ||
| 367 | ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH); | ||
| 368 | |||
| 369 | /* Enable the EEH functionality on the device */ | ||
| 370 | pe_op.op = VFIO_EEH_PE_ENABLE; | ||
| 371 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
| 372 | |||
| 373 | /* It is suggested to create an additional data structure to represent | ||
| 374 | * the PE, and to put child devices belonging to the same IOMMU group into | ||
| 375 | * the PE instance for later reference. | ||
| 376 | */ | ||
| 377 | |||
| 378 | /* Check the PE's state and make sure it's in functional state */ | ||
| 379 | pe_op.op = VFIO_EEH_PE_GET_STATE; | ||
| 380 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
| 381 | |||
| 382 | /* Save device state using pci_save_state(). | ||
| 383 | * EEH should be enabled on the specified device. | ||
| 384 | */ | ||
| 385 | |||
| 386 | .... | ||
| 387 | |||
| 388 | /* When 0xFF's are returned from reading PCI config space or IO BARs | ||
| 389 | * of the PCI device, check the PE's state to see if it has been | ||
| 390 | * frozen. | ||
| 391 | */ | ||
| 392 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
| 393 | |||
| 394 | /* Wait for pending PCI transactions to be completed and don't | ||
| 395 | * produce any more PCI traffic from/to the affected PE until | ||
| 396 | * recovery is finished. | ||
| 397 | */ | ||
| 398 | |||
| 399 | /* Enable IO for the affected PE and collect logs. Usually, the | ||
| 400 | * standard part of PCI config space and the AER registers are dumped | ||
| 401 | * as logs for further analysis. | ||
| 402 | */ | ||
| 403 | pe_op.op = VFIO_EEH_PE_UNFREEZE_IO; | ||
| 404 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
| 405 | |||
| 406 | /* | ||
| 407 | * Issue PE reset: hot or fundamental reset. Usually, hot reset | ||
| 408 | * is enough. However, the firmware of some PCI adapters would | ||
| 409 | * require fundamental reset. | ||
| 410 | */ | ||
| 411 | pe_op.op = VFIO_EEH_PE_RESET_HOT; | ||
| 412 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
| 413 | pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE; | ||
| 414 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
| 415 | |||
| 416 | /* Configure the PCI bridges for the affected PE */ | ||
| 417 | pe_op.op = VFIO_EEH_PE_CONFIGURE; | ||
| 418 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
| 419 | |||
| 420 | /* Restore the state we saved at initialization time. pci_restore_state() | ||
| 421 | * is good enough as an example. | ||
| 422 | */ | ||
| 423 | |||
| 424 | /* Hopefully, the error is recovered successfully. Now, you can resume | ||
| 425 | * PCI traffic to/from the affected PE. | ||
| 426 | */ | ||
| 427 | |||
| 428 | .... | ||
| 348 | 429 | ||
| 349 | ------------------------------------------------------------------------------- | 430 | ------------------------------------------------------------------------------- |
| 350 | 431 | ||
