diff options
Diffstat (limited to 'Documentation/vfio.txt')
-rw-r--r-- | Documentation/vfio.txt | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 8eda3635a17d..c55533c0adb3 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt | |||
@@ -283,6 +283,69 @@ a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap | |||
283 | interfaces implement the device region access defined by the device's | 283 | interfaces implement the device region access defined by the device's |
284 | own VFIO_DEVICE_GET_REGION_INFO ioctl. | 284 | own VFIO_DEVICE_GET_REGION_INFO ioctl. |
285 | 285 | ||
286 | |||
287 | PPC64 sPAPR implementation note | ||
288 | ------------------------------------------------------------------------------- | ||
289 | |||
290 | This implementation has some specifics: | ||
291 | |||
292 | 1) Only one IOMMU group per container is supported as an IOMMU group | |
293 | represents the minimal entity for which isolation can be guaranteed and | |
294 | groups are allocated statically, one per Partitionable Endpoint (PE) | |
295 | (PE is often a PCI domain but not always). | |
296 | |||
297 | 2) The hardware supports so-called DMA windows - the PCI address range | |
298 | within which DMA transfer is allowed; any attempt to access address space | |
299 | outside of the window leads to isolation of the whole PE. | |
300 | |||
301 | 3) PPC64 guests are paravirtualized but not fully emulated. There is an API | ||
302 | to map/unmap pages for DMA, and it normally maps 1..32 pages per call and | ||
303 | currently there is no way to reduce the number of calls. In order to make things | ||
304 | faster, the map/unmap handling has been implemented in real mode, which provides | |
305 | excellent performance but has limitations such as the inability to do | |
306 | locked pages accounting in real time. | |
307 | |||
308 | So 3 additional ioctls have been added: | ||
309 | |||
310 | VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start | ||
311 | of the DMA window on the PCI bus. | ||
312 | |||
313 | VFIO_IOMMU_ENABLE - enables the container. The locked pages accounting | |
314 | is done at this point. This lets the user first learn what | |
315 | the DMA window is and adjust rlimit before doing any real work. | |
316 | |||
317 | VFIO_IOMMU_DISABLE - disables the container. | ||
318 | |||
319 | |||
320 | The code flow from the example above should be slightly changed: | ||
321 | |||
322 | ..... | ||
323 | /* Add the group to the container */ | ||
324 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | ||
325 | |||
326 | /* Enable the IOMMU model we want */ | ||
327 | ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU); | |
328 | |||
329 | /* Get additional sPAPR IOMMU info */ | |
330 | vfio_iommu_spapr_tce_info spapr_iommu_info; | ||
331 | ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info); | ||
332 | |||
333 | if (ioctl(container, VFIO_IOMMU_ENABLE)) | ||
334 | /* Cannot enable container, rlimit may be too low */ | |
335 | |||
336 | /* Allocate some space and setup a DMA mapping */ | ||
337 | dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, | ||
338 | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); | ||
339 | |||
340 | dma_map.size = 1024 * 1024; | ||
341 | dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ | ||
342 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; | ||
343 | |||
344 | /* Check here if .iova/.size are within the DMA window from spapr_iommu_info */ | |
345 | |||
346 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); | ||
347 | ..... | ||
348 | |||
286 | ------------------------------------------------------------------------------- | 349 | ------------------------------------------------------------------------------- |
287 | 350 | ||
288 | [1] VFIO was originally an acronym for "Virtual Function I/O" in its | 351 | [1] VFIO was originally an acronym for "Virtual Function I/O" in its |