-rw-r--r-- Documentation/vfio.txt          |   6
-rw-r--r-- drivers/vfio/vfio.c             |  14
-rw-r--r-- drivers/vfio/vfio_iommu_type1.c | 626
-rw-r--r-- include/uapi/linux/vfio.h       |   8
4 files changed, 424 insertions, 230 deletions
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index c55533c0adb3..d7993dcf8537 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
| @@ -172,12 +172,12 @@ group and can access them as follows: | |||
| 172 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; | 172 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; |
| 173 | 173 | ||
| 174 | /* Create a new container */ | 174 | /* Create a new container */ |
| 175 | container = open("/dev/vfio/vfio, O_RDWR); | 175 | container = open("/dev/vfio/vfio", O_RDWR); |
| 176 | 176 | ||
| 177 | if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) | 177 | if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) |
| 178 | /* Unknown API version */ | 178 | /* Unknown API version */ |
| 179 | 179 | ||
| 180 | if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_X86_IOMMU)) | 180 | if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) |
| 181 | /* Doesn't support the IOMMU driver we want. */ | 181 | /* Doesn't support the IOMMU driver we want. */ |
| 182 | 182 | ||
| 183 | /* Open the group */ | 183 | /* Open the group */ |
| @@ -193,7 +193,7 @@ group and can access them as follows: | |||
| 193 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | 193 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); |
| 194 | 194 | ||
| 195 | /* Enable the IOMMU model we want */ | 195 | /* Enable the IOMMU model we want */ |
| 196 | ioctl(container, VFIO_SET_IOMMU, VFIO_X86_IOMMU) | 196 | ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) |
| 197 | 197 | ||
| 198 | /* Get addition IOMMU info */ | 198 | /* Get addition IOMMU info */ |
| 199 | ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info); | 199 | ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info); |
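Taken together, the corrected documentation snippet above describes the usual container/group bring-up sequence. A minimal userspace sketch of that sequence is shown below; the group number (26) and the collapsed error handling are illustrative assumptions, not part of the patch.

/* Minimal sketch of the documented VFIO type1 setup sequence.
 * Assumes group 26 exists and is bound to vfio-pci; error paths
 * are collapsed for brevity.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	int container, group;
	struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
	struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };

	/* Create a new container */
	container = open("/dev/vfio/vfio", O_RDWR);

	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
		return 1;			/* Unknown API version */

	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
		return 1;			/* Type1 IOMMU not supported */

	/* Open the group (group number is an assumption for illustration) */
	group = open("/dev/vfio/26", O_RDWR);

	ioctl(group, VFIO_GROUP_GET_STATUS, &group_status);
	if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE))
		return 1;			/* Group is not viable */

	/* Add the group to the container, then enable the IOMMU model */
	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

	/* Get additional IOMMU info */
	ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info);
	printf("iova pgsizes bitmap: 0x%llx\n",
	       (unsigned long long)iommu_info.iova_pgsizes);
	return 0;
}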
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 259ad282ae5d..c488da5db7c7 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
| @@ -76,6 +76,7 @@ struct vfio_group { | |||
| 76 | struct notifier_block nb; | 76 | struct notifier_block nb; |
| 77 | struct list_head vfio_next; | 77 | struct list_head vfio_next; |
| 78 | struct list_head container_next; | 78 | struct list_head container_next; |
| 79 | atomic_t opened; | ||
| 79 | }; | 80 | }; |
| 80 | 81 | ||
| 81 | struct vfio_device { | 82 | struct vfio_device { |
| @@ -206,6 +207,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) | |||
| 206 | INIT_LIST_HEAD(&group->device_list); | 207 | INIT_LIST_HEAD(&group->device_list); |
| 207 | mutex_init(&group->device_lock); | 208 | mutex_init(&group->device_lock); |
| 208 | atomic_set(&group->container_users, 0); | 209 | atomic_set(&group->container_users, 0); |
| 210 | atomic_set(&group->opened, 0); | ||
| 209 | group->iommu_group = iommu_group; | 211 | group->iommu_group = iommu_group; |
| 210 | 212 | ||
| 211 | group->nb.notifier_call = vfio_iommu_group_notifier; | 213 | group->nb.notifier_call = vfio_iommu_group_notifier; |
| @@ -1236,12 +1238,22 @@ static long vfio_group_fops_compat_ioctl(struct file *filep, | |||
| 1236 | static int vfio_group_fops_open(struct inode *inode, struct file *filep) | 1238 | static int vfio_group_fops_open(struct inode *inode, struct file *filep) |
| 1237 | { | 1239 | { |
| 1238 | struct vfio_group *group; | 1240 | struct vfio_group *group; |
| 1241 | int opened; | ||
| 1239 | 1242 | ||
| 1240 | group = vfio_group_get_from_minor(iminor(inode)); | 1243 | group = vfio_group_get_from_minor(iminor(inode)); |
| 1241 | if (!group) | 1244 | if (!group) |
| 1242 | return -ENODEV; | 1245 | return -ENODEV; |
| 1243 | 1246 | ||
| 1247 | /* Do we need multiple instances of the group open? Seems not. */ | ||
| 1248 | opened = atomic_cmpxchg(&group->opened, 0, 1); | ||
| 1249 | if (opened) { | ||
| 1250 | vfio_group_put(group); | ||
| 1251 | return -EBUSY; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | /* Is something still in use from a previous open? */ | ||
| 1244 | if (group->container) { | 1255 | if (group->container) { |
| 1256 | atomic_dec(&group->opened); | ||
| 1245 | vfio_group_put(group); | 1257 | vfio_group_put(group); |
| 1246 | return -EBUSY; | 1258 | return -EBUSY; |
| 1247 | } | 1259 | } |
| @@ -1259,6 +1271,8 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep) | |||
| 1259 | 1271 | ||
| 1260 | vfio_group_try_dissolve_container(group); | 1272 | vfio_group_try_dissolve_container(group); |
| 1261 | 1273 | ||
| 1274 | atomic_dec(&group->opened); | ||
| 1275 | |||
| 1262 | vfio_group_put(group); | 1276 | vfio_group_put(group); |
| 1263 | 1277 | ||
| 1264 | return 0; | 1278 | return 0; |
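The vfio.c hunks above enforce a single open() of each group file with an atomic compare-and-swap, releasing the slot again on release or on a failed open. A standalone sketch of the same single-owner idiom, written with C11 atomics rather than the kernel's atomic_t API, is below; all names are illustrative.

/* Illustration of the cmpxchg "only one opener" pattern used above,
 * using C11 atomics instead of the kernel's atomic_t.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <errno.h>

struct fake_group {
	atomic_int opened;	/* 0 = free, 1 = already opened */
};

static int fake_group_open(struct fake_group *g)
{
	int expected = 0;

	/* Succeeds for exactly one caller; everyone else sees -EBUSY. */
	if (!atomic_compare_exchange_strong(&g->opened, &expected, 1))
		return -EBUSY;
	return 0;
}

static void fake_group_release(struct fake_group *g)
{
	atomic_fetch_sub(&g->opened, 1);	/* mirrors atomic_dec() */
}

int main(void)
{
	struct fake_group g = { .opened = 0 };

	printf("first open:    %d\n", fake_group_open(&g));	/* 0 */
	printf("second open:   %d\n", fake_group_open(&g));	/* -EBUSY */
	fake_group_release(&g);
	printf("after release: %d\n", fake_group_open(&g));	/* 0 again */
	return 0;
}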
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 6f3fbc48a6c7..a9807dea3887 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/module.h> | 31 | #include <linux/module.h> |
| 32 | #include <linux/mm.h> | 32 | #include <linux/mm.h> |
| 33 | #include <linux/pci.h> /* pci_bus_type */ | 33 | #include <linux/pci.h> /* pci_bus_type */ |
| 34 | #include <linux/rbtree.h> | ||
| 34 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
| 35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
| 36 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
| @@ -47,19 +48,25 @@ module_param_named(allow_unsafe_interrupts, | |||
| 47 | MODULE_PARM_DESC(allow_unsafe_interrupts, | 48 | MODULE_PARM_DESC(allow_unsafe_interrupts, |
| 48 | "Enable VFIO IOMMU support for on platforms without interrupt remapping support."); | 49 | "Enable VFIO IOMMU support for on platforms without interrupt remapping support."); |
| 49 | 50 | ||
| 51 | static bool disable_hugepages; | ||
| 52 | module_param_named(disable_hugepages, | ||
| 53 | disable_hugepages, bool, S_IRUGO | S_IWUSR); | ||
| 54 | MODULE_PARM_DESC(disable_hugepages, | ||
| 55 | "Disable VFIO IOMMU support for IOMMU hugepages."); | ||
| 56 | |||
| 50 | struct vfio_iommu { | 57 | struct vfio_iommu { |
| 51 | struct iommu_domain *domain; | 58 | struct iommu_domain *domain; |
| 52 | struct mutex lock; | 59 | struct mutex lock; |
| 53 | struct list_head dma_list; | 60 | struct rb_root dma_list; |
| 54 | struct list_head group_list; | 61 | struct list_head group_list; |
| 55 | bool cache; | 62 | bool cache; |
| 56 | }; | 63 | }; |
| 57 | 64 | ||
| 58 | struct vfio_dma { | 65 | struct vfio_dma { |
| 59 | struct list_head next; | 66 | struct rb_node node; |
| 60 | dma_addr_t iova; /* Device address */ | 67 | dma_addr_t iova; /* Device address */ |
| 61 | unsigned long vaddr; /* Process virtual addr */ | 68 | unsigned long vaddr; /* Process virtual addr */ |
| 62 | long npage; /* Number of pages */ | 69 | size_t size; /* Map size (bytes) */ |
| 63 | int prot; /* IOMMU_READ/WRITE */ | 70 | int prot; /* IOMMU_READ/WRITE */ |
| 64 | }; | 71 | }; |
| 65 | 72 | ||
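Because disable_hugepages is declared with S_IRUGO | S_IWUSR, it can be inspected (and flipped) at runtime through the usual module parameter file. A small sketch that reads it is below; the sysfs path follows the standard module_param_named() layout and is an assumption about the running system rather than something shown in the patch.

/* Read the runtime value of the disable_hugepages parameter.
 * Path assumes the standard /sys/module/<name>/parameters/ layout.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/module/vfio_iommu_type1/parameters/disable_hugepages";
	char val[4] = "";
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(val, sizeof(val), f))
		printf("disable_hugepages: %s", val);	/* "Y" or "N" */
	fclose(f);
	return 0;
}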
| @@ -73,7 +80,48 @@ struct vfio_group { | |||
| 73 | * into DMA'ble space using the IOMMU | 80 | * into DMA'ble space using the IOMMU |
| 74 | */ | 81 | */ |
| 75 | 82 | ||
| 76 | #define NPAGE_TO_SIZE(npage) ((size_t)(npage) << PAGE_SHIFT) | 83 | static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, |
| 84 | dma_addr_t start, size_t size) | ||
| 85 | { | ||
| 86 | struct rb_node *node = iommu->dma_list.rb_node; | ||
| 87 | |||
| 88 | while (node) { | ||
| 89 | struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); | ||
| 90 | |||
| 91 | if (start + size <= dma->iova) | ||
| 92 | node = node->rb_left; | ||
| 93 | else if (start >= dma->iova + dma->size) | ||
| 94 | node = node->rb_right; | ||
| 95 | else | ||
| 96 | return dma; | ||
| 97 | } | ||
| 98 | |||
| 99 | return NULL; | ||
| 100 | } | ||
| 101 | |||
| 102 | static void vfio_insert_dma(struct vfio_iommu *iommu, struct vfio_dma *new) | ||
| 103 | { | ||
| 104 | struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL; | ||
| 105 | struct vfio_dma *dma; | ||
| 106 | |||
| 107 | while (*link) { | ||
| 108 | parent = *link; | ||
| 109 | dma = rb_entry(parent, struct vfio_dma, node); | ||
| 110 | |||
| 111 | if (new->iova + new->size <= dma->iova) | ||
| 112 | link = &(*link)->rb_left; | ||
| 113 | else | ||
| 114 | link = &(*link)->rb_right; | ||
| 115 | } | ||
| 116 | |||
| 117 | rb_link_node(&new->node, parent, link); | ||
| 118 | rb_insert_color(&new->node, &iommu->dma_list); | ||
| 119 | } | ||
| 120 | |||
| 121 | static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old) | ||
| 122 | { | ||
| 123 | rb_erase(&old->node, &iommu->dma_list); | ||
| 124 | } | ||
| 77 | 125 | ||
| 78 | struct vwork { | 126 | struct vwork { |
| 79 | struct mm_struct *mm; | 127 | struct mm_struct *mm; |
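The new rb-tree helpers above order vfio_dma entries by IOVA and treat any intersection as a match: a search range that ends at or below an entry's start descends left, one that begins at or above its end descends right, and anything else overlaps. The sketch below exercises that same predicate over a plain array so it can run in userspace; the types and test values are illustrative only.

/* Userspace illustration of the overlap test used by vfio_find_dma():
 * [start, start+size) overlaps [iova, iova+size) unless it lies
 * entirely below or entirely above it.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct range {
	uint64_t iova;
	uint64_t size;
};

static bool overlaps(const struct range *r, uint64_t start, uint64_t size)
{
	if (start + size <= r->iova)		/* entirely below: go left  */
		return false;
	if (start >= r->iova + r->size)		/* entirely above: go right */
		return false;
	return true;				/* otherwise it intersects  */
}

int main(void)
{
	/* Two non-overlapping mappings, as the rb-tree would hold them. */
	struct range map[] = {
		{ 0x100000, 0x10000 },	/* 64KB at 1MB */
		{ 0x200000, 0x200000 },	/* 2MB  at 2MB */
	};
	struct {
		uint64_t start, size;
	} tests[] = {
		{ 0x0f0000, 0x10000 },	/* ends exactly at 1MB: no overlap */
		{ 0x108000, 0x1000 },	/* inside the first mapping        */
		{ 0x1f0000, 0x20000 },	/* straddles the 2MB mapping start */
	};

	for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
		for (unsigned j = 0; j < sizeof(map) / sizeof(map[0]); j++)
			printf("search [%#llx,+%#llx) vs map %u: %s\n",
			       (unsigned long long)tests[i].start,
			       (unsigned long long)tests[i].size, j,
			       overlaps(&map[j], tests[i].start, tests[i].size) ?
			       "overlap" : "disjoint");
	return 0;
}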
| @@ -100,8 +148,8 @@ static void vfio_lock_acct(long npage) | |||
| 100 | struct vwork *vwork; | 148 | struct vwork *vwork; |
| 101 | struct mm_struct *mm; | 149 | struct mm_struct *mm; |
| 102 | 150 | ||
| 103 | if (!current->mm) | 151 | if (!current->mm || !npage) |
| 104 | return; /* process exited */ | 152 | return; /* process exited or nothing to do */ |
| 105 | 153 | ||
| 106 | if (down_write_trylock(¤t->mm->mmap_sem)) { | 154 | if (down_write_trylock(¤t->mm->mmap_sem)) { |
| 107 | current->mm->locked_vm += npage; | 155 | current->mm->locked_vm += npage; |
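vfio_lock_acct() charges pinned pages against the task's locked_vm, which the pinning path later in this patch compares with RLIMIT_MEMLOCK unless the caller has CAP_IPC_LOCK. A userspace sketch of the same arithmetic, converting the rlimit into a page count, is below; it is a simplified illustration of the check (it ignores pages already locked), not driver code.

/* Show how RLIMIT_MEMLOCK translates into the page-count limit that
 * the pinning path compares against locked_vm.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit r;
	long page_size = sysconf(_SC_PAGESIZE);
	unsigned long long limit_pages, want_pages = 1024; /* hypothetical pin */

	if (getrlimit(RLIMIT_MEMLOCK, &r))
		return 1;

	limit_pages = (r.rlim_cur == RLIM_INFINITY) ?
		      (unsigned long long)-1 : r.rlim_cur / page_size;

	printf("RLIMIT_MEMLOCK: %llu bytes (%llu pages)\n",
	       (unsigned long long)r.rlim_cur, limit_pages);
	printf("pinning %llu more pages would %s\n", want_pages,
	       want_pages > limit_pages ? "exceed the limit (-ENOMEM)"
					: "fit within the limit");
	return 0;
}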
| @@ -173,33 +221,6 @@ static int put_pfn(unsigned long pfn, int prot) | |||
| 173 | return 0; | 221 | return 0; |
| 174 | } | 222 | } |
| 175 | 223 | ||
| 176 | /* Unmap DMA region */ | ||
| 177 | static long __vfio_dma_do_unmap(struct vfio_iommu *iommu, dma_addr_t iova, | ||
| 178 | long npage, int prot) | ||
| 179 | { | ||
| 180 | long i, unlocked = 0; | ||
| 181 | |||
| 182 | for (i = 0; i < npage; i++, iova += PAGE_SIZE) { | ||
| 183 | unsigned long pfn; | ||
| 184 | |||
| 185 | pfn = iommu_iova_to_phys(iommu->domain, iova) >> PAGE_SHIFT; | ||
| 186 | if (pfn) { | ||
| 187 | iommu_unmap(iommu->domain, iova, PAGE_SIZE); | ||
| 188 | unlocked += put_pfn(pfn, prot); | ||
| 189 | } | ||
| 190 | } | ||
| 191 | return unlocked; | ||
| 192 | } | ||
| 193 | |||
| 194 | static void vfio_dma_unmap(struct vfio_iommu *iommu, dma_addr_t iova, | ||
| 195 | long npage, int prot) | ||
| 196 | { | ||
| 197 | long unlocked; | ||
| 198 | |||
| 199 | unlocked = __vfio_dma_do_unmap(iommu, iova, npage, prot); | ||
| 200 | vfio_lock_acct(-unlocked); | ||
| 201 | } | ||
| 202 | |||
| 203 | static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) | 224 | static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) |
| 204 | { | 225 | { |
| 205 | struct page *page[1]; | 226 | struct page *page[1]; |
| @@ -226,198 +247,306 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) | |||
| 226 | return ret; | 247 | return ret; |
| 227 | } | 248 | } |
| 228 | 249 | ||
| 229 | /* Map DMA region */ | 250 | /* |
| 230 | static int __vfio_dma_map(struct vfio_iommu *iommu, dma_addr_t iova, | 251 | * Attempt to pin pages. We really don't want to track all the pfns and |
| 231 | unsigned long vaddr, long npage, int prot) | 252 | * the iommu can only map chunks of consecutive pfns anyway, so get the |
| 253 | * first page and all consecutive pages with the same locking. | ||
| 254 | */ | ||
| 255 | static long vfio_pin_pages(unsigned long vaddr, long npage, | ||
| 256 | int prot, unsigned long *pfn_base) | ||
| 232 | { | 257 | { |
| 233 | dma_addr_t start = iova; | 258 | unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; |
| 234 | long i, locked = 0; | 259 | bool lock_cap = capable(CAP_IPC_LOCK); |
| 235 | int ret; | 260 | long ret, i; |
| 236 | 261 | ||
| 237 | /* Verify that pages are not already mapped */ | 262 | if (!current->mm) |
| 238 | for (i = 0; i < npage; i++, iova += PAGE_SIZE) | 263 | return -ENODEV; |
| 239 | if (iommu_iova_to_phys(iommu->domain, iova)) | ||
| 240 | return -EBUSY; | ||
| 241 | 264 | ||
| 242 | iova = start; | 265 | ret = vaddr_get_pfn(vaddr, prot, pfn_base); |
| 266 | if (ret) | ||
| 267 | return ret; | ||
| 243 | 268 | ||
| 244 | if (iommu->cache) | 269 | if (is_invalid_reserved_pfn(*pfn_base)) |
| 245 | prot |= IOMMU_CACHE; | 270 | return 1; |
| 246 | 271 | ||
| 247 | /* | 272 | if (!lock_cap && current->mm->locked_vm + 1 > limit) { |
| 248 | * XXX We break mappings into pages and use get_user_pages_fast to | 273 | put_pfn(*pfn_base, prot); |
| 249 | * pin the pages in memory. It's been suggested that mlock might | 274 | pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, |
| 250 | * provide a more efficient mechanism, but nothing prevents the | 275 | limit << PAGE_SHIFT); |
| 251 | * user from munlocking the pages, which could then allow the user | 276 | return -ENOMEM; |
| 252 | * access to random host memory. We also have no guarantee from the | 277 | } |
| 253 | * IOMMU API that the iommu driver can unmap sub-pages of previous | 278 | |
| 254 | * mappings. This means we might lose an entire range if a single | 279 | if (unlikely(disable_hugepages)) { |
| 255 | * page within it is unmapped. Single page mappings are inefficient, | 280 | vfio_lock_acct(1); |
| 256 | * but provide the most flexibility for now. | 281 | return 1; |
| 257 | */ | 282 | } |
| 258 | for (i = 0; i < npage; i++, iova += PAGE_SIZE, vaddr += PAGE_SIZE) { | 283 | |
| 284 | /* Lock all the consecutive pages from pfn_base */ | ||
| 285 | for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { | ||
| 259 | unsigned long pfn = 0; | 286 | unsigned long pfn = 0; |
| 260 | 287 | ||
| 261 | ret = vaddr_get_pfn(vaddr, prot, &pfn); | 288 | ret = vaddr_get_pfn(vaddr, prot, &pfn); |
| 262 | if (ret) { | 289 | if (ret) |
| 263 | __vfio_dma_do_unmap(iommu, start, i, prot); | 290 | break; |
| 264 | return ret; | ||
| 265 | } | ||
| 266 | 291 | ||
| 267 | /* | 292 | if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { |
| 268 | * Only add actual locked pages to accounting | 293 | put_pfn(pfn, prot); |
| 269 | * XXX We're effectively marking a page locked for every | 294 | break; |
| 270 | * IOVA page even though it's possible the user could be | 295 | } |
| 271 | * backing multiple IOVAs with the same vaddr. This over- | ||
| 272 | * penalizes the user process, but we currently have no | ||
| 273 | * easy way to do this properly. | ||
| 274 | */ | ||
| 275 | if (!is_invalid_reserved_pfn(pfn)) | ||
| 276 | locked++; | ||
| 277 | 296 | ||
| 278 | ret = iommu_map(iommu->domain, iova, | 297 | if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { |
| 279 | (phys_addr_t)pfn << PAGE_SHIFT, | ||
| 280 | PAGE_SIZE, prot); | ||
| 281 | if (ret) { | ||
| 282 | /* Back out mappings on error */ | ||
| 283 | put_pfn(pfn, prot); | 298 | put_pfn(pfn, prot); |
| 284 | __vfio_dma_do_unmap(iommu, start, i, prot); | 299 | pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", |
| 285 | return ret; | 300 | __func__, limit << PAGE_SHIFT); |
| 301 | break; | ||
| 286 | } | 302 | } |
| 287 | } | 303 | } |
| 288 | vfio_lock_acct(locked); | 304 | |
| 289 | return 0; | 305 | vfio_lock_acct(i); |
| 306 | |||
| 307 | return i; | ||
| 290 | } | 308 | } |
| 291 | 309 | ||
| 292 | static inline bool ranges_overlap(dma_addr_t start1, size_t size1, | 310 | static long vfio_unpin_pages(unsigned long pfn, long npage, |
| 293 | dma_addr_t start2, size_t size2) | 311 | int prot, bool do_accounting) |
| 294 | { | 312 | { |
| 295 | if (start1 < start2) | 313 | unsigned long unlocked = 0; |
| 296 | return (start2 - start1 < size1); | 314 | long i; |
| 297 | else if (start2 < start1) | 315 | |
| 298 | return (start1 - start2 < size2); | 316 | for (i = 0; i < npage; i++) |
| 299 | return (size1 > 0 && size2 > 0); | 317 | unlocked += put_pfn(pfn++, prot); |
| 318 | |||
| 319 | if (do_accounting) | ||
| 320 | vfio_lock_acct(-unlocked); | ||
| 321 | |||
| 322 | return unlocked; | ||
| 300 | } | 323 | } |
| 301 | 324 | ||
| 302 | static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, | 325 | static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, |
| 303 | dma_addr_t start, size_t size) | 326 | dma_addr_t iova, size_t *size) |
| 304 | { | 327 | { |
| 305 | struct vfio_dma *dma; | 328 | dma_addr_t start = iova, end = iova + *size; |
| 329 | long unlocked = 0; | ||
| 306 | 330 | ||
| 307 | list_for_each_entry(dma, &iommu->dma_list, next) { | 331 | while (iova < end) { |
| 308 | if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage), | 332 | size_t unmapped; |
| 309 | start, size)) | 333 | phys_addr_t phys; |
| 310 | return dma; | 334 | |
| 335 | /* | ||
| 336 | * We use the IOMMU to track the physical address. This | ||
| 337 | * saves us from having a lot more entries in our mapping | ||
| 338 | * tree. The downside is that we don't track the size | ||
| 339 | * used to do the mapping. We request unmap of a single | ||
| 340 | * page, but expect IOMMUs that support large pages to | ||
| 341 | * unmap a larger chunk. | ||
| 342 | */ | ||
| 343 | phys = iommu_iova_to_phys(iommu->domain, iova); | ||
| 344 | if (WARN_ON(!phys)) { | ||
| 345 | iova += PAGE_SIZE; | ||
| 346 | continue; | ||
| 347 | } | ||
| 348 | |||
| 349 | unmapped = iommu_unmap(iommu->domain, iova, PAGE_SIZE); | ||
| 350 | if (!unmapped) | ||
| 351 | break; | ||
| 352 | |||
| 353 | unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT, | ||
| 354 | unmapped >> PAGE_SHIFT, | ||
| 355 | dma->prot, false); | ||
| 356 | iova += unmapped; | ||
| 311 | } | 357 | } |
| 312 | return NULL; | 358 | |
| 359 | vfio_lock_acct(-unlocked); | ||
| 360 | |||
| 361 | *size = iova - start; | ||
| 362 | |||
| 363 | return 0; | ||
| 313 | } | 364 | } |
| 314 | 365 | ||
| 315 | static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start, | 366 | static int vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start, |
| 316 | size_t size, struct vfio_dma *dma) | 367 | size_t *size, struct vfio_dma *dma) |
| 317 | { | 368 | { |
| 369 | size_t offset, overlap, tmp; | ||
| 318 | struct vfio_dma *split; | 370 | struct vfio_dma *split; |
| 319 | long npage_lo, npage_hi; | 371 | int ret; |
| 320 | 372 | ||
| 321 | /* Existing dma region is completely covered, unmap all */ | 373 | if (!*size) |
| 322 | if (start <= dma->iova && | 374 | return 0; |
| 323 | start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) { | 375 | |
| 324 | vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot); | 376 | /* |
| 325 | list_del(&dma->next); | 377 | * Existing dma region is completely covered, unmap all. This is |
| 326 | npage_lo = dma->npage; | 378 | * the likely case since userspace tends to map and unmap buffers |
| 379 | * in one shot rather than multiple mappings within a buffer. | ||
| 380 | */ | ||
| 381 | if (likely(start <= dma->iova && | ||
| 382 | start + *size >= dma->iova + dma->size)) { | ||
| 383 | *size = dma->size; | ||
| 384 | ret = vfio_unmap_unpin(iommu, dma, dma->iova, size); | ||
| 385 | if (ret) | ||
| 386 | return ret; | ||
| 387 | |||
| 388 | /* | ||
| 389 | * Did we remove more than we have? Should never happen | ||
| 390 | * since a vfio_dma is contiguous in iova and vaddr. | ||
| 391 | */ | ||
| 392 | WARN_ON(*size != dma->size); | ||
| 393 | |||
| 394 | vfio_remove_dma(iommu, dma); | ||
| 327 | kfree(dma); | 395 | kfree(dma); |
| 328 | return npage_lo; | 396 | return 0; |
| 329 | } | 397 | } |
| 330 | 398 | ||
| 331 | /* Overlap low address of existing range */ | 399 | /* Overlap low address of existing range */ |
| 332 | if (start <= dma->iova) { | 400 | if (start <= dma->iova) { |
| 333 | size_t overlap; | 401 | overlap = start + *size - dma->iova; |
| 402 | ret = vfio_unmap_unpin(iommu, dma, dma->iova, &overlap); | ||
| 403 | if (ret) | ||
| 404 | return ret; | ||
| 334 | 405 | ||
| 335 | overlap = start + size - dma->iova; | 406 | vfio_remove_dma(iommu, dma); |
| 336 | npage_lo = overlap >> PAGE_SHIFT; | ||
| 337 | 407 | ||
| 338 | vfio_dma_unmap(iommu, dma->iova, npage_lo, dma->prot); | 408 | /* |
| 339 | dma->iova += overlap; | 409 | * Check, we may have removed to whole vfio_dma. If not |
| 340 | dma->vaddr += overlap; | 410 | * fixup and re-insert. |
| 341 | dma->npage -= npage_lo; | 411 | */ |
| 342 | return npage_lo; | 412 | if (overlap < dma->size) { |
| 413 | dma->iova += overlap; | ||
| 414 | dma->vaddr += overlap; | ||
| 415 | dma->size -= overlap; | ||
| 416 | vfio_insert_dma(iommu, dma); | ||
| 417 | } else | ||
| 418 | kfree(dma); | ||
| 419 | |||
| 420 | *size = overlap; | ||
| 421 | return 0; | ||
| 343 | } | 422 | } |
| 344 | 423 | ||
| 345 | /* Overlap high address of existing range */ | 424 | /* Overlap high address of existing range */ |
| 346 | if (start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) { | 425 | if (start + *size >= dma->iova + dma->size) { |
| 347 | size_t overlap; | 426 | offset = start - dma->iova; |
| 427 | overlap = dma->size - offset; | ||
| 348 | 428 | ||
| 349 | overlap = dma->iova + NPAGE_TO_SIZE(dma->npage) - start; | 429 | ret = vfio_unmap_unpin(iommu, dma, start, &overlap); |
| 350 | npage_hi = overlap >> PAGE_SHIFT; | 430 | if (ret) |
| 431 | return ret; | ||
| 351 | 432 | ||
| 352 | vfio_dma_unmap(iommu, start, npage_hi, dma->prot); | 433 | dma->size -= overlap; |
| 353 | dma->npage -= npage_hi; | 434 | *size = overlap; |
| 354 | return npage_hi; | 435 | return 0; |
| 355 | } | 436 | } |
| 356 | 437 | ||
| 357 | /* Split existing */ | 438 | /* Split existing */ |
| 358 | npage_lo = (start - dma->iova) >> PAGE_SHIFT; | ||
| 359 | npage_hi = dma->npage - (size >> PAGE_SHIFT) - npage_lo; | ||
| 360 | 439 | ||
| 361 | split = kzalloc(sizeof *split, GFP_KERNEL); | 440 | /* |
| 441 | * Allocate our tracking structure early even though it may not | ||
| 442 | * be used. An Allocation failure later loses track of pages and | ||
| 443 | * is more difficult to unwind. | ||
| 444 | */ | ||
| 445 | split = kzalloc(sizeof(*split), GFP_KERNEL); | ||
| 362 | if (!split) | 446 | if (!split) |
| 363 | return -ENOMEM; | 447 | return -ENOMEM; |
| 364 | 448 | ||
| 365 | vfio_dma_unmap(iommu, start, size >> PAGE_SHIFT, dma->prot); | 449 | offset = start - dma->iova; |
| 450 | |||
| 451 | ret = vfio_unmap_unpin(iommu, dma, start, size); | ||
| 452 | if (ret || !*size) { | ||
| 453 | kfree(split); | ||
| 454 | return ret; | ||
| 455 | } | ||
| 456 | |||
| 457 | tmp = dma->size; | ||
| 366 | 458 | ||
| 367 | dma->npage = npage_lo; | 459 | /* Resize the lower vfio_dma in place, before the below insert */ |
| 460 | dma->size = offset; | ||
| 368 | 461 | ||
| 369 | split->npage = npage_hi; | 462 | /* Insert new for remainder, assuming it didn't all get unmapped */ |
| 370 | split->iova = start + size; | 463 | if (likely(offset + *size < tmp)) { |
| 371 | split->vaddr = dma->vaddr + NPAGE_TO_SIZE(npage_lo) + size; | 464 | split->size = tmp - offset - *size; |
| 372 | split->prot = dma->prot; | 465 | split->iova = dma->iova + offset + *size; |
| 373 | list_add(&split->next, &iommu->dma_list); | 466 | split->vaddr = dma->vaddr + offset + *size; |
| 374 | return size >> PAGE_SHIFT; | 467 | split->prot = dma->prot; |
| 468 | vfio_insert_dma(iommu, split); | ||
| 469 | } else | ||
| 470 | kfree(split); | ||
| 471 | |||
| 472 | return 0; | ||
| 375 | } | 473 | } |
| 376 | 474 | ||
| 377 | static int vfio_dma_do_unmap(struct vfio_iommu *iommu, | 475 | static int vfio_dma_do_unmap(struct vfio_iommu *iommu, |
| 378 | struct vfio_iommu_type1_dma_unmap *unmap) | 476 | struct vfio_iommu_type1_dma_unmap *unmap) |
| 379 | { | 477 | { |
| 380 | long ret = 0, npage = unmap->size >> PAGE_SHIFT; | ||
| 381 | struct vfio_dma *dma, *tmp; | ||
| 382 | uint64_t mask; | 478 | uint64_t mask; |
| 479 | struct vfio_dma *dma; | ||
| 480 | size_t unmapped = 0, size; | ||
| 481 | int ret = 0; | ||
| 383 | 482 | ||
| 384 | mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1; | 483 | mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1; |
| 385 | 484 | ||
| 386 | if (unmap->iova & mask) | 485 | if (unmap->iova & mask) |
| 387 | return -EINVAL; | 486 | return -EINVAL; |
| 388 | if (unmap->size & mask) | 487 | if (!unmap->size || unmap->size & mask) |
| 389 | return -EINVAL; | 488 | return -EINVAL; |
| 390 | 489 | ||
| 391 | /* XXX We still break these down into PAGE_SIZE */ | ||
| 392 | WARN_ON(mask & PAGE_MASK); | 490 | WARN_ON(mask & PAGE_MASK); |
| 393 | 491 | ||
| 394 | mutex_lock(&iommu->lock); | 492 | mutex_lock(&iommu->lock); |
| 395 | 493 | ||
| 396 | list_for_each_entry_safe(dma, tmp, &iommu->dma_list, next) { | 494 | while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) { |
| 397 | if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage), | 495 | size = unmap->size; |
| 398 | unmap->iova, unmap->size)) { | 496 | ret = vfio_remove_dma_overlap(iommu, unmap->iova, &size, dma); |
| 399 | ret = vfio_remove_dma_overlap(iommu, unmap->iova, | 497 | if (ret || !size) |
| 400 | unmap->size, dma); | 498 | break; |
| 401 | if (ret > 0) | 499 | unmapped += size; |
| 402 | npage -= ret; | ||
| 403 | if (ret < 0 || npage == 0) | ||
| 404 | break; | ||
| 405 | } | ||
| 406 | } | 500 | } |
| 501 | |||
| 407 | mutex_unlock(&iommu->lock); | 502 | mutex_unlock(&iommu->lock); |
| 408 | return ret > 0 ? 0 : (int)ret; | 503 | |
| 504 | /* | ||
| 505 | * We may unmap more than requested, update the unmap struct so | ||
| 506 | * userspace can know. | ||
| 507 | */ | ||
| 508 | unmap->size = unmapped; | ||
| 509 | |||
| 510 | return ret; | ||
| 511 | } | ||
| 512 | |||
| 513 | /* | ||
| 514 | * Turns out AMD IOMMU has a page table bug where it won't map large pages | ||
| 515 | * to a region that previously mapped smaller pages. This should be fixed | ||
| 516 | * soon, so this is just a temporary workaround to break mappings down into | ||
| 517 | * PAGE_SIZE. Better to map smaller pages than nothing. | ||
| 518 | */ | ||
| 519 | static int map_try_harder(struct vfio_iommu *iommu, dma_addr_t iova, | ||
| 520 | unsigned long pfn, long npage, int prot) | ||
| 521 | { | ||
| 522 | long i; | ||
| 523 | int ret; | ||
| 524 | |||
| 525 | for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) { | ||
| 526 | ret = iommu_map(iommu->domain, iova, | ||
| 527 | (phys_addr_t)pfn << PAGE_SHIFT, | ||
| 528 | PAGE_SIZE, prot); | ||
| 529 | if (ret) | ||
| 530 | break; | ||
| 531 | } | ||
| 532 | |||
| 533 | for (; i < npage && i > 0; i--, iova -= PAGE_SIZE) | ||
| 534 | iommu_unmap(iommu->domain, iova, PAGE_SIZE); | ||
| 535 | |||
| 536 | return ret; | ||
| 409 | } | 537 | } |
| 410 | 538 | ||
| 411 | static int vfio_dma_do_map(struct vfio_iommu *iommu, | 539 | static int vfio_dma_do_map(struct vfio_iommu *iommu, |
| 412 | struct vfio_iommu_type1_dma_map *map) | 540 | struct vfio_iommu_type1_dma_map *map) |
| 413 | { | 541 | { |
| 414 | struct vfio_dma *dma, *pdma = NULL; | 542 | dma_addr_t end, iova; |
| 415 | dma_addr_t iova = map->iova; | 543 | unsigned long vaddr = map->vaddr; |
| 416 | unsigned long locked, lock_limit, vaddr = map->vaddr; | ||
| 417 | size_t size = map->size; | 544 | size_t size = map->size; |
| 545 | long npage; | ||
| 418 | int ret = 0, prot = 0; | 546 | int ret = 0, prot = 0; |
| 419 | uint64_t mask; | 547 | uint64_t mask; |
| 420 | long npage; | 548 | |
| 549 | end = map->iova + map->size; | ||
| 421 | 550 | ||
| 422 | mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1; | 551 | mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1; |
| 423 | 552 | ||
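vfio_pin_pages() above deliberately stops at the first non-consecutive or reserved pfn (or at the memlock limit) so that the whole run can be handed to iommu_map() as one large, possibly hugepage-backed mapping. The sketch below shows the run-length idea on a plain array of pfns; it is a userspace illustration, not the kernel routine.

/* Count how many leading pfns in a pinned batch are physically
 * consecutive, which is the chunk vfio_pin_pages() maps in one go.
 */
#include <stdio.h>

static long consecutive_run(const unsigned long *pfn, long npage)
{
	long i;

	for (i = 1; i < npage; i++)
		if (pfn[i] != pfn[0] + i)	/* first discontinuity ends the run */
			break;
	return i;
}

int main(void)
{
	/* Hypothetical pfns for an 8-page buffer: 5 contiguous, then a gap. */
	unsigned long pfn[] = { 0x1000, 0x1001, 0x1002, 0x1003, 0x1004,
				0x2000, 0x2001, 0x2002 };
	long run = consecutive_run(pfn, sizeof(pfn) / sizeof(pfn[0]));

	printf("first mapping covers %ld pages (%ld KB with 4K pages)\n",
	       run, run * 4);
	printf("remaining pages start a new chunk\n");
	return 0;
}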
| @@ -430,104 +559,144 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, | |||
| 430 | if (!prot) | 559 | if (!prot) |
| 431 | return -EINVAL; /* No READ/WRITE? */ | 560 | return -EINVAL; /* No READ/WRITE? */ |
| 432 | 561 | ||
| 562 | if (iommu->cache) | ||
| 563 | prot |= IOMMU_CACHE; | ||
| 564 | |||
| 433 | if (vaddr & mask) | 565 | if (vaddr & mask) |
| 434 | return -EINVAL; | 566 | return -EINVAL; |
| 435 | if (iova & mask) | 567 | if (map->iova & mask) |
| 436 | return -EINVAL; | 568 | return -EINVAL; |
| 437 | if (size & mask) | 569 | if (!map->size || map->size & mask) |
| 438 | return -EINVAL; | 570 | return -EINVAL; |
| 439 | 571 | ||
| 440 | /* XXX We still break these down into PAGE_SIZE */ | ||
| 441 | WARN_ON(mask & PAGE_MASK); | 572 | WARN_ON(mask & PAGE_MASK); |
| 442 | 573 | ||
| 443 | /* Don't allow IOVA wrap */ | 574 | /* Don't allow IOVA wrap */ |
| 444 | if (iova + size && iova + size < iova) | 575 | if (end && end < map->iova) |
| 445 | return -EINVAL; | 576 | return -EINVAL; |
| 446 | 577 | ||
| 447 | /* Don't allow virtual address wrap */ | 578 | /* Don't allow virtual address wrap */ |
| 448 | if (vaddr + size && vaddr + size < vaddr) | 579 | if (vaddr + map->size && vaddr + map->size < vaddr) |
| 449 | return -EINVAL; | ||
| 450 | |||
| 451 | npage = size >> PAGE_SHIFT; | ||
| 452 | if (!npage) | ||
| 453 | return -EINVAL; | 580 | return -EINVAL; |
| 454 | 581 | ||
| 455 | mutex_lock(&iommu->lock); | 582 | mutex_lock(&iommu->lock); |
| 456 | 583 | ||
| 457 | if (vfio_find_dma(iommu, iova, size)) { | 584 | if (vfio_find_dma(iommu, map->iova, map->size)) { |
| 458 | ret = -EBUSY; | 585 | mutex_unlock(&iommu->lock); |
| 459 | goto out_lock; | 586 | return -EEXIST; |
| 460 | } | 587 | } |
| 461 | 588 | ||
| 462 | /* account for locked pages */ | 589 | for (iova = map->iova; iova < end; iova += size, vaddr += size) { |
| 463 | locked = current->mm->locked_vm + npage; | 590 | struct vfio_dma *dma = NULL; |
| 464 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | 591 | unsigned long pfn; |
| 465 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { | 592 | long i; |
| 466 | pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", | 593 | |
| 467 | __func__, rlimit(RLIMIT_MEMLOCK)); | 594 | /* Pin a contiguous chunk of memory */ |
| 468 | ret = -ENOMEM; | 595 | npage = vfio_pin_pages(vaddr, (end - iova) >> PAGE_SHIFT, |
| 469 | goto out_lock; | 596 | prot, &pfn); |
| 470 | } | 597 | if (npage <= 0) { |
| 598 | WARN_ON(!npage); | ||
| 599 | ret = (int)npage; | ||
| 600 | break; | ||
| 601 | } | ||
| 471 | 602 | ||
| 472 | ret = __vfio_dma_map(iommu, iova, vaddr, npage, prot); | 603 | /* Verify pages are not already mapped */ |
| 473 | if (ret) | 604 | for (i = 0; i < npage; i++) { |
| 474 | goto out_lock; | 605 | if (iommu_iova_to_phys(iommu->domain, |
| 606 | iova + (i << PAGE_SHIFT))) { | ||
| 607 | vfio_unpin_pages(pfn, npage, prot, true); | ||
| 608 | ret = -EBUSY; | ||
| 609 | break; | ||
| 610 | } | ||
| 611 | } | ||
| 475 | 612 | ||
| 476 | /* Check if we abut a region below - nothing below 0 */ | 613 | ret = iommu_map(iommu->domain, iova, |
| 477 | if (iova) { | 614 | (phys_addr_t)pfn << PAGE_SHIFT, |
| 478 | dma = vfio_find_dma(iommu, iova - 1, 1); | 615 | npage << PAGE_SHIFT, prot); |
| 479 | if (dma && dma->prot == prot && | 616 | if (ret) { |
| 480 | dma->vaddr + NPAGE_TO_SIZE(dma->npage) == vaddr) { | 617 | if (ret != -EBUSY || |
| 618 | map_try_harder(iommu, iova, pfn, npage, prot)) { | ||
| 619 | vfio_unpin_pages(pfn, npage, prot, true); | ||
| 620 | break; | ||
| 621 | } | ||
| 622 | } | ||
| 481 | 623 | ||
| 482 | dma->npage += npage; | 624 | size = npage << PAGE_SHIFT; |
| 483 | iova = dma->iova; | ||
| 484 | vaddr = dma->vaddr; | ||
| 485 | npage = dma->npage; | ||
| 486 | size = NPAGE_TO_SIZE(npage); | ||
| 487 | 625 | ||
| 488 | pdma = dma; | 626 | /* |
| 627 | * Check if we abut a region below - nothing below 0. | ||
| 628 | * This is the most likely case when mapping chunks of | ||
| 629 | * physically contiguous regions within a virtual address | ||
| 630 | * range. Update the abutting entry in place since iova | ||
| 631 | * doesn't change. | ||
| 632 | */ | ||
| 633 | if (likely(iova)) { | ||
| 634 | struct vfio_dma *tmp; | ||
| 635 | tmp = vfio_find_dma(iommu, iova - 1, 1); | ||
| 636 | if (tmp && tmp->prot == prot && | ||
| 637 | tmp->vaddr + tmp->size == vaddr) { | ||
| 638 | tmp->size += size; | ||
| 639 | iova = tmp->iova; | ||
| 640 | size = tmp->size; | ||
| 641 | vaddr = tmp->vaddr; | ||
| 642 | dma = tmp; | ||
| 643 | } | ||
| 644 | } | ||
| 645 | |||
| 646 | /* | ||
| 647 | * Check if we abut a region above - nothing above ~0 + 1. | ||
| 648 | * If we abut above and below, remove and free. If only | ||
| 649 | * abut above, remove, modify, reinsert. | ||
| 650 | */ | ||
| 651 | if (likely(iova + size)) { | ||
| 652 | struct vfio_dma *tmp; | ||
| 653 | tmp = vfio_find_dma(iommu, iova + size, 1); | ||
| 654 | if (tmp && tmp->prot == prot && | ||
| 655 | tmp->vaddr == vaddr + size) { | ||
| 656 | vfio_remove_dma(iommu, tmp); | ||
| 657 | if (dma) { | ||
| 658 | dma->size += tmp->size; | ||
| 659 | kfree(tmp); | ||
| 660 | } else { | ||
| 661 | size += tmp->size; | ||
| 662 | tmp->size = size; | ||
| 663 | tmp->iova = iova; | ||
| 664 | tmp->vaddr = vaddr; | ||
| 665 | vfio_insert_dma(iommu, tmp); | ||
| 666 | dma = tmp; | ||
| 667 | } | ||
| 668 | } | ||
| 489 | } | 669 | } |
| 490 | } | ||
| 491 | 670 | ||
| 492 | /* Check if we abut a region above - nothing above ~0 + 1 */ | 671 | if (!dma) { |
| 493 | if (iova + size) { | 672 | dma = kzalloc(sizeof(*dma), GFP_KERNEL); |
| 494 | dma = vfio_find_dma(iommu, iova + size, 1); | 673 | if (!dma) { |
| 495 | if (dma && dma->prot == prot && | 674 | iommu_unmap(iommu->domain, iova, size); |
| 496 | dma->vaddr == vaddr + size) { | 675 | vfio_unpin_pages(pfn, npage, prot, true); |
| 676 | ret = -ENOMEM; | ||
| 677 | break; | ||
| 678 | } | ||
| 497 | 679 | ||
| 498 | dma->npage += npage; | 680 | dma->size = size; |
| 499 | dma->iova = iova; | 681 | dma->iova = iova; |
| 500 | dma->vaddr = vaddr; | 682 | dma->vaddr = vaddr; |
| 501 | 683 | dma->prot = prot; | |
| 502 | /* | 684 | vfio_insert_dma(iommu, dma); |
| 503 | * If merged above and below, remove previously | ||
| 504 | * merged entry. New entry covers it. | ||
| 505 | */ | ||
| 506 | if (pdma) { | ||
| 507 | list_del(&pdma->next); | ||
| 508 | kfree(pdma); | ||
| 509 | } | ||
| 510 | pdma = dma; | ||
| 511 | } | 685 | } |
| 512 | } | 686 | } |
| 513 | 687 | ||
| 514 | /* Isolated, new region */ | 688 | if (ret) { |
| 515 | if (!pdma) { | 689 | struct vfio_dma *tmp; |
| 516 | dma = kzalloc(sizeof *dma, GFP_KERNEL); | 690 | iova = map->iova; |
| 517 | if (!dma) { | 691 | size = map->size; |
| 518 | ret = -ENOMEM; | 692 | while ((tmp = vfio_find_dma(iommu, iova, size))) { |
| 519 | vfio_dma_unmap(iommu, iova, npage, prot); | 693 | int r = vfio_remove_dma_overlap(iommu, iova, |
| 520 | goto out_lock; | 694 | &size, tmp); |
| 695 | if (WARN_ON(r || !size)) | ||
| 696 | break; | ||
| 521 | } | 697 | } |
| 522 | |||
| 523 | dma->npage = npage; | ||
| 524 | dma->iova = iova; | ||
| 525 | dma->vaddr = vaddr; | ||
| 526 | dma->prot = prot; | ||
| 527 | list_add(&dma->next, &iommu->dma_list); | ||
| 528 | } | 698 | } |
| 529 | 699 | ||
| 530 | out_lock: | ||
| 531 | mutex_unlock(&iommu->lock); | 700 | mutex_unlock(&iommu->lock); |
| 532 | return ret; | 701 | return ret; |
| 533 | } | 702 | } |
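After each chunk is mapped, the code above tries to coalesce it with abutting vfio_dma entries whose prot matches and whose vaddr runs on contiguously, so a physically fragmented buffer still collapses into a few tracking nodes. A simplified userspace sketch of that merge decision is below; the structures and values are illustrative.

/* Decide whether a freshly mapped chunk can be merged with an
 * existing tracking entry, mirroring the abutment checks above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct chunk {
	uint64_t iova;
	uint64_t vaddr;
	uint64_t size;
	int prot;
};

/* True if 'next' starts exactly where 'prev' ends, in both iova and vaddr. */
static bool can_merge(const struct chunk *prev, const struct chunk *next)
{
	return prev->prot == next->prot &&
	       prev->iova + prev->size == next->iova &&
	       prev->vaddr + prev->size == next->vaddr;
}

int main(void)
{
	struct chunk below = { 0x100000, 0x7f0000000000, 0x4000, 3 };
	struct chunk new_c = { 0x104000, 0x7f0000004000, 0x2000, 3 };
	struct chunk above = { 0x106000, 0x7f0000007000, 0x1000, 3 };

	if (can_merge(&below, &new_c)) {
		below.size += new_c.size;	/* extend the entry below in place */
		printf("merged below: iova %#llx size %#llx\n",
		       (unsigned long long)below.iova,
		       (unsigned long long)below.size);
	}
	/* vaddr of 'above' is not contiguous here, so it stays separate. */
	printf("merge with above: %s\n",
	       can_merge(&below, &above) ? "yes" : "no");
	return 0;
}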
| @@ -606,7 +775,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) | |||
| 606 | return ERR_PTR(-ENOMEM); | 775 | return ERR_PTR(-ENOMEM); |
| 607 | 776 | ||
| 608 | INIT_LIST_HEAD(&iommu->group_list); | 777 | INIT_LIST_HEAD(&iommu->group_list); |
| 609 | INIT_LIST_HEAD(&iommu->dma_list); | 778 | iommu->dma_list = RB_ROOT; |
| 610 | mutex_init(&iommu->lock); | 779 | mutex_init(&iommu->lock); |
| 611 | 780 | ||
| 612 | /* | 781 | /* |
| @@ -640,7 +809,7 @@ static void vfio_iommu_type1_release(void *iommu_data) | |||
| 640 | { | 809 | { |
| 641 | struct vfio_iommu *iommu = iommu_data; | 810 | struct vfio_iommu *iommu = iommu_data; |
| 642 | struct vfio_group *group, *group_tmp; | 811 | struct vfio_group *group, *group_tmp; |
| 643 | struct vfio_dma *dma, *dma_tmp; | 812 | struct rb_node *node; |
| 644 | 813 | ||
| 645 | list_for_each_entry_safe(group, group_tmp, &iommu->group_list, next) { | 814 | list_for_each_entry_safe(group, group_tmp, &iommu->group_list, next) { |
| 646 | iommu_detach_group(iommu->domain, group->iommu_group); | 815 | iommu_detach_group(iommu->domain, group->iommu_group); |
| @@ -648,10 +817,12 @@ static void vfio_iommu_type1_release(void *iommu_data) | |||
| 648 | kfree(group); | 817 | kfree(group); |
| 649 | } | 818 | } |
| 650 | 819 | ||
| 651 | list_for_each_entry_safe(dma, dma_tmp, &iommu->dma_list, next) { | 820 | while ((node = rb_first(&iommu->dma_list))) { |
| 652 | vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot); | 821 | struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); |
| 653 | list_del(&dma->next); | 822 | size_t size = dma->size; |
| 654 | kfree(dma); | 823 | vfio_remove_dma_overlap(iommu, dma->iova, &size, dma); |
| 824 | if (WARN_ON(!size)) | ||
| 825 | break; | ||
| 655 | } | 826 | } |
| 656 | 827 | ||
| 657 | iommu_domain_free(iommu->domain); | 828 | iommu_domain_free(iommu->domain); |
| @@ -706,6 +877,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, | |||
| 706 | 877 | ||
| 707 | } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { | 878 | } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { |
| 708 | struct vfio_iommu_type1_dma_unmap unmap; | 879 | struct vfio_iommu_type1_dma_unmap unmap; |
| 880 | long ret; | ||
| 709 | 881 | ||
| 710 | minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); | 882 | minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); |
| 711 | 883 | ||
| @@ -715,7 +887,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, | |||
| 715 | if (unmap.argsz < minsz || unmap.flags) | 887 | if (unmap.argsz < minsz || unmap.flags) |
| 716 | return -EINVAL; | 888 | return -EINVAL; |
| 717 | 889 | ||
| 718 | return vfio_dma_do_unmap(iommu, &unmap); | 890 | ret = vfio_dma_do_unmap(iommu, &unmap); |
| 891 | if (ret) | ||
| 892 | return ret; | ||
| 893 | |||
| 894 | return copy_to_user((void __user *)arg, &unmap, minsz); | ||
| 719 | } | 895 | } |
| 720 | 896 | ||
| 721 | return -ENOTTY; | 897 | return -ENOTTY; |
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 87ee4f4cff25..916e444e6f74 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
| @@ -362,10 +362,14 @@ struct vfio_iommu_type1_dma_map { | |||
| 362 | #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) | 362 | #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) |
| 363 | 363 | ||
| 364 | /** | 364 | /** |
| 365 | * VFIO_IOMMU_UNMAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 14, struct vfio_dma_unmap) | 365 | * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, |
| 366 | * struct vfio_dma_unmap) | ||
| 366 | * | 367 | * |
| 367 | * Unmap IO virtual addresses using the provided struct vfio_dma_unmap. | 368 | * Unmap IO virtual addresses using the provided struct vfio_dma_unmap. |
| 368 | * Caller sets argsz. | 369 | * Caller sets argsz. The actual unmapped size is returned in the size |
| 370 | * field. No guarantee is made to the user that arbitrary unmaps of iova | ||
| 371 | * or size different from those used in the original mapping call will | ||
| 372 | * succeed. | ||
| 369 | */ | 373 | */ |
| 370 | struct vfio_iommu_type1_dma_unmap { | 374 | struct vfio_iommu_type1_dma_unmap { |
| 371 | __u32 argsz; | 375 | __u32 argsz; |
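The uapi change above turns VFIO_IOMMU_UNMAP_DMA into a read-write ioctl: the driver writes the size it actually unmapped back into the struct, which may differ from the request when, for example, a hugepage mapping is torn down. A hedged userspace sketch of checking that returned size is below; it assumes a container fd already set up with VFIO_TYPE1_IOMMU as in the earlier example, and the IOVA is illustrative.

/* Sketch: map a buffer, then unmap it and check the size the kernel
 * reports back.  'container' is assumed to be an initialized type1
 * container fd; error handling is trimmed for brevity.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

int map_and_unmap(int container)
{
	size_t len = 2 * 1024 * 1024;	/* 2MB, hugepage-sized if backed that way */
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (unsigned long)buf,
		.iova  = 0x100000,	/* illustrative IOVA */
		.size  = len,
	};
	struct vfio_iommu_type1_dma_unmap unmap = {
		.argsz = sizeof(unmap),
		.iova  = 0x100000,
		.size  = len,
	};

	if (ioctl(container, VFIO_IOMMU_MAP_DMA, &map))
		return -1;

	if (ioctl(container, VFIO_IOMMU_UNMAP_DMA, &unmap))
		return -1;

	/* The kernel reports how much was really unmapped. */
	printf("requested %zu bytes, kernel unmapped %llu bytes\n",
	       len, (unsigned long long)unmap.size);
	munmap(buf, len);
	return 0;
}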
