author	Alex Williamson <alex.williamson@redhat.com>	2013-06-21 11:37:50 -0400
committer	Alex Williamson <alex.williamson@redhat.com>	2013-06-21 11:37:50 -0400
commit	cd9b22685e4ccd728550d51fbe108c473f89df4f (patch)
tree	36ca96bd395c621c33db63b8b5e7014f5005515b /drivers/vfio
parent	7d132055814ef17a6c7b69f342244c410a5e000f (diff)
vfio: Convert type1 iommu to use rbtree
We need to keep track of all the DMA mappings of an iommu container so
that they can be automatically unmapped when the user releases the file
descriptor. We currently do this using a simple list, where we merge
entries with contiguous iovas and virtual addresses. Using a tree for
this is a bit more efficient and allows us to use common code instead
of inventing our own.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio')
-rw-r--r--	drivers/vfio/vfio_iommu_type1.c	190
1 file changed, 96 insertions(+), 94 deletions(-)
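
For context, a minimal userspace sketch (not part of the patch) of the interface this bookkeeping serves: every successful VFIO_IOMMU_MAP_DMA becomes one vfio_dma entry in the container's dma_list, VFIO_IOMMU_UNMAP_DMA removes the overlapping entries, and whatever remains is unmapped when the container file descriptor is released. The helper names below and the assumption of an already-configured type1 container fd are purely illustrative.

```c
/*
 * Illustrative only: assumes "container" is a /dev/vfio/vfio fd already
 * set up with VFIO_SET_IOMMU for the type1 backend, and that buf/len/iova
 * are aligned to the IOMMU page size.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int map_buffer(int container, void *buf, size_t len, __u64 iova)
{
	struct vfio_iommu_type1_dma_map map;

	memset(&map, 0, sizeof(map));
	map.argsz = sizeof(map);
	map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
	map.vaddr = (__u64)(unsigned long)buf;	/* process virtual address */
	map.iova  = iova;			/* device (IO virtual) address */
	map.size  = len;

	/* on success the kernel tracks this range until unmapped or fd close */
	return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
}

static int unmap_buffer(int container, __u64 iova, size_t len)
{
	struct vfio_iommu_type1_dma_unmap unmap;

	memset(&unmap, 0, sizeof(unmap));
	unmap.argsz = sizeof(unmap);
	unmap.iova  = iova;
	unmap.size  = len;	/* may overlap several tracked mappings */

	return ioctl(container, VFIO_IOMMU_UNMAP_DMA, &unmap);
}
```
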
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 6f3fbc48a6c7..0e863b3ddcab 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -31,6 +31,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/pci.h>		/* pci_bus_type */
+#include <linux/rbtree.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -50,13 +51,13 @@ MODULE_PARM_DESC(allow_unsafe_interrupts,
 struct vfio_iommu {
 	struct iommu_domain	*domain;
 	struct mutex		lock;
-	struct list_head	dma_list;
+	struct rb_root		dma_list;
 	struct list_head	group_list;
 	bool			cache;
 };
 
 struct vfio_dma {
-	struct list_head	next;
+	struct rb_node		node;
 	dma_addr_t		iova;		/* Device address */
 	unsigned long		vaddr;		/* Process virtual addr */
 	long			npage;		/* Number of pages */
@@ -75,6 +76,49 @@ struct vfio_group {
 
 #define NPAGE_TO_SIZE(npage)	((size_t)(npage) << PAGE_SHIFT)
 
+static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
+				      dma_addr_t start, size_t size)
+{
+	struct rb_node *node = iommu->dma_list.rb_node;
+
+	while (node) {
+		struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
+
+		if (start + size <= dma->iova)
+			node = node->rb_left;
+		else if (start >= dma->iova + NPAGE_TO_SIZE(dma->npage))
+			node = node->rb_right;
+		else
+			return dma;
+	}
+
+	return NULL;
+}
+
+static void vfio_insert_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
+{
+	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
+	struct vfio_dma *dma;
+
+	while (*link) {
+		parent = *link;
+		dma = rb_entry(parent, struct vfio_dma, node);
+
+		if (new->iova + NPAGE_TO_SIZE(new->npage) <= dma->iova)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &iommu->dma_list);
+}
+
+static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
+{
+	rb_erase(&old->node, &iommu->dma_list);
+}
+
 struct vwork {
 	struct mm_struct	*mm;
 	long			npage;
@@ -289,31 +333,8 @@ static int __vfio_dma_map(struct vfio_iommu *iommu, dma_addr_t iova,
 	return 0;
 }
 
-static inline bool ranges_overlap(dma_addr_t start1, size_t size1,
-				   dma_addr_t start2, size_t size2)
-{
-	if (start1 < start2)
-		return (start2 - start1 < size1);
-	else if (start2 < start1)
-		return (start1 - start2 < size2);
-	return (size1 > 0 && size2 > 0);
-}
-
-static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
-				      dma_addr_t start, size_t size)
-{
-	struct vfio_dma *dma;
-
-	list_for_each_entry(dma, &iommu->dma_list, next) {
-		if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage),
-				   start, size))
-			return dma;
-	}
-	return NULL;
-}
-
-static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
-				    size_t size, struct vfio_dma *dma)
+static int vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
+				   size_t size, struct vfio_dma *dma)
 {
 	struct vfio_dma *split;
 	long npage_lo, npage_hi;
@@ -322,10 +343,9 @@ static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
 	if (start <= dma->iova &&
 	    start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) {
 		vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
-		list_del(&dma->next);
-		npage_lo = dma->npage;
+		vfio_remove_dma(iommu, dma);
 		kfree(dma);
-		return npage_lo;
+		return 0;
 	}
 
 	/* Overlap low address of existing range */
@@ -339,7 +359,7 @@ static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
 		dma->iova += overlap;
 		dma->vaddr += overlap;
 		dma->npage -= npage_lo;
-		return npage_lo;
+		return 0;
 	}
 
 	/* Overlap high address of existing range */
@@ -351,7 +371,7 @@ static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
 
 		vfio_dma_unmap(iommu, start, npage_hi, dma->prot);
 		dma->npage -= npage_hi;
-		return npage_hi;
+		return 0;
 	}
 
 	/* Split existing */
@@ -370,16 +390,16 @@ static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
 	split->iova = start + size;
 	split->vaddr = dma->vaddr + NPAGE_TO_SIZE(npage_lo) + size;
 	split->prot = dma->prot;
-	list_add(&split->next, &iommu->dma_list);
-	return size >> PAGE_SHIFT;
+	vfio_insert_dma(iommu, split);
+	return 0;
 }
 
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 			     struct vfio_iommu_type1_dma_unmap *unmap)
 {
-	long ret = 0, npage = unmap->size >> PAGE_SHIFT;
-	struct vfio_dma *dma, *tmp;
 	uint64_t mask;
+	struct vfio_dma *dma;
+	int ret = 0;
 
 	mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;
 
@@ -393,25 +413,19 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 
 	mutex_lock(&iommu->lock);
 
-	list_for_each_entry_safe(dma, tmp, &iommu->dma_list, next) {
-		if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage),
-				   unmap->iova, unmap->size)) {
-			ret = vfio_remove_dma_overlap(iommu, unmap->iova,
-						      unmap->size, dma);
-			if (ret > 0)
-				npage -= ret;
-			if (ret < 0 || npage == 0)
-				break;
-		}
-	}
+	while (!ret && (dma = vfio_find_dma(iommu,
+					    unmap->iova, unmap->size)))
+		ret = vfio_remove_dma_overlap(iommu, unmap->iova,
+					      unmap->size, dma);
+
 	mutex_unlock(&iommu->lock);
-	return ret > 0 ? 0 : (int)ret;
+	return ret;
 }
 
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   struct vfio_iommu_type1_dma_map *map)
 {
-	struct vfio_dma *dma, *pdma = NULL;
+	struct vfio_dma *dma;
 	dma_addr_t iova = map->iova;
 	unsigned long locked, lock_limit, vaddr = map->vaddr;
 	size_t size = map->size;
@@ -452,6 +466,10 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	if (!npage)
 		return -EINVAL;
 
+	dma = kzalloc(sizeof *dma, GFP_KERNEL);
+	if (!dma)
+		return -ENOMEM;
+
 	mutex_lock(&iommu->lock);
 
 	if (vfio_find_dma(iommu, iova, size)) {
@@ -473,62 +491,45 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	if (ret)
 		goto out_lock;
 
+	dma->npage = npage;
+	dma->iova = iova;
+	dma->vaddr = vaddr;
+	dma->prot = prot;
+
 	/* Check if we abut a region below - nothing below 0 */
 	if (iova) {
-		dma = vfio_find_dma(iommu, iova - 1, 1);
-		if (dma && dma->prot == prot &&
-		    dma->vaddr + NPAGE_TO_SIZE(dma->npage) == vaddr) {
-
-			dma->npage += npage;
-			iova = dma->iova;
-			vaddr = dma->vaddr;
+		struct vfio_dma *tmp = vfio_find_dma(iommu, iova - 1, 1);
+		if (tmp && tmp->prot == prot &&
+		    tmp->vaddr + NPAGE_TO_SIZE(tmp->npage) == vaddr) {
+			vfio_remove_dma(iommu, tmp);
+			dma->npage += tmp->npage;
+			dma->iova = iova = tmp->iova;
+			dma->vaddr = vaddr = tmp->vaddr;
+			kfree(tmp);
 			npage = dma->npage;
 			size = NPAGE_TO_SIZE(npage);
-
-			pdma = dma;
 		}
 	}
 
 	/* Check if we abut a region above - nothing above ~0 + 1 */
 	if (iova + size) {
-		dma = vfio_find_dma(iommu, iova + size, 1);
-		if (dma && dma->prot == prot &&
-		    dma->vaddr == vaddr + size) {
-
-			dma->npage += npage;
-			dma->iova = iova;
-			dma->vaddr = vaddr;
-
-			/*
-			 * If merged above and below, remove previously
-			 * merged entry.  New entry covers it.
-			 */
-			if (pdma) {
-				list_del(&pdma->next);
-				kfree(pdma);
-			}
-			pdma = dma;
+		struct vfio_dma *tmp = vfio_find_dma(iommu, iova + size, 1);
+		if (tmp && tmp->prot == prot &&
+		    tmp->vaddr == vaddr + size) {
+			vfio_remove_dma(iommu, tmp);
+			dma->npage += tmp->npage;
+			kfree(tmp);
+			npage = dma->npage;
+			size = NPAGE_TO_SIZE(npage);
 		}
 	}
 
-	/* Isolated, new region */
-	if (!pdma) {
-		dma = kzalloc(sizeof *dma, GFP_KERNEL);
-		if (!dma) {
-			ret = -ENOMEM;
-			vfio_dma_unmap(iommu, iova, npage, prot);
-			goto out_lock;
-		}
-
-		dma->npage = npage;
-		dma->iova = iova;
-		dma->vaddr = vaddr;
-		dma->prot = prot;
-		list_add(&dma->next, &iommu->dma_list);
-	}
+	vfio_insert_dma(iommu, dma);
 
 out_lock:
 	mutex_unlock(&iommu->lock);
+	if (ret)
+		kfree(dma);
 	return ret;
 }
 
@@ -606,7 +607,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&iommu->group_list);
-	INIT_LIST_HEAD(&iommu->dma_list);
+	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
 
 	/*
@@ -640,7 +641,7 @@ static void vfio_iommu_type1_release(void *iommu_data)
 {
 	struct vfio_iommu *iommu = iommu_data;
 	struct vfio_group *group, *group_tmp;
-	struct vfio_dma *dma, *dma_tmp;
+	struct rb_node *node;
 
 	list_for_each_entry_safe(group, group_tmp, &iommu->group_list, next) {
 		iommu_detach_group(iommu->domain, group->iommu_group);
@@ -648,9 +649,10 @@ static void vfio_iommu_type1_release(void *iommu_data)
 		kfree(group);
 	}
 
-	list_for_each_entry_safe(dma, dma_tmp, &iommu->dma_list, next) {
+	while ((node = rb_first(&iommu->dma_list))) {
+		struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
 		vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
-		list_del(&dma->next);
+		vfio_remove_dma(iommu, dma);
 		kfree(dma);
 	}
 
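
The new vfio_find_dma() and vfio_insert_dma() both rely on the ranges in the rbtree being non-overlapping and ordered by iova, so any lookup range is either strictly left of a node, strictly right of it, or overlapping it. The standalone sketch below (a hypothetical helper, not code from the patch) restates that three-way comparison with a couple of checks:

```c
/* Illustration of the interval test used for rbtree descent in this patch. */
#include <assert.h>
#include <stdint.h>

enum side { LEFT, RIGHT, OVERLAP };

static enum side range_vs_node(uint64_t start, uint64_t size,
			       uint64_t node_iova, uint64_t node_len)
{
	if (start + size <= node_iova)		/* ends before the node begins */
		return LEFT;
	if (start >= node_iova + node_len)	/* begins after the node ends */
		return RIGHT;
	return OVERLAP;				/* ranges intersect: found */
}

int main(void)
{
	/* node covers iova [0x1000, 0x3000) */
	assert(range_vs_node(0x0000, 0x1000, 0x1000, 0x2000) == LEFT);
	assert(range_vs_node(0x3000, 0x1000, 0x1000, 0x2000) == RIGHT);
	assert(range_vs_node(0x2000, 0x0800, 0x1000, 0x2000) == OVERLAP);
	return 0;
}
```
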