author     Joerg Roedel <joerg.roedel@amd.com>   2011-11-24 10:21:52 -0500
committer  Joerg Roedel <joerg.roedel@amd.com>   2011-12-14 06:09:17 -0500
commit     8736b2c331030733c5d619170dc6e9ef211a4039
tree       33a16976842200dc173a55d13c61e7f592ef485d /drivers/iommu
parent     028eeacc412a8bebf6711e58629b0cab56a9ba87
iommu/amd: Implement notifiers for IOMMUv2
Since pages are not pinned anymore, we need notifications
when the VMM changes the page tables. Use mmu_notifiers for
that.
Also use the task_exit notifier from the profiling subsystem
to shut down all contexts related to this task.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
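
[Editor's note: the patch follows the standard mmu_notifier embedding pattern: a struct mmu_notifier lives inside the per-context state, its ops field points at a callback table, and the notifier is registered against the task's mm_struct. The sketch below is illustrative only, written against the 3.2-era API (which still had .invalidate_page); my_state, my_mn_ops, my_flush_hw, and my_bind are hypothetical names, not part of this patch.]

#include <linux/kernel.h>
#include <linux/mmu_notifier.h>
#include <linux/mm_types.h>

struct my_state {
        struct mm_struct *mm;           /* address space being tracked */
        struct mmu_notifier mn;         /* embedded notifier handle */
};

/* Hypothetical hardware hook, standing in for an IOTLB flush */
static void my_flush_hw(struct my_state *s, unsigned long address)
{
}

static void my_invalidate_page(struct mmu_notifier *mn,
                               struct mm_struct *mm,
                               unsigned long address)
{
        /* recover the containing state from the embedded notifier */
        struct my_state *s = container_of(mn, struct my_state, mn);

        my_flush_hw(s, address);
}

static const struct mmu_notifier_ops my_mn_ops = {
        .invalidate_page = my_invalidate_page,
};

static int my_bind(struct my_state *s)
{
        s->mn.ops = &my_mn_ops;
        /* callbacks fire for s->mm until mmu_notifier_unregister() */
        return mmu_notifier_register(&s->mn, s->mm);
}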
Diffstat (limited to 'drivers/iommu')
 drivers/iommu/Kconfig        |   3 +-
 drivers/iommu/amd_iommu_v2.c | 186 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 178 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e608a36bb0b1..6bea6962f8ee 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -62,7 +62,8 @@ config AMD_IOMMU_STATS

 config AMD_IOMMU_V2
         tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
-        depends on AMD_IOMMU && EXPERIMENTAL
+        depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
+        select MMU_NOTIFIER
         ---help---
           This option enables support for the AMD IOMMUv2 features of the IOMMU
           hardware. Select this option if you want to use devices that support
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 8804b2247694..abdb8396f89a 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,8 +16,10 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */

+#include <linux/mmu_notifier.h>
 #include <linux/amd-iommu.h>
 #include <linux/mm_types.h>
+#include <linux/profile.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/iommu.h>
@@ -45,6 +47,7 @@ struct pasid_state {
         atomic_t count;                         /* Reference count */
         struct task_struct *task;               /* Task bound to this PASID */
         struct mm_struct *mm;                   /* mm_struct for the faults */
+        struct mmu_notifier mn;                 /* mmu_notifier handle */
         struct pri_queue pri[PRI_QUEUE_SIZE];   /* PRI tag states */
         struct device_state *device_state;      /* Link to our device_state */
         int pasid;                              /* PASID index */
@@ -85,8 +88,16 @@ static DEFINE_SPINLOCK(ps_lock);

 static struct workqueue_struct *iommu_wq;

+/*
+ * Empty page table - Used between
+ * mmu_notifier_invalidate_range_start and
+ * mmu_notifier_invalidate_range_end
+ */
+static u64 *empty_page_table;
+
 static void free_pasid_states(struct device_state *dev_state);
 static void unbind_pasid(struct device_state *dev_state, int pasid);
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data);

 static u16 device_id(struct pci_dev *pdev)
 {
@@ -144,6 +155,11 @@ static void put_device_state_wait(struct device_state *dev_state)

         free_device_state(dev_state);
 }
+
+static struct notifier_block profile_nb = {
+        .notifier_call = task_exit,
+};
+
 static void link_pasid_state(struct pasid_state *pasid_state)
 {
         spin_lock(&ps_lock);
@@ -294,6 +310,23 @@ static void put_pasid_state_wait(struct pasid_state *pasid_state)
         free_pasid_state(pasid_state);
 }

+static void __unbind_pasid(struct pasid_state *pasid_state)
+{
+        struct iommu_domain *domain;
+
+        domain = pasid_state->device_state->domain;
+
+        amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
+        clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
+
+        /* Make sure no more pending faults are in the queue */
+        flush_workqueue(iommu_wq);
+
+        mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+        put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
 static void unbind_pasid(struct device_state *dev_state, int pasid)
 {
         struct pasid_state *pasid_state;
@@ -303,12 +336,8 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)
                 return;

         unlink_pasid_state(pasid_state);
-
-        amd_iommu_domain_clear_gcr3(dev_state->domain, pasid);
-        clear_pasid_state(dev_state, pasid);
-
-        put_pasid_state(pasid_state); /* Reference taken in this function */
-        put_pasid_state_wait(pasid_state); /* Reference from bind() function */
+        __unbind_pasid(pasid_state);
+        put_pasid_state_wait(pasid_state); /* Reference taken in this function */
 }

 static void free_pasid_states_level1(struct pasid_state **tbl)
@@ -361,6 +390,83 @@ static void free_pasid_states(struct device_state *dev_state)
         free_page((unsigned long)dev_state->states);
 }

+static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
+{
+        return container_of(mn, struct pasid_state, mn);
+}
+
+static void __mn_flush_page(struct mmu_notifier *mn,
+                            unsigned long address)
+{
+        struct pasid_state *pasid_state;
+        struct device_state *dev_state;
+
+        pasid_state = mn_to_state(mn);
+        dev_state   = pasid_state->device_state;
+
+        amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
+}
+
+static int mn_clear_flush_young(struct mmu_notifier *mn,
+                                struct mm_struct *mm,
+                                unsigned long address)
+{
+        __mn_flush_page(mn, address);
+
+        return 0;
+}
+
+static void mn_change_pte(struct mmu_notifier *mn,
+                          struct mm_struct *mm,
+                          unsigned long address,
+                          pte_t pte)
+{
+        __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_page(struct mmu_notifier *mn,
+                               struct mm_struct *mm,
+                               unsigned long address)
+{
+        __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_range_start(struct mmu_notifier *mn,
+                                      struct mm_struct *mm,
+                                      unsigned long start, unsigned long end)
+{
+        struct pasid_state *pasid_state;
+        struct device_state *dev_state;
+
+        pasid_state = mn_to_state(mn);
+        dev_state   = pasid_state->device_state;
+
+        amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+                                  __pa(empty_page_table));
+}
+
+static void mn_invalidate_range_end(struct mmu_notifier *mn,
+                                    struct mm_struct *mm,
+                                    unsigned long start, unsigned long end)
+{
+        struct pasid_state *pasid_state;
+        struct device_state *dev_state;
+
+        pasid_state = mn_to_state(mn);
+        dev_state   = pasid_state->device_state;
+
+        amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+                                  __pa(pasid_state->mm->pgd));
+}
+
+static struct mmu_notifier_ops iommu_mn = {
+        .clear_flush_young      = mn_clear_flush_young,
+        .change_pte             = mn_change_pte,
+        .invalidate_page        = mn_invalidate_page,
+        .invalidate_range_start = mn_invalidate_range_start,
+        .invalidate_range_end   = mn_invalidate_range_end,
+};
+
 static void set_pri_tag_status(struct pasid_state *pasid_state,
                                u16 tag, int status)
 {
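
[Editor's note: mn_invalidate_range_start()/..._end() above implement the "empty page table" trick: while the core mm rewrites page tables for a range, the PASID's GCR3 base points at a zeroed page, so device translations fault instead of reading half-updated tables; range_end then restores the real pgd. A condensed sketch of that window; example_invalidate_window() is a hypothetical helper, not part of the patch:]

/*
 * Illustrative only - condenses what the two notifier callbacks
 * above do around a page-table update.
 */
static void example_invalidate_window(struct iommu_domain *domain, int pasid,
                                      u64 *empty_pt, struct mm_struct *mm)
{
        /* range_start: device translations now fault instead of
         * hitting stale or half-updated entries */
        amd_iommu_domain_set_gcr3(domain, pasid, __pa(empty_pt));

        /* ... the core mm modifies the page tables here ... */

        /* range_end: point the IOMMU back at the real page tables */
        amd_iommu_domain_set_gcr3(domain, pasid, __pa(mm->pgd));
}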
@@ -475,6 +581,50 @@ static struct notifier_block ppr_nb = {
         .notifier_call = ppr_notifier,
 };

+static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
+{
+        struct pasid_state *pasid_state;
+        struct task_struct *task;
+
+        task = data;
+
+        /*
+         * Using this notifier is a hack - but there is no other choice
+         * at the moment. What I really want is a sleeping notifier that
+         * is called when an MM goes down. But such a notifier doesn't
+         * exist yet. The notifier needs to sleep because it has to make
+         * sure that the device does not use the PASID and the address
+         * space anymore before it is destroyed. This includes waiting
+         * for pending PRI requests to pass the workqueue. The
+         * MMU-Notifiers would be a good fit, but they use RCU and so
+         * they are not allowed to sleep. Let's see how we can solve this
+         * in a more intelligent way in the future.
+         */
+again:
+        spin_lock(&ps_lock);
+        list_for_each_entry(pasid_state, &pasid_state_list, list) {
+                struct device_state *dev_state;
+                int pasid;
+
+                if (pasid_state->task != task)
+                        continue;
+
+                /* Drop lock and unbind */
+                spin_unlock(&ps_lock);
+
+                dev_state = pasid_state->device_state;
+                pasid     = pasid_state->pasid;
+
+                unbind_pasid(dev_state, pasid);
+
+                /* Task may be in the list multiple times */
+                goto again;
+        }
+        spin_unlock(&ps_lock);
+
+        return NOTIFY_OK;
+}
+
 int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
                          struct task_struct *task)
 {
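
[Editor's note: task_exit() above runs on the profiling subsystem's PROFILE_TASK_EXIT chain, a blocking notifier chain that may sleep; this is why the Kconfig hunk adds the PROFILING dependency. A minimal sketch of the registration pattern; my_task_exit, my_nb, my_init, and my_cleanup are hypothetical names:]

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/profile.h>
#include <linux/sched.h>

/* Runs on the (blocking, hence sleepable) task-exit chain;
 * data is the exiting task */
static int my_task_exit(struct notifier_block *nb, unsigned long event,
                        void *data)
{
        struct task_struct *task = data;

        /* ... look up and unbind any contexts bound to 'task' ... */
        (void)task;

        return NOTIFY_OK;
}

static struct notifier_block my_nb = {
        .notifier_call = my_task_exit,
};

/* Pair the register/unregister calls in module init and exit */
static int __init my_init(void)
{
        return profile_event_register(PROFILE_TASK_EXIT, &my_nb);
}

static void __exit my_cleanup(void)
{
        profile_event_unregister(PROFILE_TASK_EXIT, &my_nb);
}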
@@ -509,13 +659,16 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
         pasid_state->mm           = get_task_mm(task);
         pasid_state->device_state = dev_state;
         pasid_state->pasid        = pasid;
+        pasid_state->mn.ops       = &iommu_mn;

         if (pasid_state->mm == NULL)
                 goto out_free;

+        mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
+
         ret = set_pasid_state(dev_state, pasid_state, pasid);
         if (ret)
-                goto out_free;
+                goto out_unregister;

         ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
                                         __pa(pasid_state->mm->pgd));
@@ -529,6 +682,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 out_clear_state:
         clear_pasid_state(dev_state, pasid);

+out_unregister:
+        mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
 out_free:
         free_pasid_state(pasid_state);

@@ -689,15 +845,22 @@ static int __init amd_iommu_v2_init(void)

         ret = -ENOMEM;
         iommu_wq = create_workqueue("amd_iommu_v2");
-        if (iommu_wq == NULL) {
-                ret = -ENOMEM;
+        if (iommu_wq == NULL)
                 goto out_free;
-        }
+
+        ret = -ENOMEM;
+        empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
+        if (empty_page_table == NULL)
+                goto out_destroy_wq;

         amd_iommu_register_ppr_notifier(&ppr_nb);
+        profile_event_register(PROFILE_TASK_EXIT, &profile_nb);

         return 0;

+out_destroy_wq:
+        destroy_workqueue(iommu_wq);
+
 out_free:
         free_pages((unsigned long)state_table, get_order(state_table_size));

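
[Editor's note: the reworked init path above uses the kernel's goto-unwind idiom: each failure jumps to the label that releases only what has already been acquired, and the labels run in reverse order of acquisition. A generic sketch under hypothetical names (example_init, alloc_a, alloc_b, free_a), not part of the patch:]

/* Hypothetical resources, standing in for the workqueue and the
 * empty page table in amd_iommu_v2_init() above. */
static void *alloc_a(void);
static void *alloc_b(void);
static void free_a(void *a);

static int example_init(void)
{
        void *a, *b;
        int ret = -ENOMEM;

        a = alloc_a();
        if (a == NULL)
                goto out;               /* nothing acquired yet */

        b = alloc_b();
        if (b == NULL)
                goto out_free_a;        /* undo only the earlier step */

        /* register notifiers last, once every allocation succeeded */
        return 0;

out_free_a:
        free_a(a);
out:
        return ret;
}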
@@ -710,6 +873,7 @@ static void __exit amd_iommu_v2_exit(void)
         size_t state_table_size;
         int i;

+        profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
         amd_iommu_unregister_ppr_notifier(&ppr_nb);

         flush_workqueue(iommu_wq);
@@ -734,6 +898,8 @@

         state_table_size = MAX_DEVICES * sizeof(struct device_state *);
         free_pages((unsigned long)state_table, get_order(state_table_size));
+
+        free_page((unsigned long)empty_page_table);
 }

 module_init(amd_iommu_v2_init);