author		Joerg Roedel <joerg.roedel@amd.com>	2011-11-24 10:21:52 -0500
committer	Joerg Roedel <joerg.roedel@amd.com>	2011-12-14 06:09:17 -0500
commit		8736b2c331030733c5d619170dc6e9ef211a4039 (patch)
tree		33a16976842200dc173a55d13c61e7f592ef485d /drivers/iommu
parent		028eeacc412a8bebf6711e58629b0cab56a9ba87 (diff)
iommu/amd: Implement notifiers for IOMMUv2
Since pages are not pinned anymore, we need notifications when the VMM
changes the page-tables. Use mmu_notifiers for that.

Also use the task_exit notifier from the profiling subsystem to shut down
all contexts related to an exiting task.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
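For readers new to the mechanism: the patch embeds a struct mmu_notifier in
the per-PASID state and registers it against the task's mm, so the IOMMU
driver hears about CPU page-table updates. A minimal, self-contained sketch
of that pattern follows (hypothetical names my_state/my_bind/
my_invalidate_page; kernel APIs as of the 3.2 era, not the driver's actual
code, which is in the diff below):

	#include <linux/kernel.h>
	#include <linux/mmu_notifier.h>
	#include <linux/sched.h>

	/* hypothetical per-binding state with an embedded notifier handle */
	struct my_state {
		struct mm_struct    *mm;
		struct mmu_notifier  mn;
	};

	static void my_invalidate_page(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long address)
	{
		/* container_of() recovers the state the notifier is embedded in */
		struct my_state *state = container_of(mn, struct my_state, mn);

		/* a real driver flushes the IOTLB entry for 'address' here */
		pr_debug("invalidate %#lx in state %p\n", address, state);
	}

	static struct mmu_notifier_ops my_mn_ops = {
		.invalidate_page = my_invalidate_page,
	};

	static int my_bind(struct task_struct *task, struct my_state *state)
	{
		int ret;

		state->mm = get_task_mm(task);
		if (state->mm == NULL)
			return -EINVAL;

		state->mn.ops = &my_mn_ops;

		/* from here on the callbacks fire on page-table changes in mm */
		ret = mmu_notifier_register(&state->mn, state->mm);
		if (ret)
			mmput(state->mm);

		return ret;
	}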
Diffstat (limited to 'drivers/iommu')
-rw-r--r--	drivers/iommu/Kconfig			3
-rw-r--r--	drivers/iommu/amd_iommu_v2.c		186
2 files changed, 178 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e608a36bb0b..6bea6962f8e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -62,7 +62,8 @@ config AMD_IOMMU_STATS
 
 config AMD_IOMMU_V2
 	tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
-	depends on AMD_IOMMU && EXPERIMENTAL
+	depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
+	select MMU_NOTIFIER
 	---help---
 	  This option enables support for the AMD IOMMUv2 features of the IOMMU
 	  hardware. Select this option if you want to use devices that support
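Note on the new PROFILING dependency: the task-exit hook added below is
delivered through the profiling subsystem. When CONFIG_PROFILING is
disabled, linux/profile.h replaces profile_event_register() with an inert
inline stub, roughly like this (paraphrased, not part of this patch):

	static inline int profile_event_register(enum profile_type t,
						 struct notifier_block *n)
	{
		return -ENOSYS;	/* notifier never registered, never fires */
	}

so without the dependency the driver would silently lose its task-exit
notifications.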
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 8804b224769..abdb8396f89 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,8 +16,10 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/mmu_notifier.h>
 #include <linux/amd-iommu.h>
 #include <linux/mm_types.h>
+#include <linux/profile.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/iommu.h>
@@ -45,6 +47,7 @@ struct pasid_state {
 	atomic_t count;				/* Reference count */
 	struct task_struct *task;		/* Task bound to this PASID */
 	struct mm_struct *mm;			/* mm_struct for the faults */
+	struct mmu_notifier mn;			/* mmu_notifier handle */
 	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
 	struct device_state *device_state;	/* Link to our device_state */
 	int pasid;				/* PASID index */
@@ -85,8 +88,16 @@ static DEFINE_SPINLOCK(ps_lock);
 
 static struct workqueue_struct *iommu_wq;
 
+/*
+ * Empty page table - used between
+ * mmu_notifier_invalidate_range_start and
+ * mmu_notifier_invalidate_range_end
+ */
+static u64 *empty_page_table;
+
 static void free_pasid_states(struct device_state *dev_state);
 static void unbind_pasid(struct device_state *dev_state, int pasid);
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
 
 static u16 device_id(struct pci_dev *pdev)
 {
@@ -144,6 +155,11 @@ static void put_device_state_wait(struct device_state *dev_state)
 
 	free_device_state(dev_state);
 }
+
+static struct notifier_block profile_nb = {
+	.notifier_call = task_exit,
+};
+
 static void link_pasid_state(struct pasid_state *pasid_state)
 {
 	spin_lock(&ps_lock);
@@ -294,6 +310,23 @@ static void put_pasid_state_wait(struct pasid_state *pasid_state)
 	free_pasid_state(pasid_state);
 }
 
+static void __unbind_pasid(struct pasid_state *pasid_state)
+{
+	struct iommu_domain *domain;
+
+	domain = pasid_state->device_state->domain;
+
+	amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
+	clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
+
+	/* Make sure no more pending faults are in the queue */
+	flush_workqueue(iommu_wq);
+
+	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+	put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
 static void unbind_pasid(struct device_state *dev_state, int pasid)
 {
 	struct pasid_state *pasid_state;
@@ -303,12 +336,8 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)
 		return;
 
 	unlink_pasid_state(pasid_state);
-
-	amd_iommu_domain_clear_gcr3(dev_state->domain, pasid);
-	clear_pasid_state(dev_state, pasid);
-
-	put_pasid_state(pasid_state); /* Reference taken in this function */
-	put_pasid_state_wait(pasid_state); /* Reference from bind() function */
+	__unbind_pasid(pasid_state);
+	put_pasid_state_wait(pasid_state); /* Reference taken in this function */
 }
 
 static void free_pasid_states_level1(struct pasid_state **tbl)
@@ -361,6 +390,83 @@ static void free_pasid_states(struct device_state *dev_state)
 	free_page((unsigned long)dev_state->states);
 }
 
+static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
+{
+	return container_of(mn, struct pasid_state, mn);
+}
+
+static void __mn_flush_page(struct mmu_notifier *mn,
+			    unsigned long address)
+{
+	struct pasid_state *pasid_state;
+	struct device_state *dev_state;
+
+	pasid_state = mn_to_state(mn);
+	dev_state   = pasid_state->device_state;
+
+	amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
+}
+
+static int mn_clear_flush_young(struct mmu_notifier *mn,
+				struct mm_struct *mm,
+				unsigned long address)
+{
+	__mn_flush_page(mn, address);
+
+	return 0;
+}
+
+static void mn_change_pte(struct mmu_notifier *mn,
+			  struct mm_struct *mm,
+			  unsigned long address,
+			  pte_t pte)
+{
+	__mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_page(struct mmu_notifier *mn,
+			       struct mm_struct *mm,
+			       unsigned long address)
+{
+	__mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_range_start(struct mmu_notifier *mn,
+				      struct mm_struct *mm,
+				      unsigned long start, unsigned long end)
+{
+	struct pasid_state *pasid_state;
+	struct device_state *dev_state;
+
+	pasid_state = mn_to_state(mn);
+	dev_state   = pasid_state->device_state;
+
+	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+				  __pa(empty_page_table));
+}
+
+static void mn_invalidate_range_end(struct mmu_notifier *mn,
+				    struct mm_struct *mm,
+				    unsigned long start, unsigned long end)
+{
+	struct pasid_state *pasid_state;
+	struct device_state *dev_state;
+
+	pasid_state = mn_to_state(mn);
+	dev_state   = pasid_state->device_state;
+
+	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+				  __pa(pasid_state->mm->pgd));
+}
+
+static struct mmu_notifier_ops iommu_mn = {
+	.clear_flush_young      = mn_clear_flush_young,
+	.change_pte             = mn_change_pte,
+	.invalidate_page        = mn_invalidate_page,
+	.invalidate_range_start = mn_invalidate_range_start,
+	.invalidate_range_end   = mn_invalidate_range_end,
+};
+
 static void set_pri_tag_status(struct pasid_state *pasid_state,
 			       u16 tag, int status)
 {
@@ -475,6 +581,50 @@ static struct notifier_block ppr_nb = {
 	.notifier_call = ppr_notifier,
 };
 
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
+{
+	struct pasid_state *pasid_state;
+	struct task_struct *task;
+
+	task = data;
+
+	/*
+	 * Using this notifier is a hack - but there is no other choice
+	 * at the moment. What I really want is a sleeping notifier that
+	 * is called when an MM goes down. But such a notifier doesn't
+	 * exist yet. The notifier needs to sleep because it has to make
+	 * sure that the device does not use the PASID and the address
+	 * space anymore before it is destroyed. This includes waiting
+	 * for pending PRI requests to pass the workqueue. The
+	 * MMU-Notifiers would be a good fit, but they use RCU and so
+	 * they are not allowed to sleep. Let's see how we can solve this
+	 * in a more intelligent way in the future.
+	 */
+again:
+	spin_lock(&ps_lock);
+	list_for_each_entry(pasid_state, &pasid_state_list, list) {
+		struct device_state *dev_state;
+		int pasid;
+
+		if (pasid_state->task != task)
+			continue;
+
+		/* Drop the lock and unbind */
+		spin_unlock(&ps_lock);
+
+		dev_state = pasid_state->device_state;
+		pasid     = pasid_state->pasid;
+
+		unbind_pasid(dev_state, pasid);
+
+		/* Task may be in the list multiple times */
+		goto again;
+	}
+	spin_unlock(&ps_lock);
+
+	return NOTIFY_OK;
+}
+
 int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 			 struct task_struct *task)
 {
@@ -509,13 +659,16 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 	pasid_state->mm = get_task_mm(task);
 	pasid_state->device_state = dev_state;
 	pasid_state->pasid = pasid;
+	pasid_state->mn.ops = &iommu_mn;
 
 	if (pasid_state->mm == NULL)
 		goto out_free;
 
+	mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
+
 	ret = set_pasid_state(dev_state, pasid_state, pasid);
 	if (ret)
-		goto out_free;
+		goto out_unregister;
 
 	ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
 					__pa(pasid_state->mm->pgd));
@@ -529,6 +682,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 out_clear_state:
 	clear_pasid_state(dev_state, pasid);
 
+out_unregister:
+	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
 out_free:
 	free_pasid_state(pasid_state);
 
@@ -689,15 +845,22 @@ static int __init amd_iommu_v2_init(void)
 
 	ret = -ENOMEM;
 	iommu_wq = create_workqueue("amd_iommu_v2");
-	if (iommu_wq == NULL) {
-		ret = -ENOMEM;
+	if (iommu_wq == NULL)
 		goto out_free;
-	}
+
+	ret = -ENOMEM;
+	empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
+	if (empty_page_table == NULL)
+		goto out_destroy_wq;
 
 	amd_iommu_register_ppr_notifier(&ppr_nb);
+	profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
 
 	return 0;
 
+out_destroy_wq:
+	destroy_workqueue(iommu_wq);
+
 out_free:
 	free_pages((unsigned long)state_table, get_order(state_table_size));
 
@@ -710,6 +873,7 @@ static void __exit amd_iommu_v2_exit(void)
 	size_t state_table_size;
 	int i;
 
+	profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
 	amd_iommu_unregister_ppr_notifier(&ppr_nb);
 
 	flush_workqueue(iommu_wq);
@@ -734,6 +898,8 @@ static void __exit amd_iommu_v2_exit(void)
 
 	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
 	free_pages((unsigned long)state_table, get_order(state_table_size));
+
+	free_page((unsigned long)empty_page_table);
 }
 
 module_init(amd_iommu_v2_init);
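
Appendix: the PROFILE_TASK_EXIT chain used above is a blocking notifier
invoked from do_exit(), so its callbacks may sleep, which is exactly the
property the in-code comment in task_exit() asks for. A minimal,
self-contained sketch of that registration pattern (hypothetical my_* names,
not this driver's code):

	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <linux/profile.h>
	#include <linux/sched.h>

	static int my_task_exit(struct notifier_block *nb,
				unsigned long action, void *data)
	{
		struct task_struct *task = data;

		/* safe to sleep here, unlike in an RCU-protected
		 * mmu_notifier; a real user would tear down any
		 * per-task device state at this point */
		pr_debug("task %d is exiting\n", task->pid);

		return NOTIFY_OK;
	}

	static struct notifier_block my_exit_nb = {
		.notifier_call = my_task_exit,
	};

	static int __init my_init(void)
	{
		/* the chain fires from do_exit() via profile_task_exit() */
		return profile_event_register(PROFILE_TASK_EXIT, &my_exit_nb);
	}

	static void __exit my_exit(void)
	{
		profile_event_unregister(PROFILE_TASK_EXIT, &my_exit_nb);
	}

	module_init(my_init);
	module_exit(my_exit);
	MODULE_LICENSE("GPL");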