diff options
author | Joerg Roedel <joerg.roedel@amd.com> | 2011-11-24 06:48:13 -0500 |
---|---|---|
committer | Joerg Roedel <joerg.roedel@amd.com> | 2011-12-12 09:34:50 -0500 |
commit | 028eeacc412a8bebf6711e58629b0cab56a9ba87 (patch) | |
tree | d5593a1b3cb6e924593f5646343a4432a41b6c8c /drivers | |
parent | 2d5503b624736abfe0e0bad281f9b8d8a705b930 (diff) |
iommu/amd: Implement IO page-fault handler
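Register the notifier for PPR faults and handle each reported
fault in a dedicated workqueue: the work item resolves the
faulting address with get_user_pages() and, once the last
in-flight request of a finished PRI tag has been processed,
completes the request with amd_iommu_complete_ppr().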
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/iommu/amd_iommu_v2.c | 204 |
1 file changed, 196 insertions, 8 deletions
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index b5ee09ece651..8804b2247694 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -21,9 +21,11 @@ | |||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/iommu.h> | 23 | #include <linux/iommu.h> |
24 | #include <linux/wait.h> | ||
24 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
25 | #include <linux/gfp.h> | 26 | #include <linux/gfp.h> |
26 | 27 | ||
28 | #include "amd_iommu_types.h" | ||
27 | #include "amd_iommu_proto.h" | 29 | #include "amd_iommu_proto.h" |
28 | 30 | ||
29 | MODULE_LICENSE("GPL v2"); | 31 | MODULE_LICENSE("GPL v2"); |
@@ -35,6 +37,7 @@ MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>"); | |||
35 | struct pri_queue { | 37 | struct pri_queue { |
36 | atomic_t inflight; | 38 | atomic_t inflight; |
37 | bool finish; | 39 | bool finish; |
40 | int status; | ||
38 | }; | 41 | }; |
39 | 42 | ||
40 | struct pasid_state { | 43 | struct pasid_state { |
@@ -45,6 +48,8 @@ struct pasid_state { | |||
45 | struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ | 48 | struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ |
46 | struct device_state *device_state; /* Link to our device_state */ | 49 | struct device_state *device_state; /* Link to our device_state */ |
47 | int pasid; /* PASID index */ | 50 | int pasid; /* PASID index */ |
51 | spinlock_t lock; /* Protect pri_queues */ | ||
52 | wait_queue_head_t wq; /* To wait for count == 0 */ | ||
48 | }; | 53 | }; |
49 | 54 | ||
50 | struct device_state { | 55 | struct device_state { |
@@ -55,6 +60,20 @@ struct device_state { | |||
55 | int pasid_levels; | 60 | int pasid_levels; |
56 | int max_pasids; | 61 | int max_pasids; |
57 | spinlock_t lock; | 62 | spinlock_t lock; |
63 | wait_queue_head_t wq; | ||
64 | }; | ||
65 | |||
66 | struct fault { | ||
67 | struct work_struct work; | ||
68 | struct device_state *dev_state; | ||
69 | struct pasid_state *state; | ||
70 | struct mm_struct *mm; | ||
71 | u64 address; | ||
72 | u16 devid; | ||
73 | u16 pasid; | ||
74 | u16 tag; | ||
75 | u16 finish; | ||
76 | u16 flags; | ||
58 | }; | 77 | }; |
59 | 78 | ||
60 | struct device_state **state_table; | 79 | struct device_state **state_table; |
@@ -64,6 +83,8 @@ static spinlock_t state_lock; | |||
64 | static LIST_HEAD(pasid_state_list); | 83 | static LIST_HEAD(pasid_state_list); |
65 | static DEFINE_SPINLOCK(ps_lock); | 84 | static DEFINE_SPINLOCK(ps_lock); |
66 | 85 | ||
86 | static struct workqueue_struct *iommu_wq; | ||
87 | |||
67 | static void free_pasid_states(struct device_state *dev_state); | 88 | static void free_pasid_states(struct device_state *dev_state); |
68 | static void unbind_pasid(struct device_state *dev_state, int pasid); | 89 | static void unbind_pasid(struct device_state *dev_state, int pasid); |
69 | 90 | ||
@@ -109,9 +130,20 @@ static void free_device_state(struct device_state *dev_state) | |||
109 | static void put_device_state(struct device_state *dev_state) | 130 | static void put_device_state(struct device_state *dev_state) |
110 | { | 131 | { |
111 | if (atomic_dec_and_test(&dev_state->count)) | 132 | if (atomic_dec_and_test(&dev_state->count)) |
112 | free_device_state(dev_state); | 133 | wake_up(&dev_state->wq); |
113 | } | 134 | } |
114 | 135 | ||
136 | static void put_device_state_wait(struct device_state *dev_state) | ||
137 | { | ||
138 | DEFINE_WAIT(wait); | ||
139 | |||
140 | prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE); | ||
141 | if (!atomic_dec_and_test(&dev_state->count)) | ||
142 | schedule(); | ||
143 | finish_wait(&dev_state->wq, &wait); | ||
144 | |||
145 | free_device_state(dev_state); | ||
146 | } | ||
115 | static void link_pasid_state(struct pasid_state *pasid_state) | 147 | static void link_pasid_state(struct pasid_state *pasid_state) |
116 | { | 148 | { |
117 | spin_lock(&ps_lock); | 149 | spin_lock(&ps_lock); |
@@ -242,11 +274,26 @@ static void put_pasid_state(struct pasid_state *pasid_state) | |||
242 | { | 274 | { |
243 | if (atomic_dec_and_test(&pasid_state->count)) { | 275 | if (atomic_dec_and_test(&pasid_state->count)) { |
244 | put_device_state(pasid_state->device_state); | 276 | put_device_state(pasid_state->device_state); |
245 | mmput(pasid_state->mm); | 277 | wake_up(&pasid_state->wq); |
246 | free_pasid_state(pasid_state); | ||
247 | } | 278 | } |
248 | } | 279 | } |
249 | 280 | ||
281 | static void put_pasid_state_wait(struct pasid_state *pasid_state) | ||
282 | { | ||
283 | DEFINE_WAIT(wait); | ||
284 | |||
285 | prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE); | ||
286 | |||
287 | if (atomic_dec_and_test(&pasid_state->count)) | ||
288 | put_device_state(pasid_state->device_state); | ||
289 | else | ||
290 | schedule(); | ||
291 | |||
292 | finish_wait(&pasid_state->wq, &wait); | ||
293 | mmput(pasid_state->mm); | ||
294 | free_pasid_state(pasid_state); | ||
295 | } | ||
296 | |||
250 | static void unbind_pasid(struct device_state *dev_state, int pasid) | 297 | static void unbind_pasid(struct device_state *dev_state, int pasid) |
251 | { | 298 | { |
252 | struct pasid_state *pasid_state; | 299 | struct pasid_state *pasid_state; |
@@ -261,7 +308,7 @@ static void unbind_pasid(struct device_state *dev_state, int pasid) | |||
261 | clear_pasid_state(dev_state, pasid); | 308 | clear_pasid_state(dev_state, pasid); |
262 | 309 | ||
263 | put_pasid_state(pasid_state); /* Reference taken in this function */ | 310 | put_pasid_state(pasid_state); /* Reference taken in this function */ |
264 | put_pasid_state(pasid_state); /* Reference taken in bind() function */ | 311 | put_pasid_state_wait(pasid_state); /* Reference from bind() function */ |
265 | } | 312 | } |
266 | 313 | ||
267 | static void free_pasid_states_level1(struct pasid_state **tbl) | 314 | static void free_pasid_states_level1(struct pasid_state **tbl) |
@@ -300,8 +347,8 @@ static void free_pasid_states(struct device_state *dev_state) | |||
300 | if (pasid_state == NULL) | 347 | if (pasid_state == NULL) |
301 | continue; | 348 | continue; |
302 | 349 | ||
303 | unbind_pasid(dev_state, i); | ||
304 | put_pasid_state(pasid_state); | 350 | put_pasid_state(pasid_state); |
351 | unbind_pasid(dev_state, i); | ||
305 | } | 352 | } |
306 | 353 | ||
307 | if (dev_state->pasid_levels == 2) | 354 | if (dev_state->pasid_levels == 2) |
@@ -314,6 +361,120 @@ static void free_pasid_states(struct device_state *dev_state) | |||
314 | free_page((unsigned long)dev_state->states); | 361 | free_page((unsigned long)dev_state->states); |
315 | } | 362 | } |
316 | 363 | ||
364 | static void set_pri_tag_status(struct pasid_state *pasid_state, | ||
365 | u16 tag, int status) | ||
366 | { | ||
367 | unsigned long flags; | ||
368 | |||
369 | spin_lock_irqsave(&pasid_state->lock, flags); | ||
370 | pasid_state->pri[tag].status = status; | ||
371 | spin_unlock_irqrestore(&pasid_state->lock, flags); | ||
372 | } | ||
373 | |||
374 | static void finish_pri_tag(struct device_state *dev_state, | ||
375 | struct pasid_state *pasid_state, | ||
376 | u16 tag) | ||
377 | { | ||
378 | unsigned long flags; | ||
379 | |||
380 | spin_lock_irqsave(&pasid_state->lock, flags); | ||
381 | if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && | ||
382 | pasid_state->pri[tag].finish) { | ||
383 | amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, | ||
384 | pasid_state->pri[tag].status, tag); | ||
385 | pasid_state->pri[tag].finish = false; | ||
386 | pasid_state->pri[tag].status = PPR_SUCCESS; | ||
387 | } | ||
388 | spin_unlock_irqrestore(&pasid_state->lock, flags); | ||
389 | } | ||
390 | |||
391 | static void do_fault(struct work_struct *work) | ||
392 | { | ||
393 | struct fault *fault = container_of(work, struct fault, work); | ||
394 | int npages, write; | ||
395 | struct page *page; | ||
396 | |||
397 | write = !!(fault->flags & PPR_FAULT_WRITE); | ||
398 | |||
399 | npages = get_user_pages(fault->state->task, fault->state->mm, | ||
400 | fault->address, 1, write, 0, &page, NULL); | ||
401 | |||
402 | if (npages == 1) | ||
403 | put_page(page); | ||
404 | else | ||
405 | set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); | ||
406 | |||
407 | finish_pri_tag(fault->dev_state, fault->state, fault->tag); | ||
408 | |||
409 | put_pasid_state(fault->state); | ||
410 | |||
411 | kfree(fault); | ||
412 | } | ||
413 | |||
414 | static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) | ||
415 | { | ||
416 | struct amd_iommu_fault *iommu_fault; | ||
417 | struct pasid_state *pasid_state; | ||
418 | struct device_state *dev_state; | ||
419 | unsigned long flags; | ||
420 | struct fault *fault; | ||
421 | bool finish; | ||
422 | u16 tag; | ||
423 | int ret; | ||
424 | |||
425 | iommu_fault = data; | ||
426 | tag = iommu_fault->tag & 0x1ff; | ||
427 | finish = (iommu_fault->tag >> 9) & 1; | ||
428 | |||
429 | ret = NOTIFY_DONE; | ||
430 | dev_state = get_device_state(iommu_fault->device_id); | ||
431 | if (dev_state == NULL) | ||
432 | goto out; | ||
433 | |||
434 | pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); | ||
435 | if (pasid_state == NULL) { | ||
436 | /* We know the device but not the PASID -> send INVALID */ | ||
437 | amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, | ||
438 | PPR_INVALID, tag); | ||
439 | goto out_drop_state; | ||
440 | } | ||
441 | |||
442 | spin_lock_irqsave(&pasid_state->lock, flags); | ||
443 | atomic_inc(&pasid_state->pri[tag].inflight); | ||
444 | if (finish) | ||
445 | pasid_state->pri[tag].finish = true; | ||
446 | spin_unlock_irqrestore(&pasid_state->lock, flags); | ||
447 | |||
448 | fault = kzalloc(sizeof(*fault), GFP_ATOMIC); | ||
449 | if (fault == NULL) { | ||
450 | /* We are OOM - send success and let the device re-fault */ | ||
451 | finish_pri_tag(dev_state, pasid_state, tag); | ||
452 | goto out_drop_state; | ||
453 | } | ||
454 | |||
455 | fault->dev_state = dev_state; | ||
456 | fault->address = iommu_fault->address; | ||
457 | fault->state = pasid_state; | ||
458 | fault->tag = tag; | ||
459 | fault->finish = finish; | ||
460 | fault->flags = iommu_fault->flags; | ||
461 | INIT_WORK(&fault->work, do_fault); | ||
462 | |||
463 | queue_work(iommu_wq, &fault->work); | ||
464 | |||
465 | ret = NOTIFY_OK; | ||
466 | |||
467 | out_drop_state: | ||
468 | put_device_state(dev_state); | ||
469 | |||
470 | out: | ||
471 | return ret; | ||
472 | } | ||
473 | |||
474 | static struct notifier_block ppr_nb = { | ||
475 | .notifier_call = ppr_notifier, | ||
476 | }; | ||
477 | |||
317 | int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, | 478 | int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, |
318 | struct task_struct *task) | 479 | struct task_struct *task) |
319 | { | 480 | { |
@@ -343,6 +504,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, | |||
343 | goto out; | 504 | goto out; |
344 | 505 | ||
345 | atomic_set(&pasid_state->count, 1); | 506 | atomic_set(&pasid_state->count, 1); |
507 | init_waitqueue_head(&pasid_state->wq); | ||
346 | pasid_state->task = task; | 508 | pasid_state->task = task; |
347 | pasid_state->mm = get_task_mm(task); | 509 | pasid_state->mm = get_task_mm(task); |
348 | pasid_state->device_state = dev_state; | 510 | pasid_state->device_state = dev_state; |
@@ -368,7 +530,7 @@ out_clear_state: | |||
368 | clear_pasid_state(dev_state, pasid); | 530 | clear_pasid_state(dev_state, pasid); |
369 | 531 | ||
370 | out_free: | 532 | out_free: |
371 | put_pasid_state(pasid_state); | 533 | free_pasid_state(pasid_state); |
372 | 534 | ||
373 | out: | 535 | out: |
374 | put_device_state(dev_state); | 536 | put_device_state(dev_state); |
@@ -424,6 +586,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) | |||
424 | return -ENOMEM; | 586 | return -ENOMEM; |
425 | 587 | ||
426 | spin_lock_init(&dev_state->lock); | 588 | spin_lock_init(&dev_state->lock); |
589 | init_waitqueue_head(&dev_state->wq); | ||
427 | dev_state->pdev = pdev; | 590 | dev_state->pdev = pdev; |
428 | 591 | ||
429 | tmp = pasids; | 592 | tmp = pasids; |
@@ -505,13 +668,14 @@ void amd_iommu_free_device(struct pci_dev *pdev) | |||
505 | /* Get rid of any remaining pasid states */ | 668 | /* Get rid of any remaining pasid states */ |
506 | free_pasid_states(dev_state); | 669 | free_pasid_states(dev_state); |
507 | 670 | ||
508 | put_device_state(dev_state); | 671 | put_device_state_wait(dev_state); |
509 | } | 672 | } |
510 | EXPORT_SYMBOL(amd_iommu_free_device); | 673 | EXPORT_SYMBOL(amd_iommu_free_device); |
511 | 674 | ||
512 | static int __init amd_iommu_v2_init(void) | 675 | static int __init amd_iommu_v2_init(void) |
513 | { | 676 | { |
514 | size_t state_table_size; | 677 | size_t state_table_size; |
678 | int ret; | ||
515 | 679 | ||
516 | pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>"); | 680 | pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>"); |
517 | 681 | ||
@@ -523,7 +687,21 @@ static int __init amd_iommu_v2_init(void) | |||
523 | if (state_table == NULL) | 687 | if (state_table == NULL) |
524 | return -ENOMEM; | 688 | return -ENOMEM; |
525 | 689 | ||
690 | ret = -ENOMEM; | ||
691 | iommu_wq = create_workqueue("amd_iommu_v2"); | ||
692 | if (iommu_wq == NULL) { | ||
693 | ret = -ENOMEM; | ||
694 | goto out_free; | ||
695 | } | ||
696 | |||
697 | amd_iommu_register_ppr_notifier(&ppr_nb); | ||
698 | |||
526 | return 0; | 699 | return 0; |
700 | |||
701 | out_free: | ||
702 | free_pages((unsigned long)state_table, get_order(state_table_size)); | ||
703 | |||
704 | return ret; | ||
527 | } | 705 | } |
528 | 706 | ||
529 | static void __exit amd_iommu_v2_exit(void) | 707 | static void __exit amd_iommu_v2_exit(void) |
@@ -532,6 +710,14 @@ static void __exit amd_iommu_v2_exit(void) | |||
532 | size_t state_table_size; | 710 | size_t state_table_size; |
533 | int i; | 711 | int i; |
534 | 712 | ||
713 | amd_iommu_unregister_ppr_notifier(&ppr_nb); | ||
714 | |||
715 | flush_workqueue(iommu_wq); | ||
716 | |||
717 | /* | ||
718 | * The loop below might call flush_workqueue(), so call | ||
719 | * destroy_workqueue() after it | ||
720 | */ | ||
535 | for (i = 0; i < MAX_DEVICES; ++i) { | 721 | for (i = 0; i < MAX_DEVICES; ++i) { |
536 | dev_state = get_device_state(i); | 722 | dev_state = get_device_state(i); |
537 | 723 | ||
@@ -540,10 +726,12 @@ static void __exit amd_iommu_v2_exit(void) | |||
540 | 726 | ||
541 | WARN_ON_ONCE(1); | 727 | WARN_ON_ONCE(1); |
542 | 728 | ||
543 | amd_iommu_free_device(dev_state->pdev); | ||
544 | put_device_state(dev_state); | 729 | put_device_state(dev_state); |
730 | amd_iommu_free_device(dev_state->pdev); | ||
545 | } | 731 | } |
546 | 732 | ||
733 | destroy_workqueue(iommu_wq); | ||
734 | |||
547 | state_table_size = MAX_DEVICES * sizeof(struct device_state *); | 735 | state_table_size = MAX_DEVICES * sizeof(struct device_state *); |
548 | free_pages((unsigned long)state_table, get_order(state_table_size)); | 736 | free_pages((unsigned long)state_table, get_order(state_table_size)); |
549 | } | 737 | } |