about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Joerg Roedel <joerg.roedel@amd.com> 2011-11-24 06:48:13 -0500
committer Joerg Roedel <joerg.roedel@amd.com> 2011-12-12 09:34:50 -0500
commit 028eeacc412a8bebf6711e58629b0cab56a9ba87 (patch)
tree d5593a1b3cb6e924593f5646343a4432a41b6c8c
parent 2d5503b624736abfe0e0bad281f9b8d8a705b930 (diff)
iommu/amd: Implement IO page-fault handler
Register the notifier for PPR faults and handle them as necessary.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
-rw-r--r-- drivers/iommu/amd_iommu_v2.c 204
1 files changed, 196 insertions, 8 deletions
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index b5ee09ece651..8804b2247694 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -21,9 +21,11 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/iommu.h> 23#include <linux/iommu.h>
24#include <linux/wait.h>
24#include <linux/pci.h> 25#include <linux/pci.h>
25#include <linux/gfp.h> 26#include <linux/gfp.h>
26 27
28#include "amd_iommu_types.h"
27#include "amd_iommu_proto.h" 29#include "amd_iommu_proto.h"
28 30
29MODULE_LICENSE("GPL v2"); 31MODULE_LICENSE("GPL v2");
@@ -35,6 +37,7 @@ MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
35struct pri_queue { 37struct pri_queue {
36 atomic_t inflight; 38 atomic_t inflight;
37 bool finish; 39 bool finish;
40 int status;
38}; 41};
39 42
40struct pasid_state { 43struct pasid_state {
@@ -45,6 +48,8 @@ struct pasid_state {
45 struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ 48 struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
46 struct device_state *device_state; /* Link to our device_state */ 49 struct device_state *device_state; /* Link to our device_state */
47 int pasid; /* PASID index */ 50 int pasid; /* PASID index */
51 spinlock_t lock; /* Protect pri_queues */
52 wait_queue_head_t wq; /* To wait for count == 0 */
48}; 53};
49 54
50struct device_state { 55struct device_state {
@@ -55,6 +60,20 @@ struct device_state {
55 int pasid_levels; 60 int pasid_levels;
56 int max_pasids; 61 int max_pasids;
57 spinlock_t lock; 62 spinlock_t lock;
63 wait_queue_head_t wq;
64};
65
66struct fault {
67 struct work_struct work;
68 struct device_state *dev_state;
69 struct pasid_state *state;
70 struct mm_struct *mm;
71 u64 address;
72 u16 devid;
73 u16 pasid;
74 u16 tag;
75 u16 finish;
76 u16 flags;
58}; 77};
59 78
60struct device_state **state_table; 79struct device_state **state_table;
@@ -64,6 +83,8 @@ static spinlock_t state_lock;
64static LIST_HEAD(pasid_state_list); 83static LIST_HEAD(pasid_state_list);
65static DEFINE_SPINLOCK(ps_lock); 84static DEFINE_SPINLOCK(ps_lock);
66 85
86static struct workqueue_struct *iommu_wq;
87
67static void free_pasid_states(struct device_state *dev_state); 88static void free_pasid_states(struct device_state *dev_state);
68static void unbind_pasid(struct device_state *dev_state, int pasid); 89static void unbind_pasid(struct device_state *dev_state, int pasid);
69 90
@@ -109,9 +130,20 @@ static void free_device_state(struct device_state *dev_state)
109static void put_device_state(struct device_state *dev_state) 130static void put_device_state(struct device_state *dev_state)
110{ 131{
111 if (atomic_dec_and_test(&dev_state->count)) 132 if (atomic_dec_and_test(&dev_state->count))
112 free_device_state(dev_state); 133 wake_up(&dev_state->wq);
113} 134}
114 135
136static void put_device_state_wait(struct device_state *dev_state)
137{
138 DEFINE_WAIT(wait);
139
140 prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
141 if (!atomic_dec_and_test(&dev_state->count))
142 schedule();
143 finish_wait(&dev_state->wq, &wait);
144
145 free_device_state(dev_state);
146}
115static void link_pasid_state(struct pasid_state *pasid_state) 147static void link_pasid_state(struct pasid_state *pasid_state)
116{ 148{
117 spin_lock(&ps_lock); 149 spin_lock(&ps_lock);
@@ -242,11 +274,26 @@ static void put_pasid_state(struct pasid_state *pasid_state)
242{ 274{
243 if (atomic_dec_and_test(&pasid_state->count)) { 275 if (atomic_dec_and_test(&pasid_state->count)) {
244 put_device_state(pasid_state->device_state); 276 put_device_state(pasid_state->device_state);
245 mmput(pasid_state->mm); 277 wake_up(&pasid_state->wq);
246 free_pasid_state(pasid_state);
247 } 278 }
248} 279}
249 280
281static void put_pasid_state_wait(struct pasid_state *pasid_state)
282{
283 DEFINE_WAIT(wait);
284
285 prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
286
287 if (atomic_dec_and_test(&pasid_state->count))
288 put_device_state(pasid_state->device_state);
289 else
290 schedule();
291
292 finish_wait(&pasid_state->wq, &wait);
293 mmput(pasid_state->mm);
294 free_pasid_state(pasid_state);
295}
296
250static void unbind_pasid(struct device_state *dev_state, int pasid) 297static void unbind_pasid(struct device_state *dev_state, int pasid)
251{ 298{
252 struct pasid_state *pasid_state; 299 struct pasid_state *pasid_state;
@@ -261,7 +308,7 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)
261 clear_pasid_state(dev_state, pasid); 308 clear_pasid_state(dev_state, pasid);
262 309
263 put_pasid_state(pasid_state); /* Reference taken in this function */ 310 put_pasid_state(pasid_state); /* Reference taken in this function */
264 put_pasid_state(pasid_state); /* Reference taken in bind() function */ 311 put_pasid_state_wait(pasid_state); /* Reference from bind() function */
265} 312}
266 313
267static void free_pasid_states_level1(struct pasid_state **tbl) 314static void free_pasid_states_level1(struct pasid_state **tbl)
@@ -300,8 +347,8 @@ static void free_pasid_states(struct device_state *dev_state)
300 if (pasid_state == NULL) 347 if (pasid_state == NULL)
301 continue; 348 continue;
302 349
303 unbind_pasid(dev_state, i);
304 put_pasid_state(pasid_state); 350 put_pasid_state(pasid_state);
351 unbind_pasid(dev_state, i);
305 } 352 }
306 353
307 if (dev_state->pasid_levels == 2) 354 if (dev_state->pasid_levels == 2)
@@ -314,6 +361,120 @@ static void free_pasid_states(struct device_state *dev_state)
314 free_page((unsigned long)dev_state->states); 361 free_page((unsigned long)dev_state->states);
315} 362}
316 363
364static void set_pri_tag_status(struct pasid_state *pasid_state,
365 u16 tag, int status)
366{
367 unsigned long flags;
368
369 spin_lock_irqsave(&pasid_state->lock, flags);
370 pasid_state->pri[tag].status = status;
371 spin_unlock_irqrestore(&pasid_state->lock, flags);
372}
373
374static void finish_pri_tag(struct device_state *dev_state,
375 struct pasid_state *pasid_state,
376 u16 tag)
377{
378 unsigned long flags;
379
380 spin_lock_irqsave(&pasid_state->lock, flags);
381 if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
382 pasid_state->pri[tag].finish) {
383 amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
384 pasid_state->pri[tag].status, tag);
385 pasid_state->pri[tag].finish = false;
386 pasid_state->pri[tag].status = PPR_SUCCESS;
387 }
388 spin_unlock_irqrestore(&pasid_state->lock, flags);
389}
390
391static void do_fault(struct work_struct *work)
392{
393 struct fault *fault = container_of(work, struct fault, work);
394 int npages, write;
395 struct page *page;
396
397 write = !!(fault->flags & PPR_FAULT_WRITE);
398
399 npages = get_user_pages(fault->state->task, fault->state->mm,
400 fault->address, 1, write, 0, &page, NULL);
401
402 if (npages == 1)
403 put_page(page);
404 else
405 set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
406
407 finish_pri_tag(fault->dev_state, fault->state, fault->tag);
408
409 put_pasid_state(fault->state);
410
411 kfree(fault);
412}
413
414static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
415{
416 struct amd_iommu_fault *iommu_fault;
417 struct pasid_state *pasid_state;
418 struct device_state *dev_state;
419 unsigned long flags;
420 struct fault *fault;
421 bool finish;
422 u16 tag;
423 int ret;
424
425 iommu_fault = data;
426 tag = iommu_fault->tag & 0x1ff;
427 finish = (iommu_fault->tag >> 9) & 1;
428
429 ret = NOTIFY_DONE;
430 dev_state = get_device_state(iommu_fault->device_id);
431 if (dev_state == NULL)
432 goto out;
433
434 pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
435 if (pasid_state == NULL) {
436 /* We know the device but not the PASID -> send INVALID */
437 amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
438 PPR_INVALID, tag);
439 goto out_drop_state;
440 }
441
442 spin_lock_irqsave(&pasid_state->lock, flags);
443 atomic_inc(&pasid_state->pri[tag].inflight);
444 if (finish)
445 pasid_state->pri[tag].finish = true;
446 spin_unlock_irqrestore(&pasid_state->lock, flags);
447
448 fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
449 if (fault == NULL) {
450 /* We are OOM - send success and let the device re-fault */
451 finish_pri_tag(dev_state, pasid_state, tag);
452 goto out_drop_state;
453 }
454
455 fault->dev_state = dev_state;
456 fault->address = iommu_fault->address;
457 fault->state = pasid_state;
458 fault->tag = tag;
459 fault->finish = finish;
460 fault->flags = iommu_fault->flags;
461 INIT_WORK(&fault->work, do_fault);
462
463 queue_work(iommu_wq, &fault->work);
464
465 ret = NOTIFY_OK;
466
467out_drop_state:
468 put_device_state(dev_state);
469
470out:
471 return ret;
472}
473
474static struct notifier_block ppr_nb = {
475 .notifier_call = ppr_notifier,
476};
477
317int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, 478int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
318 struct task_struct *task) 479 struct task_struct *task)
319{ 480{
@@ -343,6 +504,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
343 goto out; 504 goto out;
344 505
345 atomic_set(&pasid_state->count, 1); 506 atomic_set(&pasid_state->count, 1);
507 init_waitqueue_head(&pasid_state->wq);
346 pasid_state->task = task; 508 pasid_state->task = task;
347 pasid_state->mm = get_task_mm(task); 509 pasid_state->mm = get_task_mm(task);
348 pasid_state->device_state = dev_state; 510 pasid_state->device_state = dev_state;
@@ -368,7 +530,7 @@ out_clear_state:
368 clear_pasid_state(dev_state, pasid); 530 clear_pasid_state(dev_state, pasid);
369 531
370out_free: 532out_free:
371 put_pasid_state(pasid_state); 533 free_pasid_state(pasid_state);
372 534
373out: 535out:
374 put_device_state(dev_state); 536 put_device_state(dev_state);
@@ -424,6 +586,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
424 return -ENOMEM; 586 return -ENOMEM;
425 587
426 spin_lock_init(&dev_state->lock); 588 spin_lock_init(&dev_state->lock);
589 init_waitqueue_head(&dev_state->wq);
427 dev_state->pdev = pdev; 590 dev_state->pdev = pdev;
428 591
429 tmp = pasids; 592 tmp = pasids;
@@ -505,13 +668,14 @@ void amd_iommu_free_device(struct pci_dev *pdev)
505 /* Get rid of any remaining pasid states */ 668 /* Get rid of any remaining pasid states */
506 free_pasid_states(dev_state); 669 free_pasid_states(dev_state);
507 670
508 put_device_state(dev_state); 671 put_device_state_wait(dev_state);
509} 672}
510EXPORT_SYMBOL(amd_iommu_free_device); 673EXPORT_SYMBOL(amd_iommu_free_device);
511 674
512static int __init amd_iommu_v2_init(void) 675static int __init amd_iommu_v2_init(void)
513{ 676{
514 size_t state_table_size; 677 size_t state_table_size;
678 int ret;
515 679
516 pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>"); 680 pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
517 681
@@ -523,7 +687,21 @@ static int __init amd_iommu_v2_init(void)
523 if (state_table == NULL) 687 if (state_table == NULL)
524 return -ENOMEM; 688 return -ENOMEM;
525 689
690 ret = -ENOMEM;
691 iommu_wq = create_workqueue("amd_iommu_v2");
692 if (iommu_wq == NULL) {
693 ret = -ENOMEM;
694 goto out_free;
695 }
696
697 amd_iommu_register_ppr_notifier(&ppr_nb);
698
526 return 0; 699 return 0;
700
701out_free:
702 free_pages((unsigned long)state_table, get_order(state_table_size));
703
704 return ret;
527} 705}
528 706
529static void __exit amd_iommu_v2_exit(void) 707static void __exit amd_iommu_v2_exit(void)
@@ -532,6 +710,14 @@ static void __exit amd_iommu_v2_exit(void)
532 size_t state_table_size; 710 size_t state_table_size;
533 int i; 711 int i;
534 712
713 amd_iommu_unregister_ppr_notifier(&ppr_nb);
714
715 flush_workqueue(iommu_wq);
716
717 /*
718 * The loop below might call flush_workqueue(), so call
719 * destroy_workqueue() after it
720 */
535 for (i = 0; i < MAX_DEVICES; ++i) { 721 for (i = 0; i < MAX_DEVICES; ++i) {
536 dev_state = get_device_state(i); 722 dev_state = get_device_state(i);
537 723
@@ -540,10 +726,12 @@ static void __exit amd_iommu_v2_exit(void)
540 726
541 WARN_ON_ONCE(1); 727 WARN_ON_ONCE(1);
542 728
543 amd_iommu_free_device(dev_state->pdev);
544 put_device_state(dev_state); 729 put_device_state(dev_state);
730 amd_iommu_free_device(dev_state->pdev);
545 } 731 }
546 732
733 destroy_workqueue(iommu_wq);
734
547 state_table_size = MAX_DEVICES * sizeof(struct device_state *); 735 state_table_size = MAX_DEVICES * sizeof(struct device_state *);
548 free_pages((unsigned long)state_table, get_order(state_table_size)); 736 free_pages((unsigned long)state_table, get_order(state_table_size));
549} 737}