author		Linus Torvalds <torvalds@linux-foundation.org>	2018-03-14 19:51:33 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-03-14 19:51:33 -0400
commit		fed8f5090ca647b7cf37c21f92053e517526133a (patch)
tree		4cd2a81f2aa42c7636b263f02560b1b7b1af49f4
parent		3e04040df6d4613a8af5a80882d5f7f298f49810 (diff)
parent		74b44bbe80b4c62113ac1501482ea1ee40eb9d67 (diff)
Merge branch 'percpu_ref-rcu-audit-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc
Pull percpu_ref rcu fixes from Tejun Heo:
 "Jann Horn found that aio was depending on the internal RCU grace
  periods of percpu-ref and that it's broken because aio uses regular
  RCU while percpu_ref uses sched-RCU.

  Depending on percpu_ref's internal grace periods isn't a good idea
  because:

   - The RCU type might not match.

   - percpu_ref's grace periods are used to switch to atomic mode. They
     aren't between the last put and the invocation of the last release.
     This is easy to get confused about and can lead to subtle bugs.

   - percpu_ref might not have grace periods at all depending on its
     current operation mode.

  This patchset audits and fixes percpu_ref users for their RCU usages"

[ There's a continuation of this series that clarifies percpu_ref
  documentation that the internal grace periods must not be depended
  upon, and introduces rcu_work to simplify bouncing to a workqueue
  after an RCU grace period.  That will go in for 4.17 - this is just
  the minimal set with the fixes that are tagged for -stable ]

* 'percpu_ref-rcu-audit-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc:
  RDMAVT: Fix synchronization around percpu_ref
  fs/aio: Use RCU accessors for kioctx_table->table[]
  fs/aio: Add explicit RCU grace period when freeing kioctx
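The fs/aio changes below illustrate the general pattern for not depending on percpu_ref's internal grace periods: the release callback inserts an explicit regular-RCU grace period with call_rcu() and, because the final teardown needs process context, bounces from the RCU callback into a workqueue. What follows is a minimal sketch of that double bounce, not code from this series; struct foo, its fields, and the foo_* function names are hypothetical and used only for illustration.

#include <linux/percpu-refcount.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical object; names are illustrative, not part of this series. */
struct foo {
	struct percpu_ref	users;
	struct rcu_head		free_rcu;
	struct work_struct	free_work;	/* final teardown runs here */
};

/* Process context: safe to sleep, free resources, take mutexes, etc. */
static void foo_free_workfn(struct work_struct *work)
{
	struct foo *foo = container_of(work, struct foo, free_work);

	kfree(foo);
}

/* A regular RCU grace period has elapsed; punt teardown to a workqueue. */
static void foo_free_rcufn(struct rcu_head *head)
{
	struct foo *foo = container_of(head, struct foo, free_rcu);

	INIT_WORK(&foo->free_work, foo_free_workfn);
	schedule_work(&foo->free_work);
}

/* percpu_ref release callback: the last reference has been dropped. */
static void foo_release(struct percpu_ref *ref)
{
	struct foo *foo = container_of(ref, struct foo, users);

	/*
	 * Don't rely on percpu_ref's internal (sched-RCU) grace period;
	 * wait for an explicit regular-RCU grace period before freeing,
	 * so rcu_read_lock() protected lookups can't see a freed object.
	 */
	call_rcu(&foo->free_rcu, foo_free_rcufn);
}

In such a sketch, foo_release() would be the release function passed to percpu_ref_init() when the object is set up. The rcu_work facility mentioned in the note above is meant to fold the call_rcu()-then-schedule_work() steps into a single queue_rcu_work() call in later kernels.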
-rw-r--r--	drivers/infiniband/sw/rdmavt/mr.c	10
-rw-r--r--	fs/aio.c				44
2 files changed, 36 insertions(+), 18 deletions(-)
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 1b2e5362a3ff..cc429b567d0a 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -489,11 +489,13 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
 	unsigned long timeout;
 	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
 
-	if (percpu_ref_is_zero(&mr->refcount))
-		return 0;
-	/* avoid dma mr */
-	if (mr->lkey)
+	if (mr->lkey) {
+		/* avoid dma mr */
 		rvt_dereg_clean_qps(mr);
+		/* @mr was indexed on rcu protected @lkey_table */
+		synchronize_rcu();
+	}
+
 	timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
 	if (!timeout) {
 		rvt_pr_err(rdi,
diff --git a/fs/aio.c b/fs/aio.c
index a062d75109cb..6bcd3fb5265a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
 #define AIO_RING_PAGES	8
 
 struct kioctx_table {
 	struct rcu_head		rcu;
 	unsigned		nr;
-	struct kioctx		*table[];
+	struct kioctx __rcu	*table[];
 };
 
 struct kioctx_cpu {
@@ -115,7 +115,8 @@ struct kioctx {
 	struct page		**ring_pages;
 	long			nr_pages;
 
-	struct work_struct	free_work;
+	struct rcu_head		free_rcu;
+	struct work_struct	free_work;	/* see free_ioctx() */
 
 	/*
 	 * signals when all in-flight requests are done
@@ -329,7 +330,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
 	for (i = 0; i < table->nr; i++) {
 		struct kioctx *ctx;
 
-		ctx = table->table[i];
+		ctx = rcu_dereference(table->table[i]);
 		if (ctx && ctx->aio_ring_file == file) {
 			if (!atomic_read(&ctx->dead)) {
 				ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
 	return cancel(&kiocb->common);
 }
 
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
+ * ->free_work.
+ */
 static void free_ioctx(struct work_struct *work)
 {
 	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
@@ -601,6 +608,14 @@ static void free_ioctx(struct work_struct *work)
 	kmem_cache_free(kioctx_cachep, ctx);
 }
 
+static void free_ioctx_rcufn(struct rcu_head *head)
+{
+	struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
+
+	INIT_WORK(&ctx->free_work, free_ioctx);
+	schedule_work(&ctx->free_work);
+}
+
 static void free_ioctx_reqs(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
 		complete(&ctx->rq_wait->comp);
 
-	INIT_WORK(&ctx->free_work, free_ioctx);
-	schedule_work(&ctx->free_work);
+	/* Synchronize against RCU protected table->table[] dereferences */
+	call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
 }
 
 /*
@@ -651,9 +666,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 	while (1) {
 		if (table)
 			for (i = 0; i < table->nr; i++)
-				if (!table->table[i]) {
+				if (!rcu_access_pointer(table->table[i])) {
 					ctx->id = i;
-					table->table[i] = ctx;
+					rcu_assign_pointer(table->table[i], ctx);
 					spin_unlock(&mm->ioctx_lock);
 
 					/* While kioctx setup is in progress,
@@ -834,11 +849,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	}
 
 	table = rcu_dereference_raw(mm->ioctx_table);
-	WARN_ON(ctx != table->table[ctx->id]);
-	table->table[ctx->id] = NULL;
+	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+	RCU_INIT_POINTER(table->table[ctx->id], NULL);
 	spin_unlock(&mm->ioctx_lock);
 
-	/* percpu_ref_kill() will do the necessary call_rcu() */
+	/* free_ioctx_reqs() will do the necessary RCU synchronization */
 	wake_up_all(&ctx->wait);
 
 	/*
@@ -880,7 +895,8 @@ void exit_aio(struct mm_struct *mm)
 
 	skipped = 0;
 	for (i = 0; i < table->nr; ++i) {
-		struct kioctx *ctx = table->table[i];
+		struct kioctx *ctx =
+			rcu_dereference_protected(table->table[i], true);
 
 		if (!ctx) {
 			skipped++;
@@ -1069,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	if (!table || id >= table->nr)
 		goto out;
 
-	ctx = table->table[id];
+	ctx = rcu_dereference(table->table[id]);
 	if (ctx && ctx->user_id == ctx_id) {
 		percpu_ref_get(&ctx->users);
 		ret = ctx;