 fs/aio.c                 | 136 ++++++++++++++++++++++++++++++++++---------
 include/linux/mm_types.h |   5 +-
 kernel/fork.c            |   2 +-
 3 files changed, 118 insertions(+), 25 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 945dd0d072f3..52f200ebef07 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -66,6 +66,12 @@ struct aio_ring {
 
 #define AIO_RING_PAGES	8
 
+struct kioctx_table {
+	struct rcu_head	rcu;
+	unsigned	nr;
+	struct kioctx	*table[];
+};
+
 struct kioctx_cpu {
 	unsigned		reqs_available;
 };
@@ -74,9 +80,7 @@ struct kioctx {
 	struct percpu_ref	users;
 	atomic_t		dead;
 
-	/* This needs improving */
 	unsigned long		user_id;
-	struct hlist_node	list;
 
 	struct __percpu kioctx_cpu *cpu;
 
@@ -135,6 +139,8 @@ struct kioctx {
 
 	struct page		*internal_pages[AIO_RING_PAGES];
 	struct file		*aio_ring_file;
+
+	unsigned		id;
 };
 
 /*------ sysctl variables----*/
@@ -326,7 +332,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
-	ring->id = ctx->user_id;
+	ring->id = ~0U;
 	ring->head = ring->tail = 0;
 	ring->magic = AIO_RING_MAGIC;
 	ring->compat_features = AIO_RING_COMPAT_FEATURES;
@@ -462,6 +468,58 @@ static void free_ioctx_ref(struct percpu_ref *ref)
 	schedule_work(&ctx->free_work);
 }
 
+static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
+{
+	unsigned i, new_nr;
+	struct kioctx_table *table, *old;
+	struct aio_ring *ring;
+
+	spin_lock(&mm->ioctx_lock);
+	table = rcu_dereference(mm->ioctx_table);
+
+	while (1) {
+		if (table)
+			for (i = 0; i < table->nr; i++)
+				if (!table->table[i]) {
+					ctx->id = i;
+					table->table[i] = ctx;
+					spin_unlock(&mm->ioctx_lock);
+
+					ring = kmap_atomic(ctx->ring_pages[0]);
+					ring->id = ctx->id;
+					kunmap_atomic(ring);
+					return 0;
+				}
+
+		new_nr = (table ? table->nr : 1) * 4;
+
+		spin_unlock(&mm->ioctx_lock);
+
+		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
+				new_nr, GFP_KERNEL);
+		if (!table)
+			return -ENOMEM;
+
+		table->nr = new_nr;
+
+		spin_lock(&mm->ioctx_lock);
+		old = rcu_dereference(mm->ioctx_table);
+
+		if (!old) {
+			rcu_assign_pointer(mm->ioctx_table, table);
+		} else if (table->nr > old->nr) {
+			memcpy(table->table, old->table,
+			       old->nr * sizeof(struct kioctx *));
+
+			rcu_assign_pointer(mm->ioctx_table, table);
+			kfree_rcu(old, rcu);
+		} else {
+			kfree(table);
+			table = old;
+		}
+	}
+}
+
 /* ioctx_alloc
  *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
  */
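[Note on the ioctx_add_table() hunk above: it scans for a free slot under ioctx_lock, and when the table is full it drops the lock, allocates a table four times larger, retakes the lock, and re-validates before publishing, retrying if it raced with another grower. A minimal userspace sketch of the same optimistic-resize pattern, for illustration only: a pthread mutex stands in for mm->ioctx_lock, malloc/free for kzalloc/kfree_rcu, and slot_table/table_add are made-up names.

/* Optimistic resize: scan under the lock; on a full table, allocate
 * a 4x larger copy outside the lock and re-check before installing. */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct slot_table {
	unsigned nr;
	void *slot[];
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct slot_table *table;

static int table_add(void *item, unsigned *id_out)
{
	struct slot_table *new, *old;
	unsigned i, new_nr;

	pthread_mutex_lock(&table_lock);
	while (1) {
		if (table) {
			for (i = 0; i < table->nr; i++) {
				if (!table->slot[i]) {
					table->slot[i] = item;
					*id_out = i;
					pthread_mutex_unlock(&table_lock);
					return 0;
				}
			}
		}

		/* No free slot: grow by 4x, allocating outside the lock. */
		new_nr = (table ? table->nr : 1) * 4;
		pthread_mutex_unlock(&table_lock);

		new = calloc(1, sizeof(*new) + new_nr * sizeof(void *));
		if (!new)
			return -1;
		new->nr = new_nr;

		pthread_mutex_lock(&table_lock);
		old = table;
		if (!old) {
			table = new;
		} else if (new->nr > old->nr) {
			/* Copy the old slots over, then publish. */
			memcpy(new->slot, old->slot,
			       old->nr * sizeof(void *));
			table = new;
			free(old);	/* the kernel defers this via kfree_rcu() */
		} else {
			/* Someone else grew it first; retry the scan. */
			free(new);
		}
	}
}
]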
@@ -520,6 +578,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
 	BUG_ON(!ctx->req_batch);
 
+	err = ioctx_add_table(ctx, mm);
+	if (err)
+		goto out_cleanup_noerr;
+
 	/* limit the number of system wide aios */
 	spin_lock(&aio_nr_lock);
 	if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
@@ -532,17 +594,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
 
-	/* now link into global list. */
-	spin_lock(&mm->ioctx_lock);
-	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
-	spin_unlock(&mm->ioctx_lock);
-
 	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		 ctx, ctx->user_id, mm, ctx->nr_events);
 	return ctx;
 
 out_cleanup:
 	err = -EAGAIN;
+out_cleanup_noerr:
 	aio_free_ring(ctx);
 out_freepcpu:
 	free_percpu(ctx->cpu);
@@ -561,10 +619,18 @@ out_freectx:
  * when the processes owning a context have all exited to encourage
  * the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
-		hlist_del_rcu(&ctx->list);
+		struct kioctx_table *table;
+
+		spin_lock(&mm->ioctx_lock);
+		table = rcu_dereference(mm->ioctx_table);
+
+		WARN_ON(ctx != table->table[ctx->id]);
+		table->table[ctx->id] = NULL;
+		spin_unlock(&mm->ioctx_lock);
+
 		/* percpu_ref_kill() will do the necessary call_rcu() */
 		wake_up_all(&ctx->wait);
 
@@ -613,10 +679,28 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
  */
 void exit_aio(struct mm_struct *mm)
 {
+	struct kioctx_table *table;
 	struct kioctx *ctx;
-	struct hlist_node *n;
+	unsigned i = 0;
+
+	while (1) {
+		rcu_read_lock();
+		table = rcu_dereference(mm->ioctx_table);
+
+		do {
+			if (!table || i >= table->nr) {
+				rcu_read_unlock();
+				rcu_assign_pointer(mm->ioctx_table, NULL);
+				if (table)
+					kfree(table);
+				return;
+			}
+
+			ctx = table->table[i++];
+		} while (!ctx);
+
+		rcu_read_unlock();
 
-	hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
 		/*
 		 * We don't need to bother with munmap() here -
 		 * exit_mmap(mm) is coming and it'll unmap everything.
@@ -627,7 +711,7 @@ void exit_aio(struct mm_struct *mm)
 		 */
 		ctx->mmap_size = 0;
 
-		kill_ioctx(ctx);
+		kill_ioctx(mm, ctx);
 	}
 }
 
@@ -710,19 +794,27 @@ static void kiocb_free(struct kiocb *req)
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
+	struct aio_ring __user *ring  = (void __user *)ctx_id;
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx, *ret = NULL;
+	struct kioctx_table *table;
+	unsigned id;
+
+	if (get_user(id, &ring->id))
+		return NULL;
 
 	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
 
-	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
-		if (ctx->user_id == ctx_id) {
-			percpu_ref_get(&ctx->users);
-			ret = ctx;
-			break;
-		}
-	}
+	if (!table || id >= table->nr)
+		goto out;
 
+	ctx = table->table[id];
+	if (ctx->user_id == ctx_id) {
+		percpu_ref_get(&ctx->users);
+		ret = ctx;
+	}
+out:
 	rcu_read_unlock();
 	return ret;
 }
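[One sharp edge in the new lookup_ioctx() above: kill_ioctx() sets table->table[ctx->id] to NULL under ioctx_lock, so an id read back from a dying context's ring (or forged by userspace) can index a slot that is already NULL, and the unguarded ctx->user_id dereference would oops. A defensive variant, sketched here and not part of this patch, simply tests the pointer first:

	/* Sketch, not in this patch: guard against a cleared slot.
	 * kill_ioctx() NULLs the entry under ioctx_lock, so a racing
	 * lookup can observe table->table[id] == NULL even when
	 * id < table->nr. */
	ctx = table->table[id];
	if (ctx && ctx->user_id == ctx_id) {
		percpu_ref_get(&ctx->users);
		ret = ctx;
	}
]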
@@ -998,7 +1090,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
 		if (ret)
-			kill_ioctx(ioctx);
+			kill_ioctx(current->mm, ioctx);
 		percpu_ref_put(&ioctx->users);
 	}
 
@@ -1016,7 +1108,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		kill_ioctx(ioctx);
+		kill_ioctx(current->mm, ioctx);
 		percpu_ref_put(&ioctx->users);
 		return 0;
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fb425aa16c01..da8cf5cc1aa6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -322,6 +322,7 @@ struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
 };
 
+struct kioctx_table;
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -382,8 +383,8 @@ struct mm_struct {
 
 	struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_AIO
 	spinlock_t			ioctx_lock;
-	struct hlist_head		ioctx_list;
+	struct kioctx_table __rcu	*ioctx_table;
 #endif
 #ifdef CONFIG_MM_OWNER
 	/*
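[The __rcu annotation on ioctx_table lets sparse enforce the access discipline the fs/aio.c hunks above follow: readers dereference the pointer inside an RCU read-side critical section, while writers publish replacement tables with rcu_assign_pointer() under ioctx_lock and free old ones with kfree_rcu(). A condensed reader-side sketch; ioctx_for_id is a hypothetical name, not a function in this patch:

/* Hypothetical helper showing the reader side of the __rcu discipline;
 * the real lookup lives in lookup_ioctx() above. */
static struct kioctx *ioctx_for_id(struct mm_struct *mm, unsigned id)
{
	struct kioctx_table *table;
	struct kioctx *ctx = NULL;

	rcu_read_lock();
	table = rcu_dereference(mm->ioctx_table);
	if (table && id < table->nr)
		ctx = table->table[id];
	rcu_read_unlock();	/* ctx must be pinned (e.g. percpu_ref_get())
				 * before use once the read lock is dropped */

	return ctx;
}
]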
diff --git a/kernel/fork.c b/kernel/fork.c
index 66635c80a813..db5f541c5488 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -522,7 +522,7 @@ static void mm_init_aio(struct mm_struct *mm)
 {
 #ifdef CONFIG_AIO
 	spin_lock_init(&mm->ioctx_lock);
-	INIT_HLIST_HEAD(&mm->ioctx_list);
+	mm->ioctx_table = NULL;
 #endif
 }
 