-rw-r--r--   fs/aio.c                 | 136
-rw-r--r--   include/linux/mm_types.h |   5
-rw-r--r--   kernel/fork.c            |   2
3 files changed, 118 insertions, 25 deletions
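
In short, this patch replaces the per-mm hlist of kioctxs with an RCU-protected, resizable pointer table (struct kioctx_table) and stores each context's slot index in the user-mapped ring header, so lookup_ioctx() becomes an indexed lookup instead of a list walk. A condensed sketch of the resulting lookup path (illustrative only; the helper name is made up, and it omits the user_id sanity check and the percpu_ref_get() that the real hunk keeps):

static struct kioctx *sketch_lookup_ioctx(struct mm_struct *mm,
					  unsigned long ctx_id)
{
	struct aio_ring __user *ring = (void __user *)ctx_id;
	struct kioctx_table *table;
	struct kioctx *ctx = NULL;
	unsigned id;

	/* ioctx_add_table() wrote the slot index into the mapped ring. */
	if (get_user(id, &ring->id))
		return NULL;

	rcu_read_lock();
	table = rcu_dereference(mm->ioctx_table);
	if (table && id < table->nr)
		ctx = table->table[id];		/* O(1) slot lookup */
	rcu_read_unlock();
	return ctx;
}
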
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -66,6 +66,12 @@ struct aio_ring {
 
 #define AIO_RING_PAGES	8
 
+struct kioctx_table {
+	struct rcu_head	rcu;
+	unsigned	nr;
+	struct kioctx	*table[];
+};
+
 struct kioctx_cpu {
 	unsigned		reqs_available;
 };
@@ -74,9 +80,7 @@ struct kioctx {
 	struct percpu_ref	users;
 	atomic_t		dead;
 
-	/* This needs improving */
 	unsigned long		user_id;
-	struct hlist_node	list;
 
 	struct __percpu kioctx_cpu *cpu;
 
@@ -135,6 +139,8 @@ struct kioctx {
 
 	struct page		*internal_pages[AIO_RING_PAGES];
 	struct file		*aio_ring_file;
+
+	unsigned		id;
 };
 
 /*------ sysctl variables----*/
@@ -326,7 +332,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
-	ring->id = ctx->user_id;
+	ring->id = ~0U;
 	ring->head = ring->tail = 0;
 	ring->magic = AIO_RING_MAGIC;
 	ring->compat_features = AIO_RING_COMPAT_FEATURES;
@@ -462,6 +468,58 @@ static void free_ioctx_ref(struct percpu_ref *ref)
 	schedule_work(&ctx->free_work);
 }
 
+static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
+{
+	unsigned i, new_nr;
+	struct kioctx_table *table, *old;
+	struct aio_ring *ring;
+
+	spin_lock(&mm->ioctx_lock);
+	table = rcu_dereference(mm->ioctx_table);
+
+	while (1) {
+		if (table)
+			for (i = 0; i < table->nr; i++)
+				if (!table->table[i]) {
+					ctx->id = i;
+					table->table[i] = ctx;
+					spin_unlock(&mm->ioctx_lock);
+
+					ring = kmap_atomic(ctx->ring_pages[0]);
+					ring->id = ctx->id;
+					kunmap_atomic(ring);
+					return 0;
+				}
+
+		new_nr = (table ? table->nr : 1) * 4;
+
+		spin_unlock(&mm->ioctx_lock);
+
+		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
+				new_nr, GFP_KERNEL);
+		if (!table)
+			return -ENOMEM;
+
+		table->nr = new_nr;
+
+		spin_lock(&mm->ioctx_lock);
+		old = rcu_dereference(mm->ioctx_table);
+
+		if (!old) {
+			rcu_assign_pointer(mm->ioctx_table, table);
+		} else if (table->nr > old->nr) {
+			memcpy(table->table, old->table,
+			       old->nr * sizeof(struct kioctx *));
+
+			rcu_assign_pointer(mm->ioctx_table, table);
+			kfree_rcu(old, rcu);
+		} else {
+			kfree(table);
+			table = old;
+		}
+	}
+}
+
 /* ioctx_alloc
  *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
  */
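
The ioctx_add_table() hunk above scans for a free slot under mm->ioctx_lock and, when none is left, drops the lock, allocates a table four times larger, copies the old slots across, publishes the new table with rcu_assign_pointer(), and retires the old one with kfree_rcu(), so readers in lookup_ioctx() never observe a torn table. A self-contained sketch of that grow-and-publish step (illustrative only: the helper name is made up, it assumes new_nr exceeds the current size, skips the retry loop, and uses rcu_dereference_protected() where the hunk uses plain rcu_dereference() under the lock):

static int sketch_grow_ioctx_table(struct mm_struct *mm, unsigned new_nr)
{
	struct kioctx_table *new, *old;

	new = kzalloc(sizeof(*new) + new_nr * sizeof(struct kioctx *),
		      GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	new->nr = new_nr;

	spin_lock(&mm->ioctx_lock);
	old = rcu_dereference_protected(mm->ioctx_table,
					lockdep_is_held(&mm->ioctx_lock));
	if (old)
		memcpy(new->table, old->table,
		       old->nr * sizeof(struct kioctx *));
	rcu_assign_pointer(mm->ioctx_table, new);	/* publish the new table */
	spin_unlock(&mm->ioctx_lock);

	if (old)
		kfree_rcu(old, rcu);	/* freed only after current readers finish */
	return 0;
}
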
@@ -520,6 +578,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
 	BUG_ON(!ctx->req_batch);
 
+	err = ioctx_add_table(ctx, mm);
+	if (err)
+		goto out_cleanup_noerr;
+
 	/* limit the number of system wide aios */
 	spin_lock(&aio_nr_lock);
 	if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
@@ -532,17 +594,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
 
-	/* now link into global list. */
-	spin_lock(&mm->ioctx_lock);
-	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
-	spin_unlock(&mm->ioctx_lock);
-
 	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		 ctx, ctx->user_id, mm, ctx->nr_events);
 	return ctx;
 
 out_cleanup:
 	err = -EAGAIN;
+out_cleanup_noerr:
 	aio_free_ring(ctx);
 out_freepcpu:
 	free_percpu(ctx->cpu);
@@ -561,10 +619,18 @@ out_freectx:
  * when the processes owning a context have all exited to encourage
  * the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
-		hlist_del_rcu(&ctx->list);
+		struct kioctx_table *table;
+
+		spin_lock(&mm->ioctx_lock);
+		table = rcu_dereference(mm->ioctx_table);
+
+		WARN_ON(ctx != table->table[ctx->id]);
+		table->table[ctx->id] = NULL;
+		spin_unlock(&mm->ioctx_lock);
+
 		/* percpu_ref_kill() will do the necessary call_rcu() */
 		wake_up_all(&ctx->wait);
 
@@ -613,10 +679,28 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
  */
 void exit_aio(struct mm_struct *mm)
 {
+	struct kioctx_table *table;
 	struct kioctx *ctx;
-	struct hlist_node *n;
+	unsigned i = 0;
+
+	while (1) {
+		rcu_read_lock();
+		table = rcu_dereference(mm->ioctx_table);
+
+		do {
+			if (!table || i >= table->nr) {
+				rcu_read_unlock();
+				rcu_assign_pointer(mm->ioctx_table, NULL);
+				if (table)
+					kfree(table);
+				return;
+			}
+
+			ctx = table->table[i++];
+		} while (!ctx);
+
+		rcu_read_unlock();
 
-	hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
 		/*
 		 * We don't need to bother with munmap() here -
 		 * exit_mmap(mm) is coming and it'll unmap everything.
@@ -627,7 +711,7 @@ void exit_aio(struct mm_struct *mm)
 		 */
 		ctx->mmap_size = 0;
 
-		kill_ioctx(ctx);
+		kill_ioctx(mm, ctx);
 	}
 }
 
@@ -710,19 +794,27 @@ static void kiocb_free(struct kiocb *req)
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
+	struct aio_ring __user *ring = (void __user *)ctx_id;
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx, *ret = NULL;
+	struct kioctx_table *table;
+	unsigned id;
+
+	if (get_user(id, &ring->id))
+		return NULL;
 
 	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
 
-	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
-		if (ctx->user_id == ctx_id) {
-			percpu_ref_get(&ctx->users);
-			ret = ctx;
-			break;
-		}
-	}
+	if (!table || id >= table->nr)
+		goto out;
 
+	ctx = table->table[id];
+	if (ctx->user_id == ctx_id) {
+		percpu_ref_get(&ctx->users);
+		ret = ctx;
+	}
+out:
 	rcu_read_unlock();
 	return ret;
 }
@@ -998,7 +1090,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
 		if (ret)
-			kill_ioctx(ioctx);
+			kill_ioctx(current->mm, ioctx);
 		percpu_ref_put(&ioctx->users);
 	}
 
@@ -1016,7 +1108,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		kill_ioctx(ioctx);
+		kill_ioctx(current->mm, ioctx);
 		percpu_ref_put(&ioctx->users);
 		return 0;
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fb425aa16c01..da8cf5cc1aa6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -322,6 +322,7 @@ struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
 };
 
+struct kioctx_table;
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -382,8 +383,8 @@ struct mm_struct {
 
 	struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_AIO
-	spinlock_t		ioctx_lock;
-	struct hlist_head	ioctx_list;
+	spinlock_t			ioctx_lock;
+	struct kioctx_table __rcu	*ioctx_table;
 #endif
 #ifdef CONFIG_MM_OWNER
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 66635c80a813..db5f541c5488 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -522,7 +522,7 @@ static void mm_init_aio(struct mm_struct *mm)
 {
 #ifdef CONFIG_AIO
 	spin_lock_init(&mm->ioctx_lock);
-	INIT_HLIST_HEAD(&mm->ioctx_list);
+	mm->ioctx_table = NULL;
 #endif
 }
 