diff options
| -rw-r--r-- | fs/aio.c | 136 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 5 | ||||
| -rw-r--r-- | kernel/fork.c | 2 |
3 files changed, 118 insertions, 25 deletions
| @@ -66,6 +66,12 @@ struct aio_ring { | |||
| 66 | 66 | ||
| 67 | #define AIO_RING_PAGES 8 | 67 | #define AIO_RING_PAGES 8 |
| 68 | 68 | ||
| 69 | struct kioctx_table { | ||
| 70 | struct rcu_head rcu; /* lets a replaced table be freed with kfree_rcu() */ | ||
| 71 | unsigned nr; /* number of slots in table[] */ | ||
| 72 | struct kioctx *table[]; /* slot i holds the kioctx whose ->id is i; NULL when free */ | ||
| 73 | }; | ||
| 74 | |||
| 69 | struct kioctx_cpu { | 75 | struct kioctx_cpu { |
| 70 | unsigned reqs_available; | 76 | unsigned reqs_available; |
| 71 | }; | 77 | }; |
| @@ -74,9 +80,7 @@ struct kioctx { | |||
| 74 | struct percpu_ref users; | 80 | struct percpu_ref users; |
| 75 | atomic_t dead; | 81 | atomic_t dead; |
| 76 | 82 | ||
| 77 | /* This needs improving */ | ||
| 78 | unsigned long user_id; | 83 | unsigned long user_id; |
| 79 | struct hlist_node list; | ||
| 80 | 84 | ||
| 81 | struct __percpu kioctx_cpu *cpu; | 85 | struct __percpu kioctx_cpu *cpu; |
| 82 | 86 | ||
| @@ -135,6 +139,8 @@ struct kioctx { | |||
| 135 | 139 | ||
| 136 | struct page *internal_pages[AIO_RING_PAGES]; | 140 | struct page *internal_pages[AIO_RING_PAGES]; |
| 137 | struct file *aio_ring_file; | 141 | struct file *aio_ring_file; |
| 142 | |||
| 143 | unsigned id; | ||
| 138 | }; | 144 | }; |
| 139 | 145 | ||
| 140 | /*------ sysctl variables----*/ | 146 | /*------ sysctl variables----*/ |
| @@ -326,7 +332,7 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
| 326 | 332 | ||
| 327 | ring = kmap_atomic(ctx->ring_pages[0]); | 333 | ring = kmap_atomic(ctx->ring_pages[0]); |
| 328 | ring->nr = nr_events; /* user copy */ | 334 | ring->nr = nr_events; /* user copy */ |
| 329 | ring->id = ctx->user_id; | 335 | ring->id = ~0U; |
| 330 | ring->head = ring->tail = 0; | 336 | ring->head = ring->tail = 0; |
| 331 | ring->magic = AIO_RING_MAGIC; | 337 | ring->magic = AIO_RING_MAGIC; |
| 332 | ring->compat_features = AIO_RING_COMPAT_FEATURES; | 338 | ring->compat_features = AIO_RING_COMPAT_FEATURES; |
| @@ -462,6 +468,58 @@ static void free_ioctx_ref(struct percpu_ref *ref) | |||
| 462 | schedule_work(&ctx->free_work); | 468 | schedule_work(&ctx->free_work); |
| 463 | } | 469 | } |
| 464 | 470 | ||
| 471 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) /* put ctx in a free slot of mm->ioctx_table, growing the table when full; returns 0 or -ENOMEM */ | ||
| 472 | { | ||
| 473 | unsigned i, new_nr; | ||
| 474 | struct kioctx_table *table, *old; | ||
| 475 | struct aio_ring *ring; | ||
| 476 | |||
| 477 | spin_lock(&mm->ioctx_lock); | ||
| 478 | table = rcu_dereference_protected(mm->ioctx_table, lockdep_is_held(&mm->ioctx_lock)); /* update side: protected by ioctx_lock, not rcu_read_lock() */ | ||
| 479 | |||
| 480 | while (1) { | ||
| 481 | if (table) | ||
| 482 | for (i = 0; i < table->nr; i++) | ||
| 483 | if (!table->table[i]) { | ||
| 484 | ctx->id = i; | ||
| 485 | table->table[i] = ctx; | ||
| 486 | spin_unlock(&mm->ioctx_lock); | ||
| 487 | |||
| 488 | ring = kmap_atomic(ctx->ring_pages[0]); /* record the slot index in the ring header, where lookup_ioctx() reads it back */ | ||
| 489 | ring->id = ctx->id; | ||
| 490 | kunmap_atomic(ring); | ||
| 491 | return 0; | ||
| 492 | } | ||
| 493 | |||
| 494 | new_nr = (table ? table->nr : 1) * 4; /* no free slot: first table gets 4 slots, later ones quadruple */ | ||
| 495 | |||
| 496 | spin_unlock(&mm->ioctx_lock); /* lock is dropped across the GFP_KERNEL allocation */ | ||
| 497 | |||
| 498 | table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) * | ||
| 499 | new_nr, GFP_KERNEL); | ||
| 500 | if (!table) | ||
| 501 | return -ENOMEM; | ||
| 502 | |||
| 503 | table->nr = new_nr; | ||
| 504 | |||
| 505 | spin_lock(&mm->ioctx_lock); | ||
| 506 | old = rcu_dereference_protected(mm->ioctx_table, lockdep_is_held(&mm->ioctx_lock)); /* re-read: the table may have changed while unlocked */ | ||
| 507 | |||
| 508 | if (!old) { | ||
| 509 | rcu_assign_pointer(mm->ioctx_table, table); | ||
| 510 | } else if (table->nr > old->nr) { /* ours is still the larger one: carry the entries over and swap it in */ | ||
| 511 | memcpy(table->table, old->table, | ||
| 512 | old->nr * sizeof(struct kioctx *)); | ||
| 513 | |||
| 514 | rcu_assign_pointer(mm->ioctx_table, table); | ||
| 515 | kfree_rcu(old, rcu); | ||
| 516 | } else { /* installed table is already at least as large: drop ours and rescan the installed one */ | ||
| 517 | kfree(table); | ||
| 518 | table = old; | ||
| 519 | } | ||
| 520 | } | ||
| 521 | } | ||
| 522 | |||
| 465 | /* ioctx_alloc | 523 | /* ioctx_alloc |
| 466 | * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. | 524 | * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. |
| 467 | */ | 525 | */ |
| @@ -520,6 +578,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
| 520 | ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); | 578 | ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); |
| 521 | BUG_ON(!ctx->req_batch); | 579 | BUG_ON(!ctx->req_batch); |
| 522 | 580 | ||
| 581 | err = ioctx_add_table(ctx, mm); | ||
| 582 | if (err) | ||
| 583 | goto out_cleanup_noerr; | ||
| 584 | |||
| 523 | /* limit the number of system wide aios */ | 585 | /* limit the number of system wide aios */ |
| 524 | spin_lock(&aio_nr_lock); | 586 | spin_lock(&aio_nr_lock); |
| 525 | if (aio_nr + nr_events > (aio_max_nr * 2UL) || | 587 | if (aio_nr + nr_events > (aio_max_nr * 2UL) || |
| @@ -532,17 +594,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
| 532 | 594 | ||
| 533 | percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ | 595 | percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ |
| 534 | 596 | ||
| 535 | /* now link into global list. */ | ||
| 536 | spin_lock(&mm->ioctx_lock); | ||
| 537 | hlist_add_head_rcu(&ctx->list, &mm->ioctx_list); | ||
| 538 | spin_unlock(&mm->ioctx_lock); | ||
| 539 | |||
| 540 | pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", | 597 | pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", |
| 541 | ctx, ctx->user_id, mm, ctx->nr_events); | 598 | ctx, ctx->user_id, mm, ctx->nr_events); |
| 542 | return ctx; | 599 | return ctx; |
| 543 | 600 | ||
| 544 | out_cleanup: | 601 | out_cleanup: |
| 545 | err = -EAGAIN; | 602 | err = -EAGAIN; |
| 603 | out_cleanup_noerr: | ||
| 546 | aio_free_ring(ctx); | 604 | aio_free_ring(ctx); |
| 547 | out_freepcpu: | 605 | out_freepcpu: |
| 548 | free_percpu(ctx->cpu); | 606 | free_percpu(ctx->cpu); |
| @@ -561,10 +619,18 @@ out_freectx: | |||
| 561 | * when the processes owning a context have all exited to encourage | 619 | * when the processes owning a context have all exited to encourage |
| 562 | * the rapid destruction of the kioctx. | 620 | * the rapid destruction of the kioctx. |
| 563 | */ | 621 | */ |
| 564 | static void kill_ioctx(struct kioctx *ctx) | 622 | static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) |
| 565 | { | 623 | { |
| 566 | if (!atomic_xchg(&ctx->dead, 1)) { | 624 | if (!atomic_xchg(&ctx->dead, 1)) { |
| 567 | hlist_del_rcu(&ctx->list); | 625 | struct kioctx_table *table; |
| 626 | |||
| 627 | spin_lock(&mm->ioctx_lock); | ||
| 628 | table = rcu_dereference(mm->ioctx_table); | ||
| 629 | |||
| 630 | WARN_ON(ctx != table->table[ctx->id]); | ||
| 631 | table->table[ctx->id] = NULL; | ||
| 632 | spin_unlock(&mm->ioctx_lock); | ||
| 633 | |||
| 568 | /* percpu_ref_kill() will do the necessary call_rcu() */ | 634 | /* percpu_ref_kill() will do the necessary call_rcu() */ |
| 569 | wake_up_all(&ctx->wait); | 635 | wake_up_all(&ctx->wait); |
| 570 | 636 | ||
| @@ -613,10 +679,28 @@ EXPORT_SYMBOL(wait_on_sync_kiocb); | |||
| 613 | */ | 679 | */ |
| 614 | void exit_aio(struct mm_struct *mm) | 680 | void exit_aio(struct mm_struct *mm) |
| 615 | { | 681 | { |
| 682 | struct kioctx_table *table; | ||
| 616 | struct kioctx *ctx; | 683 | struct kioctx *ctx; |
| 617 | struct hlist_node *n; | 684 | unsigned i = 0; |
| 685 | |||
| 686 | while (1) { | ||
| 687 | rcu_read_lock(); | ||
| 688 | table = rcu_dereference(mm->ioctx_table); | ||
| 689 | |||
| 690 | do { | ||
| 691 | if (!table || i >= table->nr) { | ||
| 692 | rcu_read_unlock(); | ||
| 693 | rcu_assign_pointer(mm->ioctx_table, NULL); | ||
| 694 | if (table) | ||
| 695 | kfree(table); | ||
| 696 | return; | ||
| 697 | } | ||
| 698 | |||
| 699 | ctx = table->table[i++]; | ||
| 700 | } while (!ctx); | ||
| 701 | |||
| 702 | rcu_read_unlock(); | ||
| 618 | 703 | ||
| 619 | hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) { | ||
| 620 | /* | 704 | /* |
| 621 | * We don't need to bother with munmap() here - | 705 | * We don't need to bother with munmap() here - |
| 622 | * exit_mmap(mm) is coming and it'll unmap everything. | 706 | * exit_mmap(mm) is coming and it'll unmap everything. |
| @@ -627,7 +711,7 @@ void exit_aio(struct mm_struct *mm) | |||
| 627 | */ | 711 | */ |
| 628 | ctx->mmap_size = 0; | 712 | ctx->mmap_size = 0; |
| 629 | 713 | ||
| 630 | kill_ioctx(ctx); | 714 | kill_ioctx(mm, ctx); |
| 631 | } | 715 | } |
| 632 | } | 716 | } |
| 633 | 717 | ||
| @@ -710,19 +794,27 @@ static void kiocb_free(struct kiocb *req) | |||
| 710 | 794 | ||
| 711 | static struct kioctx *lookup_ioctx(unsigned long ctx_id) | 795 | static struct kioctx *lookup_ioctx(unsigned long ctx_id) |
| 712 | { | 796 | { |
| 797 | struct aio_ring __user *ring = (void __user *)ctx_id; /* ctx_id is treated as the user address of the ring header */ | ||
| 713 | struct mm_struct *mm = current->mm; | 798 | struct mm_struct *mm = current->mm; |
| 714 | struct kioctx *ctx, *ret = NULL; | 799 | struct kioctx *ctx, *ret = NULL; |
| 800 | struct kioctx_table *table; | ||
| 801 | unsigned id; | ||
| 802 | |||
| 803 | if (get_user(id, &ring->id)) /* table index comes from user memory, so it is untrusted */ | ||
| 804 | return NULL; | ||
| 715 | 805 | ||
| 716 | rcu_read_lock(); | 806 | rcu_read_lock(); |
| 807 | table = rcu_dereference(mm->ioctx_table); | ||
| 717 | 808 | ||
| 718 | hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) { | 809 | if (!table || id >= table->nr) |
| 719 | if (ctx->user_id == ctx_id) { | 810 | goto out; |
| 720 | percpu_ref_get(&ctx->users); | ||
| 721 | ret = ctx; | ||
| 722 | break; | ||
| 723 | } | ||
| 724 | } | ||
| 725 | 811 | ||
| 812 | ctx = table->table[id]; /* an in-range id can still name a free slot, which holds NULL */ | ||
| 813 | if (ctx && ctx->user_id == ctx_id) { | ||
| 814 | percpu_ref_get(&ctx->users); /* returned with a reference held; the caller drops it */ | ||
| 815 | ret = ctx; | ||
| 816 | } | ||
| 817 | out: | ||
| 726 | rcu_read_unlock(); | 818 | rcu_read_unlock(); |
| 727 | return ret; | 819 | return ret; |
| 728 | } | 820 | } |
| @@ -998,7 +1090,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) | |||
| 998 | if (!IS_ERR(ioctx)) { | 1090 | if (!IS_ERR(ioctx)) { |
| 999 | ret = put_user(ioctx->user_id, ctxp); | 1091 | ret = put_user(ioctx->user_id, ctxp); |
| 1000 | if (ret) | 1092 | if (ret) |
| 1001 | kill_ioctx(ioctx); | 1093 | kill_ioctx(current->mm, ioctx); |
| 1002 | percpu_ref_put(&ioctx->users); | 1094 | percpu_ref_put(&ioctx->users); |
| 1003 | } | 1095 | } |
| 1004 | 1096 | ||
| @@ -1016,7 +1108,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) | |||
| 1016 | { | 1108 | { |
| 1017 | struct kioctx *ioctx = lookup_ioctx(ctx); | 1109 | struct kioctx *ioctx = lookup_ioctx(ctx); |
| 1018 | if (likely(NULL != ioctx)) { | 1110 | if (likely(NULL != ioctx)) { |
| 1019 | kill_ioctx(ioctx); | 1111 | kill_ioctx(current->mm, ioctx); |
| 1020 | percpu_ref_put(&ioctx->users); | 1112 | percpu_ref_put(&ioctx->users); |
| 1021 | return 0; | 1113 | return 0; |
| 1022 | } | 1114 | } |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fb425aa16c01..da8cf5cc1aa6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
| @@ -322,6 +322,7 @@ struct mm_rss_stat { | |||
| 322 | atomic_long_t count[NR_MM_COUNTERS]; | 322 | atomic_long_t count[NR_MM_COUNTERS]; |
| 323 | }; | 323 | }; |
| 324 | 324 | ||
| 325 | struct kioctx_table; | ||
| 325 | struct mm_struct { | 326 | struct mm_struct { |
| 326 | struct vm_area_struct * mmap; /* list of VMAs */ | 327 | struct vm_area_struct * mmap; /* list of VMAs */ |
| 327 | struct rb_root mm_rb; | 328 | struct rb_root mm_rb; |
| @@ -382,8 +383,8 @@ struct mm_struct { | |||
| 382 | 383 | ||
| 383 | struct core_state *core_state; /* coredumping support */ | 384 | struct core_state *core_state; /* coredumping support */ |
| 384 | #ifdef CONFIG_AIO | 385 | #ifdef CONFIG_AIO |
| 385 | spinlock_t ioctx_lock; | 386 | spinlock_t ioctx_lock; |
| 386 | struct hlist_head ioctx_list; | 387 | struct kioctx_table __rcu *ioctx_table; |
| 387 | #endif | 388 | #endif |
| 388 | #ifdef CONFIG_MM_OWNER | 389 | #ifdef CONFIG_MM_OWNER |
| 389 | /* | 390 | /* |
diff --git a/kernel/fork.c b/kernel/fork.c index 66635c80a813..db5f541c5488 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -522,7 +522,7 @@ static void mm_init_aio(struct mm_struct *mm) | |||
| 522 | { | 522 | { |
| 523 | #ifdef CONFIG_AIO | 523 | #ifdef CONFIG_AIO |
| 524 | spin_lock_init(&mm->ioctx_lock); | 524 | spin_lock_init(&mm->ioctx_lock); |
| 525 | INIT_HLIST_HEAD(&mm->ioctx_list); | 525 | mm->ioctx_table = NULL; |
| 526 | #endif | 526 | #endif |
| 527 | } | 527 | } |
| 528 | 528 | ||
