author    Linus Torvalds <torvalds@linux-foundation.org>  2013-11-22 11:42:14 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-11-22 11:42:14 -0500
commit    d0f278c1dd0175093ed37ce132395dc689e6987e (patch)
tree      c2a555e704cb71d56611c9ef7be03089d1ec03f4
parent    533db9b3d4e1a0405161e66f4da66baf5863b863 (diff)
parent    ddb8c45ba15149ebd41d7586261c05f7ca37f9a1 (diff)
Merge git://git.kvack.org/~bcrl/aio-next
Pull aio fixes from Benjamin LaHaise.
* git://git.kvack.org/~bcrl/aio-next:
  aio: nullify aio->ring_pages after freeing it
  aio: prevent double free in ioctx_alloc
  aio: Fix a trinity splat
-rw-r--r--    fs/aio.c    134
1 file changed, 51 insertions, 83 deletions
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -80,6 +80,8 @@ struct kioctx {
 	struct percpu_ref users;
 	atomic_t dead;
 
+	struct percpu_ref reqs;
+
 	unsigned long user_id;
 
 	struct __percpu kioctx_cpu *cpu;
@@ -107,7 +109,6 @@ struct kioctx {
 	struct page **ring_pages;
 	long nr_pages;
 
-	struct rcu_head rcu_head;
 	struct work_struct free_work;
 
 	struct {
@@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx)
 
 	put_aio_ring_file(ctx);
 
-	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
 		kfree(ctx->ring_pages);
+		ctx->ring_pages = NULL;
+	}
 }
 
 static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
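The hunk above is the first of the three fixes ("aio: nullify aio->ring_pages after freeing it") and is the classic nullify-after-free idiom: aio_free_ring() can run more than once against the same kioctx, and a stale ring_pages pointer would turn the second pass into a double kfree(). Clearing the pointer makes the teardown idempotent. A minimal sketch of the idiom in plain userspace C (the ctx_free_pages name is invented for illustration):

#include <stdlib.h>

struct ctx {
        int **pages;
};

/* Free and clear together: a second call is harmless because
 * free(NULL) is a no-op. */
static void ctx_free_pages(struct ctx *c)
{
        free(c->pages);
        c->pages = NULL;
}

int main(void)
{
        struct ctx c = { .pages = malloc(8 * sizeof(*c.pages)) };

        ctx_free_pages(&c);
        ctx_free_pages(&c);     /* safe: the pointer was cleared above */
        return 0;
}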
@@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
 	return cancel(kiocb);
 }
 
-static void free_ioctx_rcu(struct rcu_head *head)
+static void free_ioctx(struct work_struct *work)
 {
-	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
 
+	pr_debug("freeing %p\n", ctx);
+
+	aio_free_ring(ctx);
 	free_percpu(ctx->cpu);
 	kmem_cache_free(kioctx_cachep, ctx);
 }
 
+static void free_ioctx_reqs(struct percpu_ref *ref)
+{
+	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+
+	INIT_WORK(&ctx->free_work, free_ioctx);
+	schedule_work(&ctx->free_work);
+}
+
 /*
  * When this function runs, the kioctx has been removed from the "hash table"
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  * now it's safe to cancel any that need to be.
  */
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx_users(struct percpu_ref *ref)
 {
-	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-	struct aio_ring *ring;
+	struct kioctx *ctx = container_of(ref, struct kioctx, users);
 	struct kiocb *req;
-	unsigned cpu, avail;
-	DEFINE_WAIT(wait);
 
 	spin_lock_irq(&ctx->ctx_lock);
 
@@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work)
 
 	spin_unlock_irq(&ctx->ctx_lock);
 
-	for_each_possible_cpu(cpu) {
-		struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
-
-		atomic_add(kcpu->reqs_available, &ctx->reqs_available);
-		kcpu->reqs_available = 0;
-	}
-
-	while (1) {
-		prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-
-		ring = kmap_atomic(ctx->ring_pages[0]);
-		avail = (ring->head <= ring->tail)
-			? ring->tail - ring->head
-			: ctx->nr_events - ring->head + ring->tail;
-
-		atomic_add(avail, &ctx->reqs_available);
-		ring->head = ring->tail;
-		kunmap_atomic(ring);
-
-		if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
-			break;
-
-		schedule();
-	}
-	finish_wait(&ctx->wait, &wait);
-
-	WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
-
-	aio_free_ring(ctx);
-
-	pr_debug("freeing %p\n", ctx);
-
-	/*
-	 * Here the call_rcu() is between the wait_event() for reqs_active to
-	 * hit 0, and freeing the ioctx.
-	 *
-	 * aio_complete() decrements reqs_active, but it has to touch the ioctx
-	 * after to issue a wakeup so we use rcu.
-	 */
-	call_rcu(&ctx->rcu_head, free_ioctx_rcu);
-}
-
-static void free_ioctx_ref(struct percpu_ref *ref)
-{
-	struct kioctx *ctx = container_of(ref, struct kioctx, users);
-
-	INIT_WORK(&ctx->free_work, free_ioctx);
-	schedule_work(&ctx->free_work);
+	percpu_ref_kill(&ctx->reqs);
+	percpu_ref_put(&ctx->reqs);
 }
 
 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
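Taken together, these two hunks replace the old RCU-plus-busy-wait teardown with a purely reference-counted one: killing ctx->users runs free_ioctx_users(), which cancels outstanding kiocbs and then kills the new ctx->reqs ref; when the last request reference is dropped, free_ioctx_reqs() fires and defers the actual freeing to a workqueue, since a percpu_ref release callback may run in atomic context where aio_free_ring() must not. The sketch below is a userspace analogue of that ownership rule, using a plain C11 atomic counter instead of the kernel's percpu_ref; all names are invented:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
        atomic_int refs;
        void (*release)(struct obj *);
};

static void obj_get(struct obj *o)
{
        atomic_fetch_add(&o->refs, 1);
}

/* Whoever drops the last reference runs the release callback, so a
 * completion that finishes "late" still owns the object while it
 * touches it; that is the property the old code needed RCU for. */
static void obj_put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refs, 1) == 1)
                o->release(o);
}

static void obj_release(struct obj *o)
{
        printf("freeing %p\n", (void *)o);
        free(o);
}

int main(void)
{
        struct obj *o = malloc(sizeof(*o));

        atomic_init(&o->refs, 1);       /* initial ref, cf. percpu_ref_init() */
        o->release = obj_release;

        obj_get(o);     /* a request takes a ref, cf. aio_get_req() */
        obj_put(o);     /* the request completes, cf. aio_complete() */
        obj_put(o);     /* teardown drops the initial ref; release runs */
        return 0;
}

What makes this affordable per request in the kernel is that percpu_ref keeps gets and puts as cheap per-CPU increments until percpu_ref_kill() collapses the counter into a single shared atomic.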
@@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 	}
 }
 
+static void aio_nr_sub(unsigned nr)
+{
+	spin_lock(&aio_nr_lock);
+	if (WARN_ON(aio_nr - nr > aio_nr))
+		aio_nr = 0;
+	else
+		aio_nr -= nr;
+	spin_unlock(&aio_nr_lock);
+}
+
 /* ioctx_alloc
  *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
  */
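aio_nr is unsigned, so the aio_nr - nr > aio_nr condition in the new helper is an underflow test: with wraparound arithmetic it holds exactly when nr > aio_nr. Unlike the BUG_ON() it replaces in kill_ioctx() further down, the helper just warns and clamps the counter to zero. A quick self-contained demonstration in plain C:

#include <stdio.h>

int main(void)
{
        unsigned long aio_nr = 5;
        unsigned long nr = 7;

        /* 5 - 7 wraps to ULONG_MAX - 1, which is greater than 5,
         * so the would-be underflow is detected. */
        if (aio_nr - nr > aio_nr)
                printf("underflow detected, clamping to 0\n");
        else
                aio_nr -= nr;
        return 0;
}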
@@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	ctx->max_reqs = nr_events;
 
-	if (percpu_ref_init(&ctx->users, free_ioctx_ref))
-		goto out_freectx;
+	if (percpu_ref_init(&ctx->users, free_ioctx_users))
+		goto err;
+
+	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+		goto err;
 
 	spin_lock_init(&ctx->ctx_lock);
 	spin_lock_init(&ctx->completion_lock);
@@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
 	if (!ctx->cpu)
-		goto out_freeref;
+		goto err;
 
 	if (aio_setup_ring(ctx) < 0)
-		goto out_freepcpu;
+		goto err;
 
 	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
 	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
 	    aio_nr + nr_events < aio_nr) {
 		spin_unlock(&aio_nr_lock);
-		goto out_cleanup;
+		err = -EAGAIN;
+		goto err;
 	}
 	aio_nr += ctx->max_reqs;
 	spin_unlock(&aio_nr_lock);
@@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	err = ioctx_add_table(ctx, mm);
 	if (err)
-		goto out_cleanup_put;
+		goto err_cleanup;
 
 	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		 ctx, ctx->user_id, mm, ctx->nr_events);
 	return ctx;
 
-out_cleanup_put:
-	percpu_ref_put(&ctx->users);
-out_cleanup:
-	err = -EAGAIN;
-	aio_free_ring(ctx);
-out_freepcpu:
+err_cleanup:
+	aio_nr_sub(ctx->max_reqs);
+err:
 	free_percpu(ctx->cpu);
-out_freeref:
+	free_percpu(ctx->reqs.pcpu_count);
 	free_percpu(ctx->users.pcpu_count);
-out_freectx:
-	put_aio_ring_file(ctx);
 	kmem_cache_free(kioctx_cachep, ctx);
 	pr_debug("error allocating ioctx %d\n", err);
 	return ERR_PTR(err);
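Collapsing the ladder of out_* labels into a single err: is what removes the double free ("aio: prevent double free in ioctx_alloc"): the old out_cleanup path called aio_free_ring() even though a failed aio_setup_ring() had already torn the ring down. The single-label style is safe because every cleanup call tolerates objects that were never set up; free_percpu(NULL), like kfree(NULL), is a no-op, and the kioctx itself is zero-allocated. The same shape in a small, self-contained userspace sketch (names invented):

#include <stdlib.h>

struct thing {
        int *a;
        int *b;
};

/* One error label, no ladder: each free is NULL-safe, so it does not
 * matter how far setup got before the jump to err. */
static struct thing *thing_alloc(void)
{
        struct thing *t = calloc(1, sizeof(*t));

        if (!t)
                return NULL;

        t->a = malloc(64);
        if (!t->a)
                goto err;

        t->b = malloc(64);
        if (!t->b)
                goto err;

        return t;
err:
        free(t->a);     /* free(NULL) is a no-op */
        free(t->b);
        free(t);
        return NULL;
}

int main(void)
{
        struct thing *t = thing_alloc();

        if (t) {
                free(t->a);
                free(t->b);
                free(t);
        }
        return 0;
}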
@@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
 	 * -EAGAIN with no ioctxs actually in use (as far as userspace
 	 * could tell).
 	 */
-	spin_lock(&aio_nr_lock);
-	BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
-	aio_nr -= ctx->max_reqs;
-	spin_unlock(&aio_nr_lock);
+	aio_nr_sub(ctx->max_reqs);
 
 	if (ctx->mmap_size)
 		vm_munmap(ctx->mmap_base, ctx->mmap_size);
@@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 	if (unlikely(!req))
 		goto out_put;
 
+	percpu_ref_get(&ctx->reqs);
+
 	req->ki_ctx = ctx;
 	return req;
 out_put:
@@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 		return;
 	}
 
-	/*
-	 * Take rcu_read_lock() in case the kioctx is being destroyed, as we
-	 * need to issue a wakeup after incrementing reqs_available.
-	 */
-	rcu_read_lock();
-
 	if (iocb->ki_list.next) {
 		unsigned long flags;
 
@@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 
-	rcu_read_unlock();
+	percpu_ref_put(&ctx->reqs);
 }
 EXPORT_SYMBOL(aio_complete);
 
@@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	return 0;
 out_put_req:
 	put_reqs_available(ctx, 1);
+	percpu_ref_put(&ctx->reqs);
 	kiocb_free(req);
 	return ret;
 }
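The reqs reference taken in aio_get_req() must be dropped exactly once on every path: aio_complete() covers the normal case, and this hunk adds the matching put for requests that fail between allocation and submission, where a leaked reference would keep the kioctx alive indefinitely. A hedged userspace sketch of the balanced pattern (plain atomics instead of percpu_ref; all names invented):

#include <stdatomic.h>
#include <stdlib.h>

struct ctx { atomic_int reqs; };
struct req { struct ctx *ctx; };

static struct req *req_alloc(struct ctx *c)
{
        struct req *r = calloc(1, sizeof(*r));

        if (!r)
                return NULL;
        atomic_fetch_add(&c->reqs, 1);  /* cf. percpu_ref_get(&ctx->reqs) */
        r->ctx = c;
        return r;
}

/* Every exit after req_alloc() succeeds drops the reference exactly
 * once: normally the completion path would, so the error path here
 * must do it itself. */
static int submit_one(struct ctx *c, int bad_input)
{
        struct req *r = req_alloc(c);

        if (!r)
                return -1;

        if (bad_input)
                goto out_put_req;

        /* Queued successfully: the completion side (not modelled in
         * this sketch) is now responsible for the matching put. */
        return 0;

out_put_req:
        atomic_fetch_sub(&c->reqs, 1);  /* cf. percpu_ref_put(&ctx->reqs) */
        free(r);
        return -1;
}

int main(void)
{
        struct ctx c;

        atomic_init(&c.reqs, 0);
        submit_one(&c, 1);      /* exercises the error-path put */
        return 0;
}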
