diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-22 11:42:14 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-22 11:42:14 -0500 |
commit | d0f278c1dd0175093ed37ce132395dc689e6987e (patch) | |
tree | c2a555e704cb71d56611c9ef7be03089d1ec03f4 /fs/aio.c | |
parent | 533db9b3d4e1a0405161e66f4da66baf5863b863 (diff) | |
parent | ddb8c45ba15149ebd41d7586261c05f7ca37f9a1 (diff) |
Merge git://git.kvack.org/~bcrl/aio-next
Pull aio fixes from Benjamin LaHaise.
* git://git.kvack.org/~bcrl/aio-next:
  aio: nullify aio->ring_pages after freeing it
  aio: prevent double free in ioctx_alloc
  aio: Fix a trinity splat
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- | fs/aio.c | 134 |
1 files changed, 51 insertions, 83 deletions
@@ -80,6 +80,8 @@ struct kioctx { | |||
80 | struct percpu_ref users; | 80 | struct percpu_ref users; |
81 | atomic_t dead; | 81 | atomic_t dead; |
82 | 82 | ||
83 | struct percpu_ref reqs; | ||
84 | |||
83 | unsigned long user_id; | 85 | unsigned long user_id; |
84 | 86 | ||
85 | struct __percpu kioctx_cpu *cpu; | 87 | struct __percpu kioctx_cpu *cpu; |
@@ -107,7 +109,6 @@ struct kioctx { | |||
107 | struct page **ring_pages; | 109 | struct page **ring_pages; |
108 | long nr_pages; | 110 | long nr_pages; |
109 | 111 | ||
110 | struct rcu_head rcu_head; | ||
111 | struct work_struct free_work; | 112 | struct work_struct free_work; |
112 | 113 | ||
113 | struct { | 114 | struct { |
@@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx) | |||
250 | 251 | ||
251 | put_aio_ring_file(ctx); | 252 | put_aio_ring_file(ctx); |
252 | 253 | ||
253 | if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) | 254 | if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { |
254 | kfree(ctx->ring_pages); | 255 | kfree(ctx->ring_pages); |
256 | ctx->ring_pages = NULL; | ||
257 | } | ||
255 | } | 258 | } |
256 | 259 | ||
257 | static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) | 260 | static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) | |||
463 | return cancel(kiocb); | 466 | return cancel(kiocb); |
464 | } | 467 | } |
465 | 468 | ||
466 | static void free_ioctx_rcu(struct rcu_head *head) | 469 | static void free_ioctx(struct work_struct *work) |
467 | { | 470 | { |
468 | struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); | 471 | struct kioctx *ctx = container_of(work, struct kioctx, free_work); |
469 | 472 | ||
473 | pr_debug("freeing %p\n", ctx); | ||
474 | |||
475 | aio_free_ring(ctx); | ||
470 | free_percpu(ctx->cpu); | 476 | free_percpu(ctx->cpu); |
471 | kmem_cache_free(kioctx_cachep, ctx); | 477 | kmem_cache_free(kioctx_cachep, ctx); |
472 | } | 478 | } |
473 | 479 | ||
480 | static void free_ioctx_reqs(struct percpu_ref *ref) | ||
481 | { | ||
482 | struct kioctx *ctx = container_of(ref, struct kioctx, reqs); | ||
483 | |||
484 | INIT_WORK(&ctx->free_work, free_ioctx); | ||
485 | schedule_work(&ctx->free_work); | ||
486 | } | ||
487 | |||
474 | /* | 488 | /* |
475 | * When this function runs, the kioctx has been removed from the "hash table" | 489 | * When this function runs, the kioctx has been removed from the "hash table" |
476 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - | 490 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - |
477 | * now it's safe to cancel any that need to be. | 491 | * now it's safe to cancel any that need to be. |
478 | */ | 492 | */ |
479 | static void free_ioctx(struct work_struct *work) | 493 | static void free_ioctx_users(struct percpu_ref *ref) |
480 | { | 494 | { |
481 | struct kioctx *ctx = container_of(work, struct kioctx, free_work); | 495 | struct kioctx *ctx = container_of(ref, struct kioctx, users); |
482 | struct aio_ring *ring; | ||
483 | struct kiocb *req; | 496 | struct kiocb *req; |
484 | unsigned cpu, avail; | ||
485 | DEFINE_WAIT(wait); | ||
486 | 497 | ||
487 | spin_lock_irq(&ctx->ctx_lock); | 498 | spin_lock_irq(&ctx->ctx_lock); |
488 | 499 | ||
@@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work) | |||
496 | 507 | ||
497 | spin_unlock_irq(&ctx->ctx_lock); | 508 | spin_unlock_irq(&ctx->ctx_lock); |
498 | 509 | ||
499 | for_each_possible_cpu(cpu) { | 510 | percpu_ref_kill(&ctx->reqs); |
500 | struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); | 511 | percpu_ref_put(&ctx->reqs); |
501 | |||
502 | atomic_add(kcpu->reqs_available, &ctx->reqs_available); | ||
503 | kcpu->reqs_available = 0; | ||
504 | } | ||
505 | |||
506 | while (1) { | ||
507 | prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
508 | |||
509 | ring = kmap_atomic(ctx->ring_pages[0]); | ||
510 | avail = (ring->head <= ring->tail) | ||
511 | ? ring->tail - ring->head | ||
512 | : ctx->nr_events - ring->head + ring->tail; | ||
513 | |||
514 | atomic_add(avail, &ctx->reqs_available); | ||
515 | ring->head = ring->tail; | ||
516 | kunmap_atomic(ring); | ||
517 | |||
518 | if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1) | ||
519 | break; | ||
520 | |||
521 | schedule(); | ||
522 | } | ||
523 | finish_wait(&ctx->wait, &wait); | ||
524 | |||
525 | WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); | ||
526 | |||
527 | aio_free_ring(ctx); | ||
528 | |||
529 | pr_debug("freeing %p\n", ctx); | ||
530 | |||
531 | /* | ||
532 | * Here the call_rcu() is between the wait_event() for reqs_active to | ||
533 | * hit 0, and freeing the ioctx. | ||
534 | * | ||
535 | * aio_complete() decrements reqs_active, but it has to touch the ioctx | ||
536 | * after to issue a wakeup so we use rcu. | ||
537 | */ | ||
538 | call_rcu(&ctx->rcu_head, free_ioctx_rcu); | ||
539 | } | ||
540 | |||
541 | static void free_ioctx_ref(struct percpu_ref *ref) | ||
542 | { | ||
543 | struct kioctx *ctx = container_of(ref, struct kioctx, users); | ||
544 | |||
545 | INIT_WORK(&ctx->free_work, free_ioctx); | ||
546 | schedule_work(&ctx->free_work); | ||
547 | } | 512 | } |
548 | 513 | ||
549 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | 514 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) |
@@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |||
602 | } | 567 | } |
603 | } | 568 | } |
604 | 569 | ||
570 | static void aio_nr_sub(unsigned nr) | ||
571 | { | ||
572 | spin_lock(&aio_nr_lock); | ||
573 | if (WARN_ON(aio_nr - nr > aio_nr)) | ||
574 | aio_nr = 0; | ||
575 | else | ||
576 | aio_nr -= nr; | ||
577 | spin_unlock(&aio_nr_lock); | ||
578 | } | ||
579 | |||
605 | /* ioctx_alloc | 580 | /* ioctx_alloc |
606 | * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. | 581 | * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. |
607 | */ | 582 | */ |
@@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
639 | 614 | ||
640 | ctx->max_reqs = nr_events; | 615 | ctx->max_reqs = nr_events; |
641 | 616 | ||
642 | if (percpu_ref_init(&ctx->users, free_ioctx_ref)) | 617 | if (percpu_ref_init(&ctx->users, free_ioctx_users)) |
643 | goto out_freectx; | 618 | goto err; |
619 | |||
620 | if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) | ||
621 | goto err; | ||
644 | 622 | ||
645 | spin_lock_init(&ctx->ctx_lock); | 623 | spin_lock_init(&ctx->ctx_lock); |
646 | spin_lock_init(&ctx->completion_lock); | 624 | spin_lock_init(&ctx->completion_lock); |
@@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
651 | 629 | ||
652 | ctx->cpu = alloc_percpu(struct kioctx_cpu); | 630 | ctx->cpu = alloc_percpu(struct kioctx_cpu); |
653 | if (!ctx->cpu) | 631 | if (!ctx->cpu) |
654 | goto out_freeref; | 632 | goto err; |
655 | 633 | ||
656 | if (aio_setup_ring(ctx) < 0) | 634 | if (aio_setup_ring(ctx) < 0) |
657 | goto out_freepcpu; | 635 | goto err; |
658 | 636 | ||
659 | atomic_set(&ctx->reqs_available, ctx->nr_events - 1); | 637 | atomic_set(&ctx->reqs_available, ctx->nr_events - 1); |
660 | ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); | 638 | ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); |
@@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
666 | if (aio_nr + nr_events > (aio_max_nr * 2UL) || | 644 | if (aio_nr + nr_events > (aio_max_nr * 2UL) || |
667 | aio_nr + nr_events < aio_nr) { | 645 | aio_nr + nr_events < aio_nr) { |
668 | spin_unlock(&aio_nr_lock); | 646 | spin_unlock(&aio_nr_lock); |
669 | goto out_cleanup; | 647 | err = -EAGAIN; |
648 | goto err; | ||
670 | } | 649 | } |
671 | aio_nr += ctx->max_reqs; | 650 | aio_nr += ctx->max_reqs; |
672 | spin_unlock(&aio_nr_lock); | 651 | spin_unlock(&aio_nr_lock); |
@@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
675 | 654 | ||
676 | err = ioctx_add_table(ctx, mm); | 655 | err = ioctx_add_table(ctx, mm); |
677 | if (err) | 656 | if (err) |
678 | goto out_cleanup_put; | 657 | goto err_cleanup; |
679 | 658 | ||
680 | pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", | 659 | pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", |
681 | ctx, ctx->user_id, mm, ctx->nr_events); | 660 | ctx, ctx->user_id, mm, ctx->nr_events); |
682 | return ctx; | 661 | return ctx; |
683 | 662 | ||
684 | out_cleanup_put: | 663 | err_cleanup: |
685 | percpu_ref_put(&ctx->users); | 664 | aio_nr_sub(ctx->max_reqs); |
686 | out_cleanup: | 665 | err: |
687 | err = -EAGAIN; | ||
688 | aio_free_ring(ctx); | ||
689 | out_freepcpu: | ||
690 | free_percpu(ctx->cpu); | 666 | free_percpu(ctx->cpu); |
691 | out_freeref: | 667 | free_percpu(ctx->reqs.pcpu_count); |
692 | free_percpu(ctx->users.pcpu_count); | 668 | free_percpu(ctx->users.pcpu_count); |
693 | out_freectx: | ||
694 | put_aio_ring_file(ctx); | ||
695 | kmem_cache_free(kioctx_cachep, ctx); | 669 | kmem_cache_free(kioctx_cachep, ctx); |
696 | pr_debug("error allocating ioctx %d\n", err); | 670 | pr_debug("error allocating ioctx %d\n", err); |
697 | return ERR_PTR(err); | 671 | return ERR_PTR(err); |
@@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) | |||
726 | * -EAGAIN with no ioctxs actually in use (as far as userspace | 700 | * -EAGAIN with no ioctxs actually in use (as far as userspace |
727 | * could tell). | 701 | * could tell). |
728 | */ | 702 | */ |
729 | spin_lock(&aio_nr_lock); | 703 | aio_nr_sub(ctx->max_reqs); |
730 | BUG_ON(aio_nr - ctx->max_reqs > aio_nr); | ||
731 | aio_nr -= ctx->max_reqs; | ||
732 | spin_unlock(&aio_nr_lock); | ||
733 | 704 | ||
734 | if (ctx->mmap_size) | 705 | if (ctx->mmap_size) |
735 | vm_munmap(ctx->mmap_base, ctx->mmap_size); | 706 | vm_munmap(ctx->mmap_base, ctx->mmap_size); |
@@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) | |||
861 | if (unlikely(!req)) | 832 | if (unlikely(!req)) |
862 | goto out_put; | 833 | goto out_put; |
863 | 834 | ||
835 | percpu_ref_get(&ctx->reqs); | ||
836 | |||
864 | req->ki_ctx = ctx; | 837 | req->ki_ctx = ctx; |
865 | return req; | 838 | return req; |
866 | out_put: | 839 | out_put: |
@@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
930 | return; | 903 | return; |
931 | } | 904 | } |
932 | 905 | ||
933 | /* | ||
934 | * Take rcu_read_lock() in case the kioctx is being destroyed, as we | ||
935 | * need to issue a wakeup after incrementing reqs_available. | ||
936 | */ | ||
937 | rcu_read_lock(); | ||
938 | |||
939 | if (iocb->ki_list.next) { | 906 | if (iocb->ki_list.next) { |
940 | unsigned long flags; | 907 | unsigned long flags; |
941 | 908 | ||
@@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
1010 | if (waitqueue_active(&ctx->wait)) | 977 | if (waitqueue_active(&ctx->wait)) |
1011 | wake_up(&ctx->wait); | 978 | wake_up(&ctx->wait); |
1012 | 979 | ||
1013 | rcu_read_unlock(); | 980 | percpu_ref_put(&ctx->reqs); |
1014 | } | 981 | } |
1015 | EXPORT_SYMBOL(aio_complete); | 982 | EXPORT_SYMBOL(aio_complete); |
1016 | 983 | ||
@@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
1421 | return 0; | 1388 | return 0; |
1422 | out_put_req: | 1389 | out_put_req: |
1423 | put_reqs_available(ctx, 1); | 1390 | put_reqs_available(ctx, 1); |
1391 | percpu_ref_put(&ctx->reqs); | ||
1424 | kiocb_free(req); | 1392 | kiocb_free(req); |
1425 | return ret; | 1393 | return ret; |
1426 | } | 1394 | } |