about summary refs log tree commit diff stats
path: root/fs/aio.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-11-22 11:42:14 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-11-22 11:42:14 -0500
commit d0f278c1dd0175093ed37ce132395dc689e6987e (patch)
tree c2a555e704cb71d56611c9ef7be03089d1ec03f4 /fs/aio.c
parent 533db9b3d4e1a0405161e66f4da66baf5863b863 (diff)
parent ddb8c45ba15149ebd41d7586261c05f7ca37f9a1 (diff)
Merge git://git.kvack.org/~bcrl/aio-next
Pull aio fixes from Benjamin LaHaise.

* git://git.kvack.org/~bcrl/aio-next:
  aio: nullify aio->ring_pages after freeing it
  aio: prevent double free in ioctx_alloc
  aio: Fix a trinity splat
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- fs/aio.c 134
1 files changed, 51 insertions, 83 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 823efcbb6ccd..08159ed13649 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -80,6 +80,8 @@ struct kioctx {
80 struct percpu_ref users; 80 struct percpu_ref users;
81 atomic_t dead; 81 atomic_t dead;
82 82
83 struct percpu_ref reqs;
84
83 unsigned long user_id; 85 unsigned long user_id;
84 86
85 struct __percpu kioctx_cpu *cpu; 87 struct __percpu kioctx_cpu *cpu;
@@ -107,7 +109,6 @@ struct kioctx {
107 struct page **ring_pages; 109 struct page **ring_pages;
108 long nr_pages; 110 long nr_pages;
109 111
110 struct rcu_head rcu_head;
111 struct work_struct free_work; 112 struct work_struct free_work;
112 113
113 struct { 114 struct {
@@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx)
250 251
251 put_aio_ring_file(ctx); 252 put_aio_ring_file(ctx);
252 253
253 if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) 254 if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
254 kfree(ctx->ring_pages); 255 kfree(ctx->ring_pages);
256 ctx->ring_pages = NULL;
257 }
255} 258}
256 259
257static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) 260static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
463 return cancel(kiocb); 466 return cancel(kiocb);
464} 467}
465 468
466static void free_ioctx_rcu(struct rcu_head *head) 469static void free_ioctx(struct work_struct *work)
467{ 470{
468 struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); 471 struct kioctx *ctx = container_of(work, struct kioctx, free_work);
469 472
473 pr_debug("freeing %p\n", ctx);
474
475 aio_free_ring(ctx);
470 free_percpu(ctx->cpu); 476 free_percpu(ctx->cpu);
471 kmem_cache_free(kioctx_cachep, ctx); 477 kmem_cache_free(kioctx_cachep, ctx);
472} 478}
473 479
480static void free_ioctx_reqs(struct percpu_ref *ref)
481{
482 struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
483
484 INIT_WORK(&ctx->free_work, free_ioctx);
485 schedule_work(&ctx->free_work);
486}
487
474/* 488/*
475 * When this function runs, the kioctx has been removed from the "hash table" 489 * When this function runs, the kioctx has been removed from the "hash table"
476 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - 490 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
477 * now it's safe to cancel any that need to be. 491 * now it's safe to cancel any that need to be.
478 */ 492 */
479static void free_ioctx(struct work_struct *work) 493static void free_ioctx_users(struct percpu_ref *ref)
480{ 494{
481 struct kioctx *ctx = container_of(work, struct kioctx, free_work); 495 struct kioctx *ctx = container_of(ref, struct kioctx, users);
482 struct aio_ring *ring;
483 struct kiocb *req; 496 struct kiocb *req;
484 unsigned cpu, avail;
485 DEFINE_WAIT(wait);
486 497
487 spin_lock_irq(&ctx->ctx_lock); 498 spin_lock_irq(&ctx->ctx_lock);
488 499
@@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work)
496 507
497 spin_unlock_irq(&ctx->ctx_lock); 508 spin_unlock_irq(&ctx->ctx_lock);
498 509
499 for_each_possible_cpu(cpu) { 510 percpu_ref_kill(&ctx->reqs);
500 struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); 511 percpu_ref_put(&ctx->reqs);
501
502 atomic_add(kcpu->reqs_available, &ctx->reqs_available);
503 kcpu->reqs_available = 0;
504 }
505
506 while (1) {
507 prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
508
509 ring = kmap_atomic(ctx->ring_pages[0]);
510 avail = (ring->head <= ring->tail)
511 ? ring->tail - ring->head
512 : ctx->nr_events - ring->head + ring->tail;
513
514 atomic_add(avail, &ctx->reqs_available);
515 ring->head = ring->tail;
516 kunmap_atomic(ring);
517
518 if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
519 break;
520
521 schedule();
522 }
523 finish_wait(&ctx->wait, &wait);
524
525 WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
526
527 aio_free_ring(ctx);
528
529 pr_debug("freeing %p\n", ctx);
530
531 /*
532 * Here the call_rcu() is between the wait_event() for reqs_active to
533 * hit 0, and freeing the ioctx.
534 *
535 * aio_complete() decrements reqs_active, but it has to touch the ioctx
536 * after to issue a wakeup so we use rcu.
537 */
538 call_rcu(&ctx->rcu_head, free_ioctx_rcu);
539}
540
541static void free_ioctx_ref(struct percpu_ref *ref)
542{
543 struct kioctx *ctx = container_of(ref, struct kioctx, users);
544
545 INIT_WORK(&ctx->free_work, free_ioctx);
546 schedule_work(&ctx->free_work);
547} 512}
548 513
549static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) 514static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
@@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
602 } 567 }
603} 568}
604 569
570static void aio_nr_sub(unsigned nr)
571{
572 spin_lock(&aio_nr_lock);
573 if (WARN_ON(aio_nr - nr > aio_nr))
574 aio_nr = 0;
575 else
576 aio_nr -= nr;
577 spin_unlock(&aio_nr_lock);
578}
579
605/* ioctx_alloc 580/* ioctx_alloc
606 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 581 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
607 */ 582 */
@@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
639 614
640 ctx->max_reqs = nr_events; 615 ctx->max_reqs = nr_events;
641 616
642 if (percpu_ref_init(&ctx->users, free_ioctx_ref)) 617 if (percpu_ref_init(&ctx->users, free_ioctx_users))
643 goto out_freectx; 618 goto err;
619
620 if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
621 goto err;
644 622
645 spin_lock_init(&ctx->ctx_lock); 623 spin_lock_init(&ctx->ctx_lock);
646 spin_lock_init(&ctx->completion_lock); 624 spin_lock_init(&ctx->completion_lock);
@@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
651 629
652 ctx->cpu = alloc_percpu(struct kioctx_cpu); 630 ctx->cpu = alloc_percpu(struct kioctx_cpu);
653 if (!ctx->cpu) 631 if (!ctx->cpu)
654 goto out_freeref; 632 goto err;
655 633
656 if (aio_setup_ring(ctx) < 0) 634 if (aio_setup_ring(ctx) < 0)
657 goto out_freepcpu; 635 goto err;
658 636
659 atomic_set(&ctx->reqs_available, ctx->nr_events - 1); 637 atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
660 ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); 638 ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
666 if (aio_nr + nr_events > (aio_max_nr * 2UL) || 644 if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
667 aio_nr + nr_events < aio_nr) { 645 aio_nr + nr_events < aio_nr) {
668 spin_unlock(&aio_nr_lock); 646 spin_unlock(&aio_nr_lock);
669 goto out_cleanup; 647 err = -EAGAIN;
648 goto err;
670 } 649 }
671 aio_nr += ctx->max_reqs; 650 aio_nr += ctx->max_reqs;
672 spin_unlock(&aio_nr_lock); 651 spin_unlock(&aio_nr_lock);
@@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
675 654
676 err = ioctx_add_table(ctx, mm); 655 err = ioctx_add_table(ctx, mm);
677 if (err) 656 if (err)
678 goto out_cleanup_put; 657 goto err_cleanup;
679 658
680 pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", 659 pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
681 ctx, ctx->user_id, mm, ctx->nr_events); 660 ctx, ctx->user_id, mm, ctx->nr_events);
682 return ctx; 661 return ctx;
683 662
684out_cleanup_put: 663err_cleanup:
685 percpu_ref_put(&ctx->users); 664 aio_nr_sub(ctx->max_reqs);
686out_cleanup: 665err:
687 err = -EAGAIN;
688 aio_free_ring(ctx);
689out_freepcpu:
690 free_percpu(ctx->cpu); 666 free_percpu(ctx->cpu);
691out_freeref: 667 free_percpu(ctx->reqs.pcpu_count);
692 free_percpu(ctx->users.pcpu_count); 668 free_percpu(ctx->users.pcpu_count);
693out_freectx:
694 put_aio_ring_file(ctx);
695 kmem_cache_free(kioctx_cachep, ctx); 669 kmem_cache_free(kioctx_cachep, ctx);
696 pr_debug("error allocating ioctx %d\n", err); 670 pr_debug("error allocating ioctx %d\n", err);
697 return ERR_PTR(err); 671 return ERR_PTR(err);
@@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
726 * -EAGAIN with no ioctxs actually in use (as far as userspace 700 * -EAGAIN with no ioctxs actually in use (as far as userspace
727 * could tell). 701 * could tell).
728 */ 702 */
729 spin_lock(&aio_nr_lock); 703 aio_nr_sub(ctx->max_reqs);
730 BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
731 aio_nr -= ctx->max_reqs;
732 spin_unlock(&aio_nr_lock);
733 704
734 if (ctx->mmap_size) 705 if (ctx->mmap_size)
735 vm_munmap(ctx->mmap_base, ctx->mmap_size); 706 vm_munmap(ctx->mmap_base, ctx->mmap_size);
@@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
861 if (unlikely(!req)) 832 if (unlikely(!req))
862 goto out_put; 833 goto out_put;
863 834
835 percpu_ref_get(&ctx->reqs);
836
864 req->ki_ctx = ctx; 837 req->ki_ctx = ctx;
865 return req; 838 return req;
866out_put: 839out_put:
@@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
930 return; 903 return;
931 } 904 }
932 905
933 /*
934 * Take rcu_read_lock() in case the kioctx is being destroyed, as we
935 * need to issue a wakeup after incrementing reqs_available.
936 */
937 rcu_read_lock();
938
939 if (iocb->ki_list.next) { 906 if (iocb->ki_list.next) {
940 unsigned long flags; 907 unsigned long flags;
941 908
@@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1010 if (waitqueue_active(&ctx->wait)) 977 if (waitqueue_active(&ctx->wait))
1011 wake_up(&ctx->wait); 978 wake_up(&ctx->wait);
1012 979
1013 rcu_read_unlock(); 980 percpu_ref_put(&ctx->reqs);
1014} 981}
1015EXPORT_SYMBOL(aio_complete); 982EXPORT_SYMBOL(aio_complete);
1016 983
@@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1421 return 0; 1388 return 0;
1422out_put_req: 1389out_put_req:
1423 put_reqs_available(ctx, 1); 1390 put_reqs_available(ctx, 1);
1391 percpu_ref_put(&ctx->reqs);
1424 kiocb_free(req); 1392 kiocb_free(req);
1425 return ret; 1393 return ret;
1426} 1394}