Diffstat (limited to 'fs/aio.c')

 fs/aio.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 87 insertions(+), 7 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index ae635872affb..84a751005f5b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,6 +141,7 @@ struct kioctx {
 
 	struct {
 		unsigned	tail;
+		unsigned	completed_events;
 		spinlock_t	completion_lock;
 	} ____cacheline_aligned_in_smp;
 
@@ -660,10 +661,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 
-	if (percpu_ref_init(&ctx->users, free_ioctx_users))
+	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
 		goto err;
 
-	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
 		goto err;
 
 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
@@ -792,6 +793,8 @@ void exit_aio(struct mm_struct *mm)
 
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
+		struct completion requests_done =
+			COMPLETION_INITIALIZER_ONSTACK(requests_done);
 
 		if (!ctx)
 			continue;
@@ -803,7 +806,10 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, NULL);
+		kill_ioctx(mm, ctx, &requests_done);
+
+		/* Wait until all IO for the context is done. */
+		wait_for_completion(&requests_done);
 	}
 
 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -857,6 +863,68 @@ out:
 	return ret;
 }
 
+/* refill_reqs_available
+ *	Updates the reqs_available reference counts used for tracking the
+ *	number of free slots in the completion ring.  This can be called
+ *	from aio_complete() (to optimistically update reqs_available) or
+ *	from aio_get_req() (the "we're out of events" case).  It must be
+ *	called holding ctx->completion_lock.
+ */
+static void refill_reqs_available(struct kioctx *ctx, unsigned head,
+				  unsigned tail)
+{
+	unsigned events_in_ring, completed;
+
+	/* Clamp head since userland can write to it. */
+	head %= ctx->nr_events;
+	if (head <= tail)
+		events_in_ring = tail - head;
+	else
+		events_in_ring = ctx->nr_events - (head - tail);
+
+	completed = ctx->completed_events;
+	if (events_in_ring < completed)
+		completed -= events_in_ring;
+	else
+		completed = 0;
+
+	if (!completed)
+		return;
+
+	ctx->completed_events -= completed;
+	put_reqs_available(ctx, completed);
+}
+
+/* user_refill_reqs_available
+ *	Called to refill reqs_available when aio_get_req() encounters an
+ *	out-of-space condition in the completion ring.
+ */
+static void user_refill_reqs_available(struct kioctx *ctx)
+{
+	spin_lock_irq(&ctx->completion_lock);
+	if (ctx->completed_events) {
+		struct aio_ring *ring;
+		unsigned head;
+
+		/* Access of ring->head may race with aio_read_events_ring()
+		 * here, but that's okay: we may read either the old or the
+		 * new version, and either will be valid.  The important
+		 * part is that head cannot pass tail since we prevent
+		 * aio_complete() from updating tail by holding
+		 * ctx->completion_lock.  Even if head is invalid, the check
+		 * against ctx->completed_events below will make sure we do
+		 * the safe/right thing.
+		 */
+		ring = kmap_atomic(ctx->ring_pages[0]);
+		head = ring->head;
+		kunmap_atomic(ring);
+
+		refill_reqs_available(ctx, head, ctx->tail);
+	}
+
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 /* aio_get_req
  *	Allocate a slot for an aio request.
  *	Returns NULL if no requests are free.
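
Note: the occupancy arithmetic in refill_reqs_available() above is easiest to check with concrete numbers. A minimal user-space sketch of the same head/tail computation (ring_occupancy() is a hypothetical helper for illustration, not part of fs/aio.c):

#include <assert.h>

/* Same arithmetic as refill_reqs_available(): clamp head because
 * userland can scribble on it, and handle the case where tail has
 * wrapped around the end of the ring past head. */
static unsigned ring_occupancy(unsigned head, unsigned tail,
			       unsigned nr_events)
{
	head %= nr_events;
	if (head <= tail)
		return tail - head;
	return nr_events - (head - tail);
}

int main(void)
{
	assert(ring_occupancy(4, 8, 128) == 4);		/* unwrapped: 4 pending */
	assert(ring_occupancy(120, 8, 128) == 16);	/* wrapped past the end */
	return 0;
}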
@@ -865,8 +933,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req;
 
-	if (!get_reqs_available(ctx))
-		return NULL;
+	if (!get_reqs_available(ctx)) {
+		user_refill_reqs_available(ctx);
+		if (!get_reqs_available(ctx))
+			return NULL;
+	}
 
 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
 	if (unlikely(!req))
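
Note: the allocation change above is a fast-path/slow-path retry: try the cached slot count, reclaim completed ring slots once, then try again. A toy single-threaded model of that shape (plain counters stand in for get_reqs_available()/user_refill_reqs_available(); hypothetical, for illustration only):

#include <stdbool.h>
#include <stdio.h>

static int avail;		/* stands in for the cached free slots  */
static int harvestable = 1;	/* completed but not yet accounted for  */

static bool try_get(void)
{
	if (avail <= 0)
		return false;
	avail--;
	return true;
}

static bool get_slot(void)
{
	if (try_get())
		return true;	/* fast path: a cached slot was free     */
	avail += harvestable;	/* slow path: reclaim, as the refill     */
	harvestable = 0;	/* functions above do under the lock     */
	return try_get();	/* retry once, then report exhaustion    */
}

int main(void)
{
	printf("%d\n", get_slot());	/* 1: succeeds via the reclaim   */
	printf("%d\n", get_slot());	/* 0: genuinely out of slots     */
	return 0;
}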
@@ -925,8 +996,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
+	unsigned tail, pos, head;
 	unsigned long	flags;
-	unsigned tail, pos;
 
 	/*
 	 * Special case handling for sync iocbs:
@@ -987,10 +1058,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ctx->tail = tail;
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
+	head = ring->head;
 	ring->tail = tail;
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	ctx->completed_events++;
+	if (ctx->completed_events > 1)
+		refill_reqs_available(ctx, head, tail);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
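
Note: the `ctx->completed_events > 1` test batches the refill work. When only the event just added has completed, it is still sitting in the ring, so refill_reqs_available() would compute nothing to harvest anyway; the check skips that guaranteed no-op while completion_lock is held. A toy model of the gating alone (hypothetical; the real refill additionally subtracts the events still in the ring):

#include <stdio.h>

static unsigned completed_events, reqs_available;

static void complete_one(void)
{
	completed_events++;
	if (completed_events > 1) {	/* skip the lone-completion case */
		reqs_available += completed_events;
		completed_events = 0;
	}
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		complete_one();
	/* prints "avail=4 pending=1": one event stays pending until the
	 * next burst, or until user_refill_reqs_available() harvests it */
	printf("avail=%u pending=%u\n", reqs_available, completed_events);
	return 0;
}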
@@ -1005,7 +1080,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/* everything turned out well, dispose of the aiocb. */
 	kiocb_free(iocb);
-	put_reqs_available(ctx, 1);
 
 	/*
 	 * We have to order our ring_info tail store above and test
@@ -1042,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	tail = ring->tail;
 	kunmap_atomic(ring);
 
+	/*
+	 * Ensure that once we've read the current tail pointer, we
+	 * also see the events that were stored up to the tail.
+	 */
+	smp_rmb();
+
 	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
 
 	if (head == tail)
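
Note: the smp_rmb() added above pairs with the write-side ordering in aio_complete(), which makes the event stores visible before publishing the new tail; a reader must not look at event slots until it has observed the tail store that covers them. A user-space analogue of the same publish/consume ordering using C11 acquire/release (hypothetical single-producer ring, not the kernel's aio ring layout):

#include <stdatomic.h>
#include <stdio.h>

struct ring {
	_Atomic unsigned tail;
	int slot[128];
};

/* producer side (cf. aio_complete): fill the slot, then publish tail
 * with release semantics so the slot store cannot pass the tail store */
static void publish(struct ring *r, unsigned t, int val)
{
	r->slot[t % 128] = val;
	atomic_store_explicit(&r->tail, t + 1, memory_order_release);
}

/* consumer side (cf. aio_read_events_ring): acquire-load the tail,
 * the portable analogue of the plain tail read followed by smp_rmb() */
static int consume(struct ring *r, unsigned head)
{
	unsigned t = atomic_load_explicit(&r->tail, memory_order_acquire);
	if (head == t)
		return -1;	/* ring empty */
	return r->slot[head % 128];
}

int main(void)
{
	struct ring r = { 0 };
	publish(&r, 0, 42);
	printf("%d\n", consume(&r, 0));	/* prints 42 */
	return 0;
}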