author		Benjamin LaHaise <bcrl@kvack.org>	2014-08-24 13:14:05 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-24 18:47:27 -0400
commit		d856f32a86b2b015ab180ab7a55e455ed8d3ccc5 (patch)
tree		885097a564287bad1cc32039d89501e5a2e8b867 /fs
parent		451fd72219dd6f3355e2d036c598544c760ee532 (diff)
aio: fix reqs_available handling
As reported by Dan Aloni, commit f8567a3845ac ("aio: fix aio request leak when events are reaped by userspace") introduces a regression when user code attempts to perform io_submit() with more events than are available in the ring buffer. Reverting that commit would reintroduce a regression when user space event reaping is used.

Fixing this bug is a bit more involved than the previous attempts to fix this regression. Since we do not have a single point at which we can count events as being reaped by user space and io_getevents(), we have to track event completion by looking at the number of events left in the event ring. So long as there are as many events in the ring buffer as there have been completion events generated, we cannot call put_reqs_available(). The code to check for this is now placed in refill_reqs_available().

A test program from Dan, modified by me for verifying this bug, is available at http://www.kvack.org/~bcrl/20140824-aio_bug.c .

Reported-by: Dan Aloni <dan@kernelim.com>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Acked-by: Dan Aloni <dan@kernelim.com>
Cc: Kent Overstreet <kmo@daterainc.com>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Cc: stable@vger.kernel.org	# v3.16 and anything that f8567a3845ac was backported to
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
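[Editor's illustration, not part of the commit: a minimal user-space sketch of the ring-occupancy arithmetic the fix relies on. The function name and the sample head/tail/nr_events values are invented for this example; they merely mirror the head clamping and wraparound handling that the patch adds in refill_reqs_available() below.]

#include <stdio.h>

/* Occupancy of a circular completion ring: clamp head first (userland
 * can scribble on it), then handle the wrapped and unwrapped cases.
 */
static unsigned events_in_ring(unsigned head, unsigned tail, unsigned nr_events)
{
	head %= nr_events;			/* clamp untrusted head */
	if (head <= tail)
		return tail - head;		/* no wraparound */
	return nr_events - (head - tail);	/* tail wrapped past the end */
}

int main(void)
{
	printf("%u\n", events_in_ring(120, 5, 128));	/* wrapped case: prints 13 */
	printf("%u\n", events_in_ring(7, 7, 128));	/* empty ring: prints 0 */
	return 0;
}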
Diffstat (limited to 'fs')
-rw-r--r--	fs/aio.c	77
1 file changed, 73 insertions, 4 deletions
diff --git a/fs/aio.c b/fs/aio.c
index ae635872affb..97bc62cbe2da 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,6 +141,7 @@ struct kioctx {
 
 	struct {
 		unsigned	tail;
+		unsigned	completed_events;
 		spinlock_t	completion_lock;
 	} ____cacheline_aligned_in_smp;
 
@@ -857,6 +858,68 @@ out:
 	return ret;
 }
 
+/* refill_reqs_available
+ *	Updates the reqs_available reference counts used for tracking the
+ *	number of free slots in the completion ring.  This can be called
+ *	from aio_complete() (to optimistically update reqs_available) or
+ *	from aio_get_req() (the we're out of events case).  It must be
+ *	called holding ctx->completion_lock.
+ */
+static void refill_reqs_available(struct kioctx *ctx, unsigned head,
+				  unsigned tail)
+{
+	unsigned events_in_ring, completed;
+
+	/* Clamp head since userland can write to it. */
+	head %= ctx->nr_events;
+	if (head <= tail)
+		events_in_ring = tail - head;
+	else
+		events_in_ring = ctx->nr_events - (head - tail);
+
+	completed = ctx->completed_events;
+	if (events_in_ring < completed)
+		completed -= events_in_ring;
+	else
+		completed = 0;
+
+	if (!completed)
+		return;
+
+	ctx->completed_events -= completed;
+	put_reqs_available(ctx, completed);
+}
+
+/* user_refill_reqs_available
+ *	Called to refill reqs_available when aio_get_req() encounters an
+ *	out of space in the completion ring.
+ */
+static void user_refill_reqs_available(struct kioctx *ctx)
+{
+	spin_lock_irq(&ctx->completion_lock);
+	if (ctx->completed_events) {
+		struct aio_ring *ring;
+		unsigned head;
+
+		/* Access of ring->head may race with aio_read_events_ring()
+		 * here, but that's okay since whether we read the old version
+		 * or the new version, and either will be valid.  The important
+		 * part is that head cannot pass tail since we prevent
+		 * aio_complete() from updating tail by holding
+		 * ctx->completion_lock.  Even if head is invalid, the check
+		 * against ctx->completed_events below will make sure we do the
+		 * safe/right thing.
+		 */
+		ring = kmap_atomic(ctx->ring_pages[0]);
+		head = ring->head;
+		kunmap_atomic(ring);
+
+		refill_reqs_available(ctx, head, ctx->tail);
+	}
+
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 /* aio_get_req
  *	Allocate a slot for an aio request.
  *	Returns NULL if no requests are free.
@@ -865,8 +928,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req;
 
-	if (!get_reqs_available(ctx))
-		return NULL;
+	if (!get_reqs_available(ctx)) {
+		user_refill_reqs_available(ctx);
+		if (!get_reqs_available(ctx))
+			return NULL;
+	}
 
 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
 	if (unlikely(!req))
@@ -925,8 +991,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
+	unsigned	tail, pos, head;
 	unsigned long	flags;
-	unsigned	tail, pos;
 
 	/*
 	 * Special case handling for sync iocbs:
@@ -987,10 +1053,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ctx->tail = tail;
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
+	head = ring->head;
 	ring->tail = tail;
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	ctx->completed_events++;
+	if (ctx->completed_events > 1)
+		refill_reqs_available(ctx, head, tail);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1005,7 +1075,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/* everything turned out well, dispose of the aiocb. */
 	kiocb_free(iocb);
-	put_reqs_available(ctx, 1);
 
 	/*
 	 * We have to order our ring_info tail store above and test
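[Editor's illustration, not part of the commit: a rough user-space sketch of the "events reaped by userspace" pattern that motivated f8567a3845ac and this fix. This is not Dan's test program; the struct aio_ring layout is copied from fs/aio.c, the assumption that io_context_t points at the mmap'd ring matches how the kernel exposes it to libaio, and barriers and error handling are reduced to a minimum.]

#include <libaio.h>

/* User-visible copy of the kernel's completion ring header (layout
 * mirrors struct aio_ring in fs/aio.c; userspace must declare it itself).
 */
struct aio_ring {
	unsigned	id;
	unsigned	nr;		/* number of io_event slots */
	unsigned	head;		/* consumer index, advanced by userspace */
	unsigned	tail;		/* producer index, advanced by the kernel */
	unsigned	magic;
	unsigned	compat_features;
	unsigned	incompat_features;
	unsigned	header_length;
	struct io_event	io_events[];
};

/* Reap up to 'max' completed events without entering the kernel.
 * Events consumed this way never pass through io_getevents(), which is
 * exactly the case refill_reqs_available() now has to account for.
 */
static int user_reap_events(io_context_t ctx, struct io_event *out, unsigned max)
{
	struct aio_ring *ring = (struct aio_ring *)ctx;
	unsigned head = ring->head;
	unsigned tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
	unsigned n = 0;

	while (head != tail && n < max) {
		out[n++] = ring->io_events[head];
		head = (head + 1) % ring->nr;
	}
	__atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
	return n;
}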