Diffstat (limited to 'fs/aio.c')
-rw-r--r--  fs/aio.c  94
1 file changed, 87 insertions, 7 deletions
@@ -141,6 +141,7 @@ struct kioctx {
 
 	struct {
 		unsigned	tail;
+		unsigned	completed_events;
 		spinlock_t	completion_lock;
 	} ____cacheline_aligned_in_smp;
 
@@ -660,10 +661,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 
-	if (percpu_ref_init(&ctx->users, free_ioctx_users))
+	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
 		goto err;
 
-	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
 		goto err;
 
 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
@@ -792,6 +793,8 @@ void exit_aio(struct mm_struct *mm)
 
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
+		struct completion requests_done =
+			COMPLETION_INITIALIZER_ONSTACK(requests_done);
 
 		if (!ctx)
 			continue;
@@ -803,7 +806,10 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, NULL);
+		kill_ioctx(mm, ctx, &requests_done);
+
+		/* Wait until all IO for the context are done. */
+		wait_for_completion(&requests_done);
 	}
 
 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
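The exit_aio() hunks above block teardown on an on-stack completion until every request for the context has finished. A rough userspace analogue of that pattern, with C11 threads standing in for the kernel's struct completion (all names here are illustrative, not from fs/aio.c):

/* Minimal sketch: the teardown path hands a "completion" to the kill
 * path and blocks until the last in-flight request signals it. */
#include <stdio.h>
#include <threads.h>

struct completion {
	mtx_t lock;
	cnd_t wait;
	int done;
};

static void init_completion(struct completion *c)
{
	mtx_init(&c->lock, mtx_plain);
	cnd_init(&c->wait);
	c->done = 0;
}

static void complete(struct completion *c)
{
	mtx_lock(&c->lock);
	c->done = 1;
	cnd_signal(&c->wait);
	mtx_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	mtx_lock(&c->lock);
	while (!c->done)
		cnd_wait(&c->wait, &c->lock);
	mtx_unlock(&c->lock);
}

/* Stand-in for the request teardown that eventually finishes all I/O. */
static int kill_requests(void *arg)
{
	complete(arg);	/* last request done: wake the waiter */
	return 0;
}

int main(void)
{
	struct completion requests_done;
	thrd_t t;

	init_completion(&requests_done);
	thrd_create(&t, kill_requests, &requests_done);
	wait_for_completion(&requests_done);	/* exit_aio()-style wait */
	thrd_join(t, NULL);
	puts("all requests done");
	return 0;
}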
@@ -857,6 +863,68 @@ out:
 	return ret;
 }
 
+/* refill_reqs_available
+ *	Updates the reqs_available reference counts used for tracking the
+ *	number of free slots in the completion ring.  This can be called
+ *	from aio_complete() (to optimistically update reqs_available) or
+ *	from aio_get_req() (the we're out of events case).  It must be
+ *	called holding ctx->completion_lock.
+ */
+static void refill_reqs_available(struct kioctx *ctx, unsigned head,
+				  unsigned tail)
+{
+	unsigned events_in_ring, completed;
+
+	/* Clamp head since userland can write to it. */
+	head %= ctx->nr_events;
+	if (head <= tail)
+		events_in_ring = tail - head;
+	else
+		events_in_ring = ctx->nr_events - (head - tail);
+
+	completed = ctx->completed_events;
+	if (events_in_ring < completed)
+		completed -= events_in_ring;
+	else
+		completed = 0;
+
+	if (!completed)
+		return;
+
+	ctx->completed_events -= completed;
+	put_reqs_available(ctx, completed);
+}
+
+/* user_refill_reqs_available
+ *	Called to refill reqs_available when aio_get_req() encounters an
+ *	out of space in the completion ring.
+ */
+static void user_refill_reqs_available(struct kioctx *ctx)
+{
+	spin_lock_irq(&ctx->completion_lock);
+	if (ctx->completed_events) {
+		struct aio_ring *ring;
+		unsigned head;
+
+		/* Access of ring->head may race with aio_read_events_ring()
+		 * here, but that's okay since whether we read the old version
+		 * or the new version, and either will be valid.  The important
+		 * part is that head cannot pass tail since we prevent
+		 * aio_complete() from updating tail by holding
+		 * ctx->completion_lock.  Even if head is invalid, the check
+		 * against ctx->completed_events below will make sure we do the
+		 * safe/right thing.
+		 */
+		ring = kmap_atomic(ctx->ring_pages[0]);
+		head = ring->head;
+		kunmap_atomic(ring);
+
+		refill_reqs_available(ctx, head, ctx->tail);
+	}
+
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 /* aio_get_req
  *	Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
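The core of refill_reqs_available() above is ring-occupancy arithmetic: events still sitting between head and tail are visible to userspace and must stay accounted for, so only completions beyond that count can be handed back to reqs_available. A standalone sketch of that calculation (plain userspace C; reclaimable() is a made-up helper name, not kernel code):

#include <assert.h>
#include <stdio.h>

/* Mirror of the ring occupancy math in refill_reqs_available(): returns
 * how many completed events are no longer in the ring and can therefore
 * be returned to the available-slots pool. */
static unsigned reclaimable(unsigned head, unsigned tail,
			    unsigned nr_events, unsigned completed)
{
	unsigned events_in_ring;

	head %= nr_events;	/* clamp: userland can scribble on head */
	if (head <= tail)
		events_in_ring = tail - head;
	else
		events_in_ring = nr_events - (head - tail);

	return events_in_ring < completed ? completed - events_in_ring : 0;
}

int main(void)
{
	/* 128-slot ring, 10 events still unread, 25 completions recorded:
	 * 15 slots can be refilled now, the other 10 once userspace
	 * consumes the events between head and tail. */
	assert(reclaimable(100, 110, 128, 25) == 15);
	/* Wrapped case: 20 events in the ring, 20 completions recorded,
	 * so nothing extra can be reclaimed yet. */
	assert(reclaimable(120, 12, 128, 20) == 0);
	printf("ring accounting checks passed\n");
	return 0;
}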
@@ -865,8 +933,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req;
 
-	if (!get_reqs_available(ctx))
-		return NULL;
+	if (!get_reqs_available(ctx)) {
+		user_refill_reqs_available(ctx);
+		if (!get_reqs_available(ctx))
+			return NULL;
+	}
 
 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
 	if (unlikely(!req))
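The aio_get_req() change above turns a hard failure into try/reclaim/retry: if the fast path finds no free slots, reclaim whatever has already completed and try exactly once more before giving up. A toy model of that shape with C11 atomics (pool, try_get and refill are hypothetical names, not the kernel's API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Shared pool of request slots plus a side counter of completions that
 * have not yet been folded back into the pool. */
struct pool {
	_Atomic int available;	/* reqs_available analogue */
	_Atomic int completed;	/* completed_events analogue */
};

static struct pool pool = { .available = 0, .completed = 3 };

static bool try_get(struct pool *p)
{
	int old = atomic_load(&p->available);

	while (old > 0) {
		if (atomic_compare_exchange_weak(&p->available, &old, old - 1))
			return true;
	}
	return false;
}

static void refill(struct pool *p)
{
	/* Move whatever has completed back into the available pool. */
	int done = atomic_exchange(&p->completed, 0);

	atomic_fetch_add(&p->available, done);
}

static bool get_slot(struct pool *p)
{
	if (try_get(p))
		return true;
	refill(p);		/* user_refill_reqs_available() analogue */
	return try_get(p);	/* one retry, then fail as before */
}

int main(void)
{
	/* Pool starts empty, but three completions are pending: the
	 * reclaim-and-retry path makes the allocation succeed. */
	printf("first attempt: %s\n", get_slot(&pool) ? "ok" : "busy");
	return 0;
}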
@@ -925,8 +996,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
+	unsigned tail, pos, head;
 	unsigned long	flags;
-	unsigned tail, pos;
 
 	/*
 	 * Special case handling for sync iocbs:
@@ -987,10 +1058,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ctx->tail = tail;
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
+	head = ring->head;
 	ring->tail = tail;
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	ctx->completed_events++;
+	if (ctx->completed_events > 1)
+		refill_reqs_available(ctx, head, tail);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1005,7 +1080,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/* everything turned out well, dispose of the aiocb. */
 	kiocb_free(iocb);
-	put_reqs_available(ctx, 1);
 
 	/*
 	 * We have to order our ring_info tail store above and test
@@ -1042,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	tail = ring->tail;
 	kunmap_atomic(ring);
 
+	/*
+	 * Ensure that once we've read the current tail pointer, that
+	 * we also see the events that were stored up to the tail.
+	 */
+	smp_rmb();
+
 	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
 
 	if (head == tail)
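The smp_rmb() added above orders the load of ring->tail against the subsequent reads of the events themselves, pairing with the completion side, which stores each event before publishing the new tail. A small userspace model of that producer/consumer ordering using C11 acquire/release atomics (an analogue of the barrier pairing, not the kernel's implementation):

#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

#define RING_SIZE 8

/* Toy event ring: the producer plays the role of aio_complete(), the
 * consumer the role of aio_read_events_ring().  Only the tail
 * publication ordering is modelled; all names are illustrative. */
static long events[RING_SIZE];
static _Atomic unsigned tail;	/* next free slot, published by producer */

static int producer(void *arg)
{
	unsigned t = atomic_load_explicit(&tail, memory_order_relaxed);

	(void)arg;
	events[t % RING_SIZE] = 42;	/* store the event payload first... */
	/* ...then publish the new tail; the release store pairs with the
	 * consumer's acquire load, much like the wmb/rmb pairing above. */
	atomic_store_explicit(&tail, t + 1, memory_order_release);
	return 0;
}

int main(void)
{
	thrd_t t;
	unsigned head = 0, cur;

	thrd_create(&t, producer, NULL);

	/* Consumer: wait for tail to move, acquire-load it, and only then
	 * read the event slots up to it. */
	do {
		cur = atomic_load_explicit(&tail, memory_order_acquire);
	} while (cur == head);

	printf("event[%u] = %ld\n", head, events[head % RING_SIZE]);
	thrd_join(t, NULL);
	return 0;
}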