Diffstat (limited to 'fs/aio.c')

-rw-r--r--  fs/aio.c | 90
1 file changed, 85 insertions(+), 5 deletions(-)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,6 +141,7 @@ struct kioctx {
 
 	struct {
 		unsigned	tail;
+		unsigned	completed_events;
 		spinlock_t	completion_lock;
 	} ____cacheline_aligned_in_smp;
 
@@ -792,6 +793,8 @@ void exit_aio(struct mm_struct *mm)
 
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
+		struct completion requests_done =
+			COMPLETION_INITIALIZER_ONSTACK(requests_done);
 
 		if (!ctx)
 			continue;
@@ -803,7 +806,10 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, NULL);
+		kill_ioctx(mm, ctx, &requests_done);
+
+		/* Wait until all IO for the context are done. */
+		wait_for_completion(&requests_done);
 	}
 
 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -857,6 +863,68 @@ out:
 	return ret;
 }
 
+/* refill_reqs_available
+ *	Updates the reqs_available reference counts used for tracking the
+ *	number of free slots in the completion ring. This can be called
+ *	from aio_complete() (to optimistically update reqs_available) or
+ *	from aio_get_req() (the we're out of events case). It must be
+ *	called holding ctx->completion_lock.
+ */
+static void refill_reqs_available(struct kioctx *ctx, unsigned head,
+				  unsigned tail)
+{
+	unsigned events_in_ring, completed;
+
+	/* Clamp head since userland can write to it. */
+	head %= ctx->nr_events;
+	if (head <= tail)
+		events_in_ring = tail - head;
+	else
+		events_in_ring = ctx->nr_events - (head - tail);
+
+	completed = ctx->completed_events;
+	if (events_in_ring < completed)
+		completed -= events_in_ring;
+	else
+		completed = 0;
+
+	if (!completed)
+		return;
+
+	ctx->completed_events -= completed;
+	put_reqs_available(ctx, completed);
+}
+
+/* user_refill_reqs_available
+ *	Called to refill reqs_available when aio_get_req() encounters an
+ *	out of space in the completion ring.
+ */
+static void user_refill_reqs_available(struct kioctx *ctx)
+{
+	spin_lock_irq(&ctx->completion_lock);
+	if (ctx->completed_events) {
+		struct aio_ring *ring;
+		unsigned head;
+
+		/* Access of ring->head may race with aio_read_events_ring()
+		 * here, but that's okay since whether we read the old version
+		 * or the new version, and either will be valid. The important
+		 * part is that head cannot pass tail since we prevent
+		 * aio_complete() from updating tail by holding
+		 * ctx->completion_lock. Even if head is invalid, the check
+		 * against ctx->completed_events below will make sure we do the
+		 * safe/right thing.
+		 */
+		ring = kmap_atomic(ctx->ring_pages[0]);
+		head = ring->head;
+		kunmap_atomic(ring);
+
+		refill_reqs_available(ctx, head, ctx->tail);
+	}
+
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 /* aio_get_req
  *	Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
@@ -865,8 +933,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req;
 
-	if (!get_reqs_available(ctx))
-		return NULL;
+	if (!get_reqs_available(ctx)) {
+		user_refill_reqs_available(ctx);
+		if (!get_reqs_available(ctx))
+			return NULL;
+	}
 
 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
 	if (unlikely(!req))
@@ -925,8 +996,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
+	unsigned tail, pos, head;
 	unsigned long	flags;
-	unsigned tail, pos;
 
 	/*
 	 * Special case handling for sync iocbs:
@@ -987,10 +1058,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ctx->tail = tail;
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
+	head = ring->head;
 	ring->tail = tail;
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	ctx->completed_events++;
+	if (ctx->completed_events > 1)
+		refill_reqs_available(ctx, head, tail);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1005,7 +1080,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/* everything turned out well, dispose of the aiocb. */
 	kiocb_free(iocb);
-	put_reqs_available(ctx, 1);
 
 	/*
 	 * We have to order our ring_info tail store above and test
@@ -1042,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	tail = ring->tail;
 	kunmap_atomic(ring);
 
+	/*
+	 * Ensure that once we've read the current tail pointer, that
+	 * we also see the events that were stored up to the tail.
+	 */
+	smp_rmb();
+
 	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
 
 	if (head == tail)
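
The heart of the patch is the slot accounting in refill_reqs_available(): a completion-ring slot may only be handed back to submitters once userspace has reaped the corresponding event, i.e. once the event no longer sits between head and tail. The standalone sketch below exercises the same head/tail arithmetic in userspace; struct demo_ctx, demo_refill() and the sample numbers are hypothetical stand-ins, and put_reqs_available() is replaced by a plain counter.

/*
 * Standalone illustration of the refill arithmetic (not kernel code).
 */
#include <stdio.h>

struct demo_ctx {
	unsigned nr_events;		/* size of the completion ring */
	unsigned completed_events;	/* events completed since the last refill */
	unsigned reqs_available;	/* free submission slots */
};

/* Mirrors refill_reqs_available(): only events that userspace has already
 * reaped (those no longer between head and tail) free up slots. */
static void demo_refill(struct demo_ctx *ctx, unsigned head, unsigned tail)
{
	unsigned events_in_ring, completed;

	head %= ctx->nr_events;		/* clamp: userland can write head */
	if (head <= tail)
		events_in_ring = tail - head;
	else
		events_in_ring = ctx->nr_events - (head - tail);

	completed = ctx->completed_events;
	if (events_in_ring < completed)
		completed -= events_in_ring;	/* unreaped events stay accounted */
	else
		completed = 0;

	if (!completed)
		return;

	ctx->completed_events -= completed;
	ctx->reqs_available += completed;	/* stands in for put_reqs_available() */
}

int main(void)
{
	struct demo_ctx ctx = { .nr_events = 128, .completed_events = 10,
				.reqs_available = 0 };

	/* Ten events completed, userspace has reaped six of them:
	 * head = 6, tail = 10, so four events still sit in the ring. */
	demo_refill(&ctx, 6, 10);
	printf("reqs_available=%u completed_events=%u\n",
	       ctx.reqs_available, ctx.completed_events);	/* prints 6 and 4 */
	return 0;
}

With nr_events = 128, ten completed events and a head of 6, four events are still unreaped, so six slots are returned and completed_events drops to four; these are the same quantities aio_get_req() recovers via user_refill_reqs_available() when it runs out of slots.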
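
The smp_rmb() added to aio_read_events_ring() pairs with the write-side ordering aio_complete() enforces before publishing the new tail: an event must be visible before any tail value that covers it. Below is an analogous userspace sketch using C11 acquire/release atomics; the ring, producer and consumer are hypothetical illustrations, and the kernel relies on its own smp_rmb()/smp_wmb() primitives rather than C11 atomics.

/*
 * Analogous C11 sketch of the publish/consume ordering (not kernel code).
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define RING_SIZE 8

static int ring[RING_SIZE];
static atomic_uint tail;	/* published by the producer */

/* Producer: store the "event" first, then publish the new tail with
 * release semantics, mirroring how aio_complete() orders the event
 * stores before the tail update. */
static void *producer(void *arg)
{
	(void)arg;
	ring[0] = 42;						/* the event */
	atomic_store_explicit(&tail, 1, memory_order_release);	/* publish tail */
	return NULL;
}

/* Consumer: an acquire load stands in for the tail read followed by
 * smp_rmb() in aio_read_events_ring(); events stored before the tail
 * we observed are then guaranteed to be visible. */
static void *consumer(void *arg)
{
	(void)arg;
	unsigned t = atomic_load_explicit(&tail, memory_order_acquire);
	if (t > 0)			/* may see 0 if it runs first; that's fine */
		printf("event: %d\n", ring[t - 1]);
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&p, NULL, producer, NULL);
	pthread_create(&c, NULL, consumer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}

Compile with cc -pthread. Without the acquire/release pairing (for example with relaxed ordering on both sides) the consumer could observe the new tail yet still read a stale slot from the ring, which is exactly the reordering the added barrier rules out.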
