aboutsummaryrefslogtreecommitdiffstats
path: root/fs/aio.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/aio.c')
-rw-r--r--fs/aio.c155
1 files changed, 74 insertions, 81 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 263ebce940c0..16239fbd6458 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -58,18 +58,6 @@ struct aio_ring {
58}; /* 128 bytes + ring size */ 58}; /* 128 bytes + ring size */
59 59
60#define AIO_RING_PAGES 8 60#define AIO_RING_PAGES 8
61struct aio_ring_info {
62 unsigned long mmap_base;
63 unsigned long mmap_size;
64
65 struct page **ring_pages;
66 struct mutex ring_lock;
67 long nr_pages;
68
69 unsigned nr, tail;
70
71 struct page *internal_pages[AIO_RING_PAGES];
72};
73 61
74struct kioctx { 62struct kioctx {
75 atomic_t users; 63 atomic_t users;
@@ -90,14 +78,30 @@ struct kioctx {
90 * This is what userspace passed to io_setup(), it's not used for 78 * This is what userspace passed to io_setup(), it's not used for
91 * anything but counting against the global max_reqs quota. 79 * anything but counting against the global max_reqs quota.
92 * 80 *
93 * The real limit is ring->nr - 1, which will be larger (see 81 * The real limit is nr_events - 1, which will be larger (see
94 * aio_setup_ring()) 82 * aio_setup_ring())
95 */ 83 */
96 unsigned max_reqs; 84 unsigned max_reqs;
97 85
98 struct aio_ring_info ring_info; 86 /* Size of ringbuffer, in units of struct io_event */
87 unsigned nr_events;
99 88
100 spinlock_t completion_lock; 89 unsigned long mmap_base;
90 unsigned long mmap_size;
91
92 struct page **ring_pages;
93 long nr_pages;
94
95 struct {
96 struct mutex ring_lock;
97 } ____cacheline_aligned;
98
99 struct {
100 unsigned tail;
101 spinlock_t completion_lock;
102 } ____cacheline_aligned;
103
104 struct page *internal_pages[AIO_RING_PAGES];
101 105
102 struct rcu_head rcu_head; 106 struct rcu_head rcu_head;
103 struct work_struct rcu_work; 107 struct work_struct rcu_work;
@@ -129,26 +133,21 @@ __initcall(aio_setup);
129 133
130static void aio_free_ring(struct kioctx *ctx) 134static void aio_free_ring(struct kioctx *ctx)
131{ 135{
132 struct aio_ring_info *info = &ctx->ring_info;
133 long i; 136 long i;
134 137
135 for (i=0; i<info->nr_pages; i++) 138 for (i = 0; i < ctx->nr_pages; i++)
136 put_page(info->ring_pages[i]); 139 put_page(ctx->ring_pages[i]);
137 140
138 if (info->mmap_size) { 141 if (ctx->mmap_size)
139 vm_munmap(info->mmap_base, info->mmap_size); 142 vm_munmap(ctx->mmap_base, ctx->mmap_size);
140 }
141 143
142 if (info->ring_pages && info->ring_pages != info->internal_pages) 144 if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
143 kfree(info->ring_pages); 145 kfree(ctx->ring_pages);
144 info->ring_pages = NULL;
145 info->nr = 0;
146} 146}
147 147
148static int aio_setup_ring(struct kioctx *ctx) 148static int aio_setup_ring(struct kioctx *ctx)
149{ 149{
150 struct aio_ring *ring; 150 struct aio_ring *ring;
151 struct aio_ring_info *info = &ctx->ring_info;
152 unsigned nr_events = ctx->max_reqs; 151 unsigned nr_events = ctx->max_reqs;
153 struct mm_struct *mm = current->mm; 152 struct mm_struct *mm = current->mm;
154 unsigned long size, populate; 153 unsigned long size, populate;
@@ -166,45 +165,44 @@ static int aio_setup_ring(struct kioctx *ctx)
166 165
167 nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); 166 nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
168 167
169 info->nr = 0; 168 ctx->nr_events = 0;
170 info->ring_pages = info->internal_pages; 169 ctx->ring_pages = ctx->internal_pages;
171 if (nr_pages > AIO_RING_PAGES) { 170 if (nr_pages > AIO_RING_PAGES) {
172 info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 171 ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
173 if (!info->ring_pages) 172 GFP_KERNEL);
173 if (!ctx->ring_pages)
174 return -ENOMEM; 174 return -ENOMEM;
175 } 175 }
176 176
177 info->mmap_size = nr_pages * PAGE_SIZE; 177 ctx->mmap_size = nr_pages * PAGE_SIZE;
178 pr_debug("attempting mmap of %lu bytes\n", info->mmap_size); 178 pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
179 down_write(&mm->mmap_sem); 179 down_write(&mm->mmap_sem);
180 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 180 ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size,
181 PROT_READ|PROT_WRITE, 181 PROT_READ|PROT_WRITE,
182 MAP_ANONYMOUS|MAP_PRIVATE, 0, 182 MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate);
183 &populate); 183 if (IS_ERR((void *)ctx->mmap_base)) {
184 if (IS_ERR((void *)info->mmap_base)) {
185 up_write(&mm->mmap_sem); 184 up_write(&mm->mmap_sem);
186 info->mmap_size = 0; 185 ctx->mmap_size = 0;
187 aio_free_ring(ctx); 186 aio_free_ring(ctx);
188 return -EAGAIN; 187 return -EAGAIN;
189 } 188 }
190 189
191 pr_debug("mmap address: 0x%08lx\n", info->mmap_base); 190 pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
192 info->nr_pages = get_user_pages(current, mm, info->mmap_base, nr_pages, 191 ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
193 1, 0, info->ring_pages, NULL); 192 1, 0, ctx->ring_pages, NULL);
194 up_write(&mm->mmap_sem); 193 up_write(&mm->mmap_sem);
195 194
196 if (unlikely(info->nr_pages != nr_pages)) { 195 if (unlikely(ctx->nr_pages != nr_pages)) {
197 aio_free_ring(ctx); 196 aio_free_ring(ctx);
198 return -EAGAIN; 197 return -EAGAIN;
199 } 198 }
200 if (populate) 199 if (populate)
201 mm_populate(info->mmap_base, populate); 200 mm_populate(ctx->mmap_base, populate);
202 201
203 ctx->user_id = info->mmap_base; 202 ctx->user_id = ctx->mmap_base;
203 ctx->nr_events = nr_events; /* trusted copy */
204 204
205 info->nr = nr_events; /* trusted copy */ 205 ring = kmap_atomic(ctx->ring_pages[0]);
206
207 ring = kmap_atomic(info->ring_pages[0]);
208 ring->nr = nr_events; /* user copy */ 206 ring->nr = nr_events; /* user copy */
209 ring->id = ctx->user_id; 207 ring->id = ctx->user_id;
210 ring->head = ring->tail = 0; 208 ring->head = ring->tail = 0;
@@ -213,7 +211,7 @@ static int aio_setup_ring(struct kioctx *ctx)
213 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; 211 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
214 ring->header_length = sizeof(struct aio_ring); 212 ring->header_length = sizeof(struct aio_ring);
215 kunmap_atomic(ring); 213 kunmap_atomic(ring);
216 flush_dcache_page(info->ring_pages[0]); 214 flush_dcache_page(ctx->ring_pages[0]);
217 215
218 return 0; 216 return 0;
219} 217}
@@ -284,7 +282,6 @@ static void free_ioctx_rcu(struct rcu_head *head)
284 */ 282 */
285static void free_ioctx(struct kioctx *ctx) 283static void free_ioctx(struct kioctx *ctx)
286{ 284{
287 struct aio_ring_info *info = &ctx->ring_info;
288 struct aio_ring *ring; 285 struct aio_ring *ring;
289 struct io_event res; 286 struct io_event res;
290 struct kiocb *req; 287 struct kiocb *req;
@@ -302,18 +299,18 @@ static void free_ioctx(struct kioctx *ctx)
302 299
303 spin_unlock_irq(&ctx->ctx_lock); 300 spin_unlock_irq(&ctx->ctx_lock);
304 301
305 ring = kmap_atomic(info->ring_pages[0]); 302 ring = kmap_atomic(ctx->ring_pages[0]);
306 head = ring->head; 303 head = ring->head;
307 kunmap_atomic(ring); 304 kunmap_atomic(ring);
308 305
309 while (atomic_read(&ctx->reqs_active) > 0) { 306 while (atomic_read(&ctx->reqs_active) > 0) {
310 wait_event(ctx->wait, head != info->tail); 307 wait_event(ctx->wait, head != ctx->tail);
311 308
312 avail = (head <= info->tail ? info->tail : info->nr) - head; 309 avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
313 310
314 atomic_sub(avail, &ctx->reqs_active); 311 atomic_sub(avail, &ctx->reqs_active);
315 head += avail; 312 head += avail;
316 head %= info->nr; 313 head %= ctx->nr_events;
317 } 314 }
318 315
319 WARN_ON(atomic_read(&ctx->reqs_active) < 0); 316 WARN_ON(atomic_read(&ctx->reqs_active) < 0);
@@ -372,7 +369,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
372 atomic_set(&ctx->dead, 0); 369 atomic_set(&ctx->dead, 0);
373 spin_lock_init(&ctx->ctx_lock); 370 spin_lock_init(&ctx->ctx_lock);
374 spin_lock_init(&ctx->completion_lock); 371 spin_lock_init(&ctx->completion_lock);
375 mutex_init(&ctx->ring_info.ring_lock); 372 mutex_init(&ctx->ring_lock);
376 init_waitqueue_head(&ctx->wait); 373 init_waitqueue_head(&ctx->wait);
377 374
378 INIT_LIST_HEAD(&ctx->active_reqs); 375 INIT_LIST_HEAD(&ctx->active_reqs);
@@ -396,7 +393,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
396 spin_unlock(&mm->ioctx_lock); 393 spin_unlock(&mm->ioctx_lock);
397 394
398 pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", 395 pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
399 ctx, ctx->user_id, mm, ctx->ring_info.nr); 396 ctx, ctx->user_id, mm, ctx->nr_events);
400 return ctx; 397 return ctx;
401 398
402out_cleanup: 399out_cleanup:
@@ -491,7 +488,7 @@ void exit_aio(struct mm_struct *mm)
491 * just set it to 0; aio_free_ring() is the only 488 * just set it to 0; aio_free_ring() is the only
492 * place that uses ->mmap_size, so it's safe. 489 * place that uses ->mmap_size, so it's safe.
493 */ 490 */
494 ctx->ring_info.mmap_size = 0; 491 ctx->mmap_size = 0;
495 492
496 if (!atomic_xchg(&ctx->dead, 1)) { 493 if (!atomic_xchg(&ctx->dead, 1)) {
497 hlist_del_rcu(&ctx->list); 494 hlist_del_rcu(&ctx->list);
@@ -514,10 +511,10 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
514{ 511{
515 struct kiocb *req; 512 struct kiocb *req;
516 513
517 if (atomic_read(&ctx->reqs_active) >= ctx->ring_info.nr) 514 if (atomic_read(&ctx->reqs_active) >= ctx->nr_events)
518 return NULL; 515 return NULL;
519 516
520 if (atomic_inc_return(&ctx->reqs_active) > ctx->ring_info.nr - 1) 517 if (atomic_inc_return(&ctx->reqs_active) > ctx->nr_events - 1)
521 goto out_put; 518 goto out_put;
522 519
523 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); 520 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
@@ -578,7 +575,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
578void aio_complete(struct kiocb *iocb, long res, long res2) 575void aio_complete(struct kiocb *iocb, long res, long res2)
579{ 576{
580 struct kioctx *ctx = iocb->ki_ctx; 577 struct kioctx *ctx = iocb->ki_ctx;
581 struct aio_ring_info *info;
582 struct aio_ring *ring; 578 struct aio_ring *ring;
583 struct io_event *ev_page, *event; 579 struct io_event *ev_page, *event;
584 unsigned long flags; 580 unsigned long flags;
@@ -599,8 +595,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
599 return; 595 return;
600 } 596 }
601 597
602 info = &ctx->ring_info;
603
604 /* 598 /*
605 * Take rcu_read_lock() in case the kioctx is being destroyed, as we 599 * Take rcu_read_lock() in case the kioctx is being destroyed, as we
606 * need to issue a wakeup after decrementing reqs_active. 600 * need to issue a wakeup after decrementing reqs_active.
@@ -633,13 +627,13 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
633 */ 627 */
634 spin_lock_irqsave(&ctx->completion_lock, flags); 628 spin_lock_irqsave(&ctx->completion_lock, flags);
635 629
636 tail = info->tail; 630 tail = ctx->tail;
637 pos = tail + AIO_EVENTS_OFFSET; 631 pos = tail + AIO_EVENTS_OFFSET;
638 632
639 if (++tail >= info->nr) 633 if (++tail >= ctx->nr_events)
640 tail = 0; 634 tail = 0;
641 635
642 ev_page = kmap_atomic(info->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 636 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
643 event = ev_page + pos % AIO_EVENTS_PER_PAGE; 637 event = ev_page + pos % AIO_EVENTS_PER_PAGE;
644 638
645 event->obj = (u64)(unsigned long)iocb->ki_obj.user; 639 event->obj = (u64)(unsigned long)iocb->ki_obj.user;
@@ -648,7 +642,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
648 event->res2 = res2; 642 event->res2 = res2;
649 643
650 kunmap_atomic(ev_page); 644 kunmap_atomic(ev_page);
651 flush_dcache_page(info->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 645 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
652 646
653 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", 647 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
654 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, 648 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
@@ -659,12 +653,12 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
659 */ 653 */
660 smp_wmb(); /* make event visible before updating tail */ 654 smp_wmb(); /* make event visible before updating tail */
661 655
662 info->tail = tail; 656 ctx->tail = tail;
663 657
664 ring = kmap_atomic(info->ring_pages[0]); 658 ring = kmap_atomic(ctx->ring_pages[0]);
665 ring->tail = tail; 659 ring->tail = tail;
666 kunmap_atomic(ring); 660 kunmap_atomic(ring);
667 flush_dcache_page(info->ring_pages[0]); 661 flush_dcache_page(ctx->ring_pages[0]);
668 662
669 spin_unlock_irqrestore(&ctx->completion_lock, flags); 663 spin_unlock_irqrestore(&ctx->completion_lock, flags);
670 664
@@ -704,21 +698,20 @@ EXPORT_SYMBOL(aio_complete);
704static long aio_read_events_ring(struct kioctx *ctx, 698static long aio_read_events_ring(struct kioctx *ctx,
705 struct io_event __user *event, long nr) 699 struct io_event __user *event, long nr)
706{ 700{
707 struct aio_ring_info *info = &ctx->ring_info;
708 struct aio_ring *ring; 701 struct aio_ring *ring;
709 unsigned head, pos; 702 unsigned head, pos;
710 long ret = 0; 703 long ret = 0;
711 int copy_ret; 704 int copy_ret;
712 705
713 mutex_lock(&info->ring_lock); 706 mutex_lock(&ctx->ring_lock);
714 707
715 ring = kmap_atomic(info->ring_pages[0]); 708 ring = kmap_atomic(ctx->ring_pages[0]);
716 head = ring->head; 709 head = ring->head;
717 kunmap_atomic(ring); 710 kunmap_atomic(ring);
718 711
719 pr_debug("h%u t%u m%u\n", head, info->tail, info->nr); 712 pr_debug("h%u t%u m%u\n", head, ctx->tail, ctx->nr_events);
720 713
721 if (head == info->tail) 714 if (head == ctx->tail)
722 goto out; 715 goto out;
723 716
724 while (ret < nr) { 717 while (ret < nr) {
@@ -726,8 +719,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
726 struct io_event *ev; 719 struct io_event *ev;
727 struct page *page; 720 struct page *page;
728 721
729 avail = (head <= info->tail ? info->tail : info->nr) - head; 722 avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
730 if (head == info->tail) 723 if (head == ctx->tail)
731 break; 724 break;
732 725
733 avail = min(avail, nr - ret); 726 avail = min(avail, nr - ret);
@@ -735,7 +728,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
735 ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE)); 728 ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
736 729
737 pos = head + AIO_EVENTS_OFFSET; 730 pos = head + AIO_EVENTS_OFFSET;
738 page = info->ring_pages[pos / AIO_EVENTS_PER_PAGE]; 731 page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
739 pos %= AIO_EVENTS_PER_PAGE; 732 pos %= AIO_EVENTS_PER_PAGE;
740 733
741 ev = kmap(page); 734 ev = kmap(page);
@@ -750,19 +743,19 @@ static long aio_read_events_ring(struct kioctx *ctx,
750 743
751 ret += avail; 744 ret += avail;
752 head += avail; 745 head += avail;
753 head %= info->nr; 746 head %= ctx->nr_events;
754 } 747 }
755 748
756 ring = kmap_atomic(info->ring_pages[0]); 749 ring = kmap_atomic(ctx->ring_pages[0]);
757 ring->head = head; 750 ring->head = head;
758 kunmap_atomic(ring); 751 kunmap_atomic(ring);
759 flush_dcache_page(info->ring_pages[0]); 752 flush_dcache_page(ctx->ring_pages[0]);
760 753
761 pr_debug("%li h%u t%u\n", ret, head, info->tail); 754 pr_debug("%li h%u t%u\n", ret, head, ctx->tail);
762 755
763 atomic_sub(ret, &ctx->reqs_active); 756 atomic_sub(ret, &ctx->reqs_active);
764out: 757out:
765 mutex_unlock(&info->ring_lock); 758 mutex_unlock(&ctx->ring_lock);
766 759
767 return ret; 760 return ret;
768} 761}