Diffstat (limited to 'fs/aio.c')
-rw-r--r--  fs/aio.c  120
1 file changed, 67 insertions, 53 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 062a5f6a1448..12a3de0ee6da 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -52,7 +52,8 @@
 struct aio_ring {
         unsigned        id;     /* kernel internal index number */
         unsigned        nr;     /* number of io_events */
-        unsigned        head;
+        unsigned        head;   /* Written to by userland or under ring_lock
+                                 * mutex by aio_read_events_ring(). */
         unsigned        tail;
 
         unsigned        magic;
@@ -243,6 +244,11 @@ static void aio_free_ring(struct kioctx *ctx)
 {
         int i;
 
+        /* Disconnect the kioctx from the ring file.  This prevents future
+         * accesses to the kioctx from page migration.
+         */
+        put_aio_ring_file(ctx);
+
         for (i = 0; i < ctx->nr_pages; i++) {
                 struct page *page;
                 pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -254,8 +260,6 @@ static void aio_free_ring(struct kioctx *ctx)
                 put_page(page);
         }
 
-        put_aio_ring_file(ctx);
-
         if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
                 kfree(ctx->ring_pages);
                 ctx->ring_pages = NULL;
@@ -283,29 +287,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 {
         struct kioctx *ctx;
         unsigned long flags;
+        pgoff_t idx;
         int rc;
 
         rc = 0;
 
-        /* Make sure the old page hasn't already been changed */
+        /* mapping->private_lock here protects against the kioctx teardown.  */
         spin_lock(&mapping->private_lock);
         ctx = mapping->private_data;
-        if (ctx) {
-                pgoff_t idx;
-                spin_lock_irqsave(&ctx->completion_lock, flags);
-                idx = old->index;
-                if (idx < (pgoff_t)ctx->nr_pages) {
-                        if (ctx->ring_pages[idx] != old)
-                                rc = -EAGAIN;
-                } else
-                        rc = -EINVAL;
-                spin_unlock_irqrestore(&ctx->completion_lock, flags);
+        if (!ctx) {
+                rc = -EINVAL;
+                goto out;
+        }
+
+        /* The ring_lock mutex.  This prevents aio_read_events() from writing
+         * to the ring's head, and prevents page migration from mucking in
+         * a partially initialized kioctx.
+         */
+        if (!mutex_trylock(&ctx->ring_lock)) {
+                rc = -EAGAIN;
+                goto out;
+        }
+
+        idx = old->index;
+        if (idx < (pgoff_t)ctx->nr_pages) {
+                /* Make sure the old page hasn't already been changed */
+                if (ctx->ring_pages[idx] != old)
+                        rc = -EAGAIN;
         } else
                 rc = -EINVAL;
-        spin_unlock(&mapping->private_lock);
 
         if (rc != 0)
-                return rc;
+                goto out_unlock;
 
         /* Writeback must be complete */
         BUG_ON(PageWriteback(old));
@@ -314,38 +327,26 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
         rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
         if (rc != MIGRATEPAGE_SUCCESS) {
                 put_page(new);
-                return rc;
+                goto out_unlock;
         }
 
-        /* We can potentially race against kioctx teardown here.  Use the
-         * address_space's private data lock to protect the mapping's
-         * private_data.
+        /* Take completion_lock to prevent other writes to the ring buffer
+         * while the old page is copied to the new.  This prevents new
+         * events from being lost.
          */
-        spin_lock(&mapping->private_lock);
-        ctx = mapping->private_data;
-        if (ctx) {
-                pgoff_t idx;
-                spin_lock_irqsave(&ctx->completion_lock, flags);
-                migrate_page_copy(new, old);
-                idx = old->index;
-                if (idx < (pgoff_t)ctx->nr_pages) {
-                        /* And only do the move if things haven't changed */
-                        if (ctx->ring_pages[idx] == old)
-                                ctx->ring_pages[idx] = new;
-                        else
-                                rc = -EAGAIN;
-                } else
-                        rc = -EINVAL;
-                spin_unlock_irqrestore(&ctx->completion_lock, flags);
-        } else
-                rc = -EBUSY;
-        spin_unlock(&mapping->private_lock);
+        spin_lock_irqsave(&ctx->completion_lock, flags);
+        migrate_page_copy(new, old);
+        BUG_ON(ctx->ring_pages[idx] != old);
+        ctx->ring_pages[idx] = new;
+        spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-        if (rc == MIGRATEPAGE_SUCCESS)
-                put_page(old);
-        else
-                put_page(new);
+        /* The old page is no longer accessible. */
+        put_page(old);
 
+out_unlock:
+        mutex_unlock(&ctx->ring_lock);
+out:
+        spin_unlock(&mapping->private_lock);
         return rc;
 }
 #endif
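The two aio_migratepage() hunks above boil down to a fixed lock ordering: mapping->private_lock pins the kioctx against teardown, a non-blocking mutex_trylock() of ctx->ring_lock keeps migration away from a half-built ring or a concurrent reader (returning -EAGAIN so migration simply retries later), and ctx->completion_lock is held only around the actual page switch-over. The sketch below mirrors that discipline in plain userspace C with pthreads; struct ring_ctx, try_migrate_slot() and the fixed 8-slot array are invented for illustration and are not the kernel API.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for the kioctx ring: a long-lived "setup/reader"
 * mutex plus a second lock held only around the slot copy. */
struct ring_ctx {
        pthread_mutex_t ring_lock;       /* held across setup and while reading */
        pthread_mutex_t completion_lock; /* held only around the slot update */
        void *pages[8];
};

/* Analogue of aio_migratepage(): it must not block, so it only try-locks. */
static int try_migrate_slot(struct ring_ctx *ctx, unsigned idx,
                            void *old, void *new)
{
        int rc = 0;

        if (pthread_mutex_trylock(&ctx->ring_lock) != 0)
                return -EAGAIN; /* setup or a reader owns the ring; retry later */

        if (idx >= 8 || ctx->pages[idx] != old) {
                rc = -EINVAL;
                goto out_unlock;
        }

        /* Exclude concurrent writers while the slot is switched over. */
        pthread_mutex_lock(&ctx->completion_lock);
        ctx->pages[idx] = new;
        pthread_mutex_unlock(&ctx->completion_lock);

out_unlock:
        pthread_mutex_unlock(&ctx->ring_lock);
        return rc;
}

int main(void)
{
        struct ring_ctx ctx;
        char oldpg[64], newpg[64];

        pthread_mutex_init(&ctx.ring_lock, NULL);
        pthread_mutex_init(&ctx.completion_lock, NULL);
        memset(ctx.pages, 0, sizeof(ctx.pages));
        ctx.pages[0] = oldpg;

        /* While "setup" holds ring_lock, migration backs off with -EAGAIN.
         * (In the kernel the holder would be another task; the same thread
         * is used here only to show the -EAGAIN path.) */
        pthread_mutex_lock(&ctx.ring_lock);
        printf("during setup: %d\n", try_migrate_slot(&ctx, 0, oldpg, newpg));
        pthread_mutex_unlock(&ctx.ring_lock);

        printf("after setup:  %d\n", try_migrate_slot(&ctx, 0, oldpg, newpg));
        return 0;
}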
@@ -380,7 +381,7 @@ static int aio_setup_ring(struct kioctx *ctx)
         file = aio_private_file(ctx, nr_pages);
         if (IS_ERR(file)) {
                 ctx->aio_ring_file = NULL;
-                return -EAGAIN;
+                return -ENOMEM;
         }
 
         ctx->aio_ring_file = file;
@@ -415,7 +416,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
         if (unlikely(i != nr_pages)) {
                 aio_free_ring(ctx);
-                return -EAGAIN;
+                return -ENOMEM;
         }
 
         ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -429,7 +430,7 @@ static int aio_setup_ring(struct kioctx *ctx)
         if (IS_ERR((void *)ctx->mmap_base)) {
                 ctx->mmap_size = 0;
                 aio_free_ring(ctx);
-                return -EAGAIN;
+                return -ENOMEM;
         }
 
         pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
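The three -EAGAIN to -ENOMEM conversions in aio_setup_ring() change what userspace sees when the ring itself cannot be allocated: EAGAIN stays reserved for exceeding the aio limits (fs.aio-max-nr), while allocation failures now come back from io_setup(2) as ENOMEM. A small caller that tells the two apart, using the raw syscall so no libaio installation is assumed:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/aio_abi.h>

int main(void)
{
        aio_context_t ctx = 0;

        if (syscall(SYS_io_setup, 128, &ctx) < 0) {
                if (errno == EAGAIN)
                        fprintf(stderr, "over the aio-nr/aio-max-nr limit\n");
                else if (errno == ENOMEM)
                        fprintf(stderr, "ring allocation failed: %s\n",
                                strerror(errno));
                return 1;
        }

        printf("io_setup ok, ctx=%#lx\n", (unsigned long)ctx);
        syscall(SYS_io_destroy, ctx);
        return 0;
}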
@@ -556,6 +557,10 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
                                         rcu_read_unlock();
                                         spin_unlock(&mm->ioctx_lock);
 
+                                        /* While kioctx setup is in progress,
+                                         * we are protected from page migration
+                                         * changing ring_pages by ->ring_lock.
+                                         */
                                         ring = kmap_atomic(ctx->ring_pages[0]);
                                         ring->id = ctx->id;
                                         kunmap_atomic(ring);
@@ -640,24 +645,28 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
         ctx->max_reqs = nr_events;
 
-        if (percpu_ref_init(&ctx->users, free_ioctx_users))
-                goto err;
-
-        if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
-                goto err;
-
         spin_lock_init(&ctx->ctx_lock);
         spin_lock_init(&ctx->completion_lock);
         mutex_init(&ctx->ring_lock);
+        /* Protect against page migration throughout kioctx setup by keeping
+         * the ring_lock mutex held until setup is complete. */
+        mutex_lock(&ctx->ring_lock);
         init_waitqueue_head(&ctx->wait);
 
         INIT_LIST_HEAD(&ctx->active_reqs);
 
+        if (percpu_ref_init(&ctx->users, free_ioctx_users))
+                goto err;
+
+        if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+                goto err;
+
         ctx->cpu = alloc_percpu(struct kioctx_cpu);
         if (!ctx->cpu)
                 goto err;
 
-        if (aio_setup_ring(ctx) < 0)
+        err = aio_setup_ring(ctx);
+        if (err < 0)
                 goto err;
 
         atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
@@ -683,6 +692,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
         if (err)
                 goto err_cleanup;
 
+        /* Release the ring_lock mutex now that all setup is complete. */
+        mutex_unlock(&ctx->ring_lock);
+
         pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
                  ctx, ctx->user_id, mm, ctx->nr_events);
         return ctx;
@@ -692,6 +704,7 @@ err_cleanup:
 err_ctx:
         aio_free_ring(ctx);
 err:
+        mutex_unlock(&ctx->ring_lock);
         free_percpu(ctx->cpu);
         free_percpu(ctx->reqs.pcpu_count);
         free_percpu(ctx->users.pcpu_count);
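Taken together, the ioctx_alloc() hunks establish a simple invariant: ctx->ring_lock is acquired immediately after mutex_init() and only released once the kioctx is fully wired up, including on every error exit through the err: label, so aio_migratepage()'s trylock can never observe a partially initialized ring. Below is a minimal userspace sketch of that "construct under lock, unlock on all paths" shape; make_ring() and struct ring are hypothetical names used only for the example.

#include <pthread.h>
#include <stdlib.h>

struct ring {
        pthread_mutex_t ring_lock;
        void **pages;
        unsigned nr;
};

/* Build the object with ring_lock already held, so nothing that peeks at
 * ->pages (the analogue of page migration) can run against a half-built
 * ring; unlock on success *and* on the error path before returning. */
static struct ring *make_ring(unsigned nr)
{
        struct ring *r = calloc(1, sizeof(*r));

        if (!r)
                return NULL;
        pthread_mutex_init(&r->ring_lock, NULL);
        pthread_mutex_lock(&r->ring_lock);

        r->pages = calloc(nr, sizeof(*r->pages));
        if (!r->pages)
                goto err;
        r->nr = nr;

        pthread_mutex_unlock(&r->ring_lock);    /* setup complete */
        return r;
err:
        pthread_mutex_unlock(&r->ring_lock);    /* mirror of the err: label */
        free(r);
        return NULL;
}

int main(void)
{
        struct ring *r = make_ring(8);
        return r ? 0 : 1;
}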
@@ -1024,6 +1037,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 
         mutex_lock(&ctx->ring_lock);
 
+        /* Access to ->ring_pages here is protected by ctx->ring_lock. */
         ring = kmap_atomic(ctx->ring_pages[0]);
         head = ring->head;
         tail = ring->tail;
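For the final hunk, aio_read_events_ring() is the consumer side of the head/tail ring, and the new comment records that both the ->ring_pages dereference and the head update happen under ctx->ring_lock, which is exactly the mutex aio_migratepage() try-locks. A stripped-down userspace consumer with the same shape; struct ring_buf and its fields are simplified stand-ins, not the kernel's ring layout.

#include <pthread.h>
#include <stdio.h>

#define RING_SIZE 16

/* Invented, simplified stand-in for struct aio_ring: head is only ever
 * advanced by the consumer while it holds ring_lock. */
struct ring_buf {
        pthread_mutex_t ring_lock;
        unsigned head, tail;
        int events[RING_SIZE];
};

/* Copy out up to 'nr' events, advancing head under the mutex, analogous to
 * what aio_read_events_ring() does for the aio ring pages. */
static long read_events(struct ring_buf *rb, int *out, long nr)
{
        long ret = 0;

        pthread_mutex_lock(&rb->ring_lock);
        while (ret < nr && rb->head != rb->tail) {
                out[ret++] = rb->events[rb->head];
                rb->head = (rb->head + 1) % RING_SIZE;
        }
        pthread_mutex_unlock(&rb->ring_lock);
        return ret;
}

int main(void)
{
        struct ring_buf rb = { .head = 0, .tail = 3, .events = { 1, 2, 3 } };
        int out[RING_SIZE];
        long n;

        pthread_mutex_init(&rb.ring_lock, NULL);
        n = read_events(&rb, out, RING_SIZE);
        printf("got %ld events, first=%d\n", n, n ? out[0] : -1);
        return 0;
}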