author	Linus Torvalds <torvalds@linux-foundation.org>	2013-12-19 15:11:12 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-12-22 14:03:08 -0500
commit	3dc9acb67600393249a795934ccdfc291a200e6b (patch)
tree	1187e9b17ef208f2245700b933c55af4159e0af7 /fs
parent	b7000adef17a5cce85636e40fa2c2d9851a89e28 (diff)
aio: clean up and fix aio_setup_ring page mapping
Since commit 36bc08cc01709 ("fs/aio: Add support to aio ring pages migration") the aio ring setup code has used a special per-ring backing inode for the page allocations, rather than just using random anonymous pages.

However, rather than remembering the pages as it allocated them, it would allocate the pages, insert them into the file mapping (dirty, so that they couldn't be freed), and then forget about them. And then to look them up again, it would mmap the mapping, and then use "get_user_pages()" to get back an array of the pages we just created.

Now, not only is that incredibly inefficient, it also leaked all the pages if the mmap failed (which could happen due to an excessive number of mappings, for example).

So clean it all up, making it much more straightforward. Also remove some left-overs of the previous (broken) mm_populate() usage that was removed in commit d6c355c7dabc ("aio: fix race in ring buffer page lookup introduced by page migration support") but left the pointless and now misleading MAP_POPULATE flag around.

Tested-and-acked-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
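For context, the shape of the new approach (keep your own references to the ring's backing memory as you set it up, instead of mapping it and then re-deriving the pages with get_user_pages()) can be illustrated with a minimal userspace sketch. This is not kernel code: memfd_create() and the ring_header layout below are illustrative assumptions standing in for the aio ring's private backing file and struct aio_ring.

/*
 * Userspace analogue only, not the kernel implementation: back a ring
 * with its own file object, map it once with MAP_SHARED, and keep the
 * resulting pointer directly.  memfd_create() and struct ring_header
 * are illustrative stand-ins, not the kernel's aio_ring.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

struct ring_header {		/* hypothetical stand-in for struct aio_ring */
	unsigned head;
	unsigned tail;
	unsigned nr;		/* event slots that fit after the header */
};

int main(void)
{
	size_t size = 2 * (size_t)sysconf(_SC_PAGESIZE);

	/* Private backing file for this ring (cf. the per-ring inode). */
	int fd = memfd_create("ring-demo", 0);
	if (fd < 0 || ftruncate(fd, size) < 0) {
		perror("backing file");
		return 1;
	}

	/* One shared mapping of the backing file; remember the address. */
	struct ring_header *ring = mmap(NULL, size, PROT_READ | PROT_WRITE,
					MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	ring->head = ring->tail = 0;
	ring->nr = (size - sizeof(*ring)) / sizeof(unsigned);
	printf("ring at %p, %u slots\n", (void *)ring, ring->nr);

	munmap(ring, size);
	close(fd);
	return 0;
}

The kernel patch below does the equivalent inside aio_setup_ring(): it records each page in ctx->ring_pages[] at allocation time and maps the ring file with a plain MAP_SHARED, so no get_user_pages() pass is needed afterwards.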
Diffstat (limited to 'fs')
-rw-r--r--	fs/aio.c	| 58
1 file changed, 23 insertions(+), 35 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6cb22e..643db8fc43c5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -326,7 +326,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	struct aio_ring *ring;
 	unsigned nr_events = ctx->max_reqs;
 	struct mm_struct *mm = current->mm;
-	unsigned long size, populate;
+	unsigned long size, unused;
 	int nr_pages;
 	int i;
 	struct file *file;
@@ -347,6 +347,20 @@ static int aio_setup_ring(struct kioctx *ctx)
 		return -EAGAIN;
 	}
 
+	ctx->aio_ring_file = file;
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+			/ sizeof(struct io_event);
+
+	ctx->ring_pages = ctx->internal_pages;
+	if (nr_pages > AIO_RING_PAGES) {
+		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+					  GFP_KERNEL);
+		if (!ctx->ring_pages) {
+			put_aio_ring_file(ctx);
+			return -ENOMEM;
+		}
+	}
+
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
 		page = find_or_create_page(file->f_inode->i_mapping,
@@ -358,19 +372,14 @@ static int aio_setup_ring(struct kioctx *ctx)
 		SetPageUptodate(page);
 		SetPageDirty(page);
 		unlock_page(page);
+
+		ctx->ring_pages[i] = page;
 	}
-	ctx->aio_ring_file = file;
-	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
-			/ sizeof(struct io_event);
+	ctx->nr_pages = i;
 
-	ctx->ring_pages = ctx->internal_pages;
-	if (nr_pages > AIO_RING_PAGES) {
-		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!ctx->ring_pages) {
-			put_aio_ring_file(ctx);
-			return -ENOMEM;
-		}
-	}
+	if (unlikely(i != nr_pages)) {
+		aio_free_ring(ctx);
+		return -EAGAIN;
+	}
 
 	ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -379,9 +388,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 	down_write(&mm->mmap_sem);
 	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
 				       PROT_READ | PROT_WRITE,
-				       MAP_SHARED | MAP_POPULATE, 0, &populate);
+				       MAP_SHARED, 0, &unused);
+	up_write(&mm->mmap_sem);
 	if (IS_ERR((void *)ctx->mmap_base)) {
-		up_write(&mm->mmap_sem);
 		ctx->mmap_size = 0;
 		aio_free_ring(ctx);
 		return -EAGAIN;
@@ -389,27 +398,6 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
 
-	/* We must do this while still holding mmap_sem for write, as we
-	 * need to be protected against userspace attempting to mremap()
-	 * or munmap() the ring buffer.
-	 */
-	ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
-				       1, 0, ctx->ring_pages, NULL);
-
-	/* Dropping the reference here is safe as the page cache will hold
-	 * onto the pages for us. It is also required so that page migration
-	 * can unmap the pages and get the right reference count.
-	 */
-	for (i = 0; i < ctx->nr_pages; i++)
-		put_page(ctx->ring_pages[i]);
-
-	up_write(&mm->mmap_sem);
-
-	if (unlikely(ctx->nr_pages != nr_pages)) {
-		aio_free_ring(ctx);
-		return -EAGAIN;
-	}
-
 	ctx->user_id = ctx->mmap_base;
 	ctx->nr_events = nr_events;	/* trusted copy */
 