Diffstat (limited to 'mm/filemap.c')
 -rw-r--r--  mm/filemap.c  220
 1 file changed, 139 insertions(+), 81 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index a3b4021c448f..d78f577baef2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1587,6 +1587,9 @@ EXPORT_SYMBOL(find_lock_entry);
  * @gfp_mask and added to the page cache and the VM's LRU
  * list. The page is returned locked and with an increased
  * refcount.
+ * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
+ *   its own locking dance if the page is already in cache, or unlock the page
+ *   before returning if we had to add the page to pagecache.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1641,7 +1644,7 @@ no_page:
                 if (!page)
                         return NULL;
 
-                if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+                if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
                         fgp_flags |= FGP_LOCK;
 
                 /* Init accessed so avoid atomic mark_page_accessed later */
@@ -1655,6 +1658,13 @@ no_page:
                         if (err == -EEXIST)
                                 goto repeat;
                 }
+
+                /*
+                 * add_to_page_cache_lru locks the page, and for mmap we expect
+                 * an unlocked page.
+                 */
+                if (page && (fgp_flags & FGP_FOR_MMAP))
+                        unlock_page(page);
         }
 
         return page;
@@ -2379,64 +2389,98 @@ out:
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
-/**
- * page_cache_read - adds requested page to the page cache if not already there
- * @file:      file to read
- * @offset:    page index
- * @gfp_mask:  memory allocation flags
- *
- * This adds the requested page to the page cache if it isn't already there,
- * and schedules an I/O to read in its contents from disk.
- *
- * Return: %0 on success, negative error code otherwise.
- */
-static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
-{
-        struct address_space *mapping = file->f_mapping;
-        struct page *page;
-        int ret;
-
-        do {
-                page = __page_cache_alloc(gfp_mask);
-                if (!page)
-                        return -ENOMEM;
-
-                ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
-                if (ret == 0)
-                        ret = mapping->a_ops->readpage(file, page);
-                else if (ret == -EEXIST)
-                        ret = 0; /* losing race to add is OK */
-
-                put_page(page);
-
-        } while (ret == AOP_TRUNCATED_PAGE);
-
-        return ret;
-}
+#define MMAP_LOTSAMISS  (100)
+static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+                                             struct file *fpin)
+{
+        int flags = vmf->flags;
+
+        if (fpin)
+                return fpin;
+
+        /*
+         * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+         * anything, so we only pin the file and drop the mmap_sem if only
+         * FAULT_FLAG_ALLOW_RETRY is set.
+         */
+        if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
+            FAULT_FLAG_ALLOW_RETRY) {
+                fpin = get_file(vmf->vma->vm_file);
+                up_read(&vmf->vma->vm_mm->mmap_sem);
+        }
+        return fpin;
+}
+
+/*
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @page - the page to lock.
+ * @fpin - the pointer to the file we may pin (or is already pinned).
+ *
+ * This works similar to lock_page_or_retry in that it can drop the mmap_sem.
+ * It differs in that it actually returns the page locked if it returns 1 and 0
+ * if it couldn't lock the page. If we did have to drop the mmap_sem then fpin
+ * will point to the pinned file and needs to be fput()'ed at a later point.
+ */
+static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+                                     struct file **fpin)
+{
+        if (trylock_page(page))
+                return 1;
+
+        /*
+         * NOTE! This will make us return with VM_FAULT_RETRY, but with
+         * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
+         * is supposed to work. We have way too many special cases..
+         */
+        if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+                return 0;
+
+        *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
+        if (vmf->flags & FAULT_FLAG_KILLABLE) {
+                if (__lock_page_killable(page)) {
+                        /*
+                         * We didn't have the right flags to drop the mmap_sem,
+                         * but all fault_handlers only check for fatal signals
+                         * if we return VM_FAULT_RETRY, so we need to drop the
+                         * mmap_sem here and return 0 if we don't have a fpin.
+                         */
+                        if (*fpin == NULL)
+                                up_read(&vmf->vma->vm_mm->mmap_sem);
+                        return 0;
+                }
+        } else
+                __lock_page(page);
+        return 1;
+}
 
-#define MMAP_LOTSAMISS  (100)
 
 /*
- * Synchronous readahead happens when we don't even find
- * a page in the page cache at all.
+ * Synchronous readahead happens when we don't even find a page in the page
+ * cache at all. We don't want to perform IO under the mmap sem, so if we have
+ * to drop the mmap sem we return the file that was pinned in order for us to do
+ * that. If we didn't pin a file then we return NULL. The file that is
+ * returned needs to be fput()'ed when we're done with it.
  */
-static void do_sync_mmap_readahead(struct vm_area_struct *vma,
-                                   struct file_ra_state *ra,
-                                   struct file *file,
-                                   pgoff_t offset)
+static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 {
+        struct file *file = vmf->vma->vm_file;
+        struct file_ra_state *ra = &file->f_ra;
         struct address_space *mapping = file->f_mapping;
+        struct file *fpin = NULL;
+        pgoff_t offset = vmf->pgoff;
 
         /* If we don't want any read-ahead, don't bother */
-        if (vma->vm_flags & VM_RAND_READ)
-                return;
+        if (vmf->vma->vm_flags & VM_RAND_READ)
+                return fpin;
         if (!ra->ra_pages)
-                return;
+                return fpin;
 
-        if (vma->vm_flags & VM_SEQ_READ) {
+        if (vmf->vma->vm_flags & VM_SEQ_READ) {
+                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                 page_cache_sync_readahead(mapping, ra, file, offset,
                                           ra->ra_pages);
-                return;
+                return fpin;
         }
 
         /* Avoid banging the cache line if not needed */
@@ -2448,37 +2492,44 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
          * stop bothering with read-ahead. It will only hurt.
          */
         if (ra->mmap_miss > MMAP_LOTSAMISS)
-                return;
+                return fpin;
 
         /*
          * mmap read-around
          */
+        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
         ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
         ra->size = ra->ra_pages;
         ra->async_size = ra->ra_pages / 4;
         ra_submit(ra, mapping, file);
+        return fpin;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
- * so we want to possibly extend the readahead further.
+ * so we want to possibly extend the readahead further. We return the file that
+ * was pinned if we have to drop the mmap_sem in order to do IO.
  */
-static void do_async_mmap_readahead(struct vm_area_struct *vma,
-                                    struct file_ra_state *ra,
-                                    struct file *file,
-                                    struct page *page,
-                                    pgoff_t offset)
+static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
+                                            struct page *page)
 {
+        struct file *file = vmf->vma->vm_file;
+        struct file_ra_state *ra = &file->f_ra;
         struct address_space *mapping = file->f_mapping;
+        struct file *fpin = NULL;
+        pgoff_t offset = vmf->pgoff;
 
         /* If we don't want any read-ahead, don't bother */
-        if (vma->vm_flags & VM_RAND_READ)
-                return;
+        if (vmf->vma->vm_flags & VM_RAND_READ)
+                return fpin;
         if (ra->mmap_miss > 0)
                 ra->mmap_miss--;
-        if (PageReadahead(page))
+        if (PageReadahead(page)) {
+                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                 page_cache_async_readahead(mapping, ra, file,
                                            page, offset, ra->ra_pages);
+        }
+        return fpin;
 }
 
 /**
@@ -2510,6 +2561,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
         int error;
         struct file *file = vmf->vma->vm_file;
+        struct file *fpin = NULL;
         struct address_space *mapping = file->f_mapping;
         struct file_ra_state *ra = &file->f_ra;
         struct inode *inode = mapping->host;
@@ -2531,23 +2583,26 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
                  * We found the page, so try async readahead before
                  * waiting for the lock.
                  */
-                do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
+                fpin = do_async_mmap_readahead(vmf, page);
         } else if (!page) {
                 /* No page in the page cache at all */
-                do_sync_mmap_readahead(vmf->vma, ra, file, offset);
                 count_vm_event(PGMAJFAULT);
                 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
                 ret = VM_FAULT_MAJOR;
+                fpin = do_sync_mmap_readahead(vmf);
 retry_find:
-                page = find_get_page(mapping, offset);
-                if (!page)
-                        goto no_cached_page;
+                page = pagecache_get_page(mapping, offset,
+                                          FGP_CREAT|FGP_FOR_MMAP,
+                                          vmf->gfp_mask);
+                if (!page) {
+                        if (fpin)
+                                goto out_retry;
+                        return vmf_error(-ENOMEM);
+                }
         }
 
-        if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-                put_page(page);
-                return ret | VM_FAULT_RETRY;
-        }
+        if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+                goto out_retry;
 
         /* Did it get truncated? */
         if (unlikely(page->mapping != mapping)) {
@@ -2565,6 +2620,16 @@ retry_find:
                 goto page_not_uptodate;
 
         /*
+         * We've made it this far and we had to drop our mmap_sem, now is the
+         * time to return to the upper layer and have it re-find the vma and
+         * redo the fault.
+         */
+        if (fpin) {
+                unlock_page(page);
+                goto out_retry;
+        }
+
+        /*
          * Found the page and have a reference on it.
          * We must recheck i_size under page lock.
          */
@@ -2578,28 +2643,6 @@ retry_find:
         vmf->page = page;
         return ret | VM_FAULT_LOCKED;
 
-no_cached_page:
-        /*
-         * We're only likely to ever get here if MADV_RANDOM is in
-         * effect.
-         */
-        error = page_cache_read(file, offset, vmf->gfp_mask);
-
-        /*
-         * The page we want has now been added to the page cache.
-         * In the unlikely event that someone removed it in the
-         * meantime, we'll just come back here and read it again.
-         */
-        if (error >= 0)
-                goto retry_find;
-
-        /*
-         * An error return from page_cache_read can result if the
-         * system is low on memory, or a problem occurs while trying
-         * to schedule I/O.
-         */
-        return vmf_error(error);
-
 page_not_uptodate:
         /*
          * Umm, take care of errors if the page isn't up-to-date.
@@ -2608,12 +2651,15 @@ page_not_uptodate:
          * and we need to check for errors.
          */
         ClearPageError(page);
+        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
         error = mapping->a_ops->readpage(file, page);
         if (!error) {
                 wait_on_page_locked(page);
                 if (!PageUptodate(page))
                         error = -EIO;
         }
+        if (fpin)
+                goto out_retry;
         put_page(page);
 
         if (!error || error == AOP_TRUNCATED_PAGE)
@@ -2622,6 +2668,18 @@ page_not_uptodate:
         /* Things didn't work out. Return zero to tell the mm layer so. */
         shrink_readahead_size_eio(file, ra);
         return VM_FAULT_SIGBUS;
+
+out_retry:
+        /*
+         * We dropped the mmap_sem, we need to return to the fault handler to
+         * re-find the vma and come back and find our hopefully still populated
+         * page.
+         */
+        if (page)
+                put_page(page);
+        if (fpin)
+                fput(fpin);
+        return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);
 
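
The patch relies on the caller-side retry contract: when handle_mm_fault() returns VM_FAULT_RETRY after the filemap code has dropped mmap_sem (via maybe_unlock_mmap_for_io() or lock_page_maybe_drop_mmap()), the arch fault handler must not up_read() the semaphore again; it re-takes mmap_sem, re-finds the VMA, and retries the fault once with FAULT_FLAG_TRIED set. The sketch below illustrates that loop. It is a simplified approximation of the arch fault handlers of this era (e.g. arch/x86/mm/fault.c), not actual kernel code: fault_caller_sketch is a hypothetical name, and VMA re-lookup, signal handling, and the FAULT_FLAG_RETRY_NOWAIT case are omitted.

/*
 * Illustrative sketch of the caller-side retry loop that filemap_fault()
 * depends on above; hypothetical helper, not actual kernel source.
 */
static void fault_caller_sketch(struct mm_struct *mm, unsigned long address,
                                struct vm_area_struct *vma)
{
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
        vm_fault_t fault;

        down_read(&mm->mmap_sem);
retry:
        fault = handle_mm_fault(vma, address, flags);
        if ((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY)) {
                /*
                 * The fault handler already released mmap_sem for us (and
                 * fput()'ed any pinned file), so don't up_read() here.
                 * Retry exactly once, with FAULT_FLAG_TRIED set so the
                 * handler can't drop the lock again.  A real handler also
                 * re-finds the vma after re-taking mmap_sem.
                 */
                flags &= ~FAULT_FLAG_ALLOW_RETRY;
                flags |= FAULT_FLAG_TRIED;
                down_read(&mm->mmap_sem);
                goto retry;
        }
        up_read(&mm->mmap_sem);
}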