Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	220
1 file changed, 139 insertions(+), 81 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index a3b4021c448f..d78f577baef2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1587,6 +1587,9 @@ EXPORT_SYMBOL(find_lock_entry);
  * @gfp_mask and added to the page cache and the VM's LRU
  * list. The page is returned locked and with an increased
  * refcount.
+ * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
+ *   its own locking dance if the page is already in cache, or unlock the page
+ *   before returning if we had to add the page to pagecache.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1641,7 +1644,7 @@ no_page:
                 if (!page)
                         return NULL;
 
-                if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+                if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
                         fgp_flags |= FGP_LOCK;
 
                 /* Init accessed so avoid atomic mark_page_accessed later */
@@ -1655,6 +1658,13 @@ no_page:
                         if (err == -EEXIST)
                                 goto repeat;
                 }
+
+                /*
+                 * add_to_page_cache_lru locks the page, and for mmap we expect
+                 * an unlocked page.
+                 */
+                if (page && (fgp_flags & FGP_FOR_MMAP))
+                        unlock_page(page);
         }
 
         return page;
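
With the new FGP_FOR_MMAP flag the caller of pagecache_get_page() gets the page back unlocked in both cases: a page found in cache is returned without the lock being taken on its behalf, and a freshly created page (which add_to_page_cache_lru() hands back locked) is unlocked before return. A minimal user-space sketch of that contract, with stub types and illustrative flag values rather than the kernel's:

/*
 * User-space model of the FGP_FOR_MMAP contract (stub types and
 * illustrative flag values, not the kernel implementation).
 */
#include <stdio.h>
#include <stdbool.h>

#define FGP_LOCK        0x01    /* illustrative bit values */
#define FGP_CREAT       0x02
#define FGP_FOR_MMAP    0x04

struct page { bool locked; };

static void unlock_page(struct page *p) { p->locked = false; }

/* Models the tail of the FGP_CREAT path: add_to_page_cache_lru() has
 * just returned the new page locked. */
static void finish_new_page(struct page *page, int fgp_flags)
{
        if (page && (fgp_flags & FGP_FOR_MMAP))
                unlock_page(page);
}

int main(void)
{
        struct page page = { .locked = true };

        finish_new_page(&page, FGP_CREAT | FGP_FOR_MMAP);
        printf("new page returned %s\n", page.locked ? "locked" : "unlocked");
        return 0;
}

Either way the mmap fault path can then take the lock itself, via the lock_page_maybe_drop_mmap() helper introduced below.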
@@ -2379,64 +2389,98 @@ out:
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
-/**
- * page_cache_read - adds requested page to the page cache if not already there
- * @file: file to read
- * @offset: page index
- * @gfp_mask: memory allocation flags
- *
- * This adds the requested page to the page cache if it isn't already there,
- * and schedules an I/O to read in its contents from disk.
- *
- * Return: %0 on success, negative error code otherwise.
- */
-static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
+#define MMAP_LOTSAMISS (100)
+static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+                                             struct file *fpin)
 {
-        struct address_space *mapping = file->f_mapping;
-        struct page *page;
-        int ret;
+        int flags = vmf->flags;
 
-        do {
-                page = __page_cache_alloc(gfp_mask);
-                if (!page)
-                        return -ENOMEM;
+        if (fpin)
+                return fpin;
 
-                ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
-                if (ret == 0)
-                        ret = mapping->a_ops->readpage(file, page);
-                else if (ret == -EEXIST)
-                        ret = 0; /* losing race to add is OK */
+        /*
+         * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+         * anything, so we only pin the file and drop the mmap_sem if only
+         * FAULT_FLAG_ALLOW_RETRY is set.
+         */
+        if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
+            FAULT_FLAG_ALLOW_RETRY) {
+                fpin = get_file(vmf->vma->vm_file);
+                up_read(&vmf->vma->vm_mm->mmap_sem);
+        }
+        return fpin;
+}
 
-                put_page(page);
+/*
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @page - the page to lock.
+ * @fpin - the pointer to the file we may pin (or is already pinned).
+ *
+ * This works similar to lock_page_or_retry in that it can drop the mmap_sem.
+ * It differs in that it actually returns the page locked if it returns 1 and 0
+ * if it couldn't lock the page. If we did have to drop the mmap_sem then fpin
+ * will point to the pinned file and needs to be fput()'ed at a later point.
+ */
+static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+                                     struct file **fpin)
+{
+        if (trylock_page(page))
+                return 1;
 
-        } while (ret == AOP_TRUNCATED_PAGE);
+        /*
+         * NOTE! This will make us return with VM_FAULT_RETRY, but with
+         * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
+         * is supposed to work. We have way too many special cases..
+         */
+        if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+                return 0;
 
-        return ret;
+        *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
+        if (vmf->flags & FAULT_FLAG_KILLABLE) {
+                if (__lock_page_killable(page)) {
+                        /*
+                         * We didn't have the right flags to drop the mmap_sem,
+                         * but all fault_handlers only check for fatal signals
+                         * if we return VM_FAULT_RETRY, so we need to drop the
+                         * mmap_sem here and return 0 if we don't have a fpin.
+                         */
+                        if (*fpin == NULL)
+                                up_read(&vmf->vma->vm_mm->mmap_sem);
+                        return 0;
+                }
+        } else
+                __lock_page(page);
+        return 1;
 }
 
-#define MMAP_LOTSAMISS (100)
 
 /*
- * Synchronous readahead happens when we don't even find
- * a page in the page cache at all.
+ * Synchronous readahead happens when we don't even find a page in the page
+ * cache at all. We don't want to perform IO under the mmap sem, so if we have
+ * to drop the mmap sem we return the file that was pinned in order for us to do
+ * that. If we didn't pin a file then we return NULL. The file that is
+ * returned needs to be fput()'ed when we're done with it.
  */
-static void do_sync_mmap_readahead(struct vm_area_struct *vma,
-                                   struct file_ra_state *ra,
-                                   struct file *file,
-                                   pgoff_t offset)
+static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 {
+        struct file *file = vmf->vma->vm_file;
+        struct file_ra_state *ra = &file->f_ra;
         struct address_space *mapping = file->f_mapping;
+        struct file *fpin = NULL;
+        pgoff_t offset = vmf->pgoff;
 
         /* If we don't want any read-ahead, don't bother */
-        if (vma->vm_flags & VM_RAND_READ)
-                return;
+        if (vmf->vma->vm_flags & VM_RAND_READ)
+                return fpin;
         if (!ra->ra_pages)
-                return;
+                return fpin;
 
-        if (vma->vm_flags & VM_SEQ_READ) {
+        if (vmf->vma->vm_flags & VM_SEQ_READ) {
+                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                 page_cache_sync_readahead(mapping, ra, file, offset,
                                           ra->ra_pages);
-                return;
+                return fpin;
         }
 
         /* Avoid banging the cache line if not needed */
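
maybe_unlock_mmap_for_io() drops the mmap_sem only when FAULT_FLAG_ALLOW_RETRY is set and FAULT_FLAG_RETRY_NOWAIT is clear; the mask-and-compare expresses both conditions in a single test. A standalone sketch of just that predicate (flag values assumed for the demo, not the kernel's):

/*
 * Demo of the flag test in maybe_unlock_mmap_for_io(): the mmap_sem may
 * be dropped only when retry is allowed AND the caller is willing to
 * wait. Flag values here are illustrative.
 */
#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY  0x04
#define FAULT_FLAG_RETRY_NOWAIT 0x08

static int may_drop_mmap_sem(int flags)
{
        return (flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
               FAULT_FLAG_ALLOW_RETRY;
}

int main(void)
{
        int cases[] = {
                0,
                FAULT_FLAG_ALLOW_RETRY,
                FAULT_FLAG_RETRY_NOWAIT,
                FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT,
        };

        for (int i = 0; i < 4; i++)
                printf("flags=0x%02x -> drop mmap_sem: %s\n", cases[i],
                       may_drop_mmap_sem(cases[i]) ? "yes" : "no");
        return 0;
}

Only the ALLOW_RETRY-only combination prints "yes", which is why the NOWAIT path in lock_page_maybe_drop_mmap() bails out before calling the helper.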
@@ -2448,37 +2492,44 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
          * stop bothering with read-ahead. It will only hurt.
          */
         if (ra->mmap_miss > MMAP_LOTSAMISS)
-                return;
+                return fpin;
 
         /*
          * mmap read-around
          */
+        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
         ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
         ra->size = ra->ra_pages;
         ra->async_size = ra->ra_pages / 4;
         ra_submit(ra, mapping, file);
+        return fpin;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
- * so we want to possibly extend the readahead further..
+ * so we want to possibly extend the readahead further. We return the file that
+ * was pinned if we have to drop the mmap_sem in order to do IO.
  */
-static void do_async_mmap_readahead(struct vm_area_struct *vma,
-                                    struct file_ra_state *ra,
-                                    struct file *file,
-                                    struct page *page,
-                                    pgoff_t offset)
+static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
+                                            struct page *page)
 {
+        struct file *file = vmf->vma->vm_file;
+        struct file_ra_state *ra = &file->f_ra;
         struct address_space *mapping = file->f_mapping;
+        struct file *fpin = NULL;
+        pgoff_t offset = vmf->pgoff;
 
         /* If we don't want any read-ahead, don't bother */
-        if (vma->vm_flags & VM_RAND_READ)
-                return;
+        if (vmf->vma->vm_flags & VM_RAND_READ)
+                return fpin;
         if (ra->mmap_miss > 0)
                 ra->mmap_miss--;
-        if (PageReadahead(page))
+        if (PageReadahead(page)) {
+                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                 page_cache_async_readahead(mapping, ra, file,
                                            page, offset, ra->ra_pages);
+        }
+        return fpin;
 }
 
 /**
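
The read-around branch above centers a window of ra->ra_pages pages on the faulting offset, clamps the start at zero, and marks the last quarter of the window as the async tail. A small worked example of that arithmetic (ra_pages = 32, a typical 128KB window of 4KB pages, is an assumed value):

/* Worked example of the mmap read-around window computed in
 * do_sync_mmap_readahead(). */
#include <stdio.h>

int main(void)
{
        const long ra_pages = 32;
        const long offsets[] = { 0, 5, 1000 };

        for (int i = 0; i < 3; i++) {
                long offset = offsets[i];
                long start = offset - ra_pages / 2;

                if (start < 0)  /* the max_t(long, 0, ...) clamp */
                        start = 0;
                printf("fault at %4ld -> window [%ld, %ld), async tail %ld\n",
                       offset, start, start + ra_pages, ra_pages / 4);
        }
        return 0;
}

A fault at page 1000 thus reads pages [984, 1016), with the final 8 pages submitted as asynchronous readahead.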
@@ -2510,6 +2561,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
         int error;
         struct file *file = vmf->vma->vm_file;
+        struct file *fpin = NULL;
         struct address_space *mapping = file->f_mapping;
         struct file_ra_state *ra = &file->f_ra;
         struct inode *inode = mapping->host;
@@ -2531,23 +2583,26 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
                  * We found the page, so try async readahead before
                  * waiting for the lock.
                  */
-                do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
+                fpin = do_async_mmap_readahead(vmf, page);
         } else if (!page) {
                 /* No page in the page cache at all */
-                do_sync_mmap_readahead(vmf->vma, ra, file, offset);
                 count_vm_event(PGMAJFAULT);
                 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
                 ret = VM_FAULT_MAJOR;
+                fpin = do_sync_mmap_readahead(vmf);
 retry_find:
-                page = find_get_page(mapping, offset);
-                if (!page)
-                        goto no_cached_page;
+                page = pagecache_get_page(mapping, offset,
+                                          FGP_CREAT|FGP_FOR_MMAP,
+                                          vmf->gfp_mask);
+                if (!page) {
+                        if (fpin)
+                                goto out_retry;
+                        return vmf_error(-ENOMEM);
+                }
         }
 
-        if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-                put_page(page);
-                return ret | VM_FAULT_RETRY;
-        }
+        if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+                goto out_retry;
 
         /* Did it get truncated? */
         if (unlikely(page->mapping != mapping)) {
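
When filemap_fault() now bails out with VM_FAULT_RETRY after dropping the mmap_sem, it relies on the arch fault handler to re-take the lock and repeat the fault, by which point the readahead IO has hopefully populated the page cache. A toy model of that round trip (illustrative flag and return values and a stubbed fault handler, not kernel code):

/* Toy model of the retry protocol: on VM_FAULT_RETRY the fault handler
 * re-takes the mmap_sem and repeats the fault with FAULT_FLAG_TRIED
 * instead of FAULT_FLAG_ALLOW_RETRY, so the second pass may block but
 * cannot retry again. */
#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY  0x04    /* illustrative values */
#define FAULT_FLAG_TRIED        0x10
#define VM_FAULT_RETRY          0x0400

static int cached;      /* does the page cache hold the page yet? */

/* Stand-in for filemap_fault(): the first pass kicks off IO and asks
 * for a retry, the second pass finds the now-populated page. */
static int toy_filemap_fault(unsigned int flags)
{
        if (!cached && (flags & FAULT_FLAG_ALLOW_RETRY)) {
                cached = 1;     /* IO completes while mmap_sem is dropped */
                return VM_FAULT_RETRY;
        }
        return 0;
}

int main(void)
{
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
        int ret = toy_filemap_fault(flags);

        if (ret & VM_FAULT_RETRY) {
                /* fault handler: re-take mmap_sem, forbid further retries */
                flags = (flags & ~FAULT_FLAG_ALLOW_RETRY) | FAULT_FLAG_TRIED;
                ret = toy_filemap_fault(flags);
        }
        printf("final fault result: %d\n", ret);
        return 0;
}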
@@ -2565,6 +2620,16 @@ retry_find:
                 goto page_not_uptodate;
 
         /*
+         * We've made it this far and we had to drop our mmap_sem, now is the
+         * time to return to the upper layer and have it re-find the vma and
+         * redo the fault.
+         */
+        if (fpin) {
+                unlock_page(page);
+                goto out_retry;
+        }
+
+        /*
          * Found the page and have a reference on it.
          * We must recheck i_size under page lock.
          */
@@ -2578,28 +2643,6 @@ retry_find:
         vmf->page = page;
         return ret | VM_FAULT_LOCKED;
 
-no_cached_page:
-        /*
-         * We're only likely to ever get here if MADV_RANDOM is in
-         * effect.
-         */
-        error = page_cache_read(file, offset, vmf->gfp_mask);
-
-        /*
-         * The page we want has now been added to the page cache.
-         * In the unlikely event that someone removed it in the
-         * meantime, we'll just come back here and read it again.
-         */
-        if (error >= 0)
-                goto retry_find;
-
-        /*
-         * An error return from page_cache_read can result if the
-         * system is low on memory, or a problem occurs while trying
-         * to schedule I/O.
-         */
-        return vmf_error(error);
-
 page_not_uptodate:
         /*
          * Umm, take care of errors if the page isn't up-to-date.
@@ -2608,12 +2651,15 @@ page_not_uptodate:
          * and we need to check for errors.
          */
         ClearPageError(page);
+        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
         error = mapping->a_ops->readpage(file, page);
         if (!error) {
                 wait_on_page_locked(page);
                 if (!PageUptodate(page))
                         error = -EIO;
         }
+        if (fpin)
+                goto out_retry;
         put_page(page);
 
         if (!error || error == AOP_TRUNCATED_PAGE)
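
The page_not_uptodate path issues ->readpage(), waits for the page lock to clear, and treats a page that still isn't uptodate as -EIO, while a return of 0 or AOP_TRUNCATED_PAGE loops back to retry_find. A user-space model of that error handling (stubbed IO and assumed constant values):

/* Model of the page_not_uptodate logic with a stub ->readpage(). */
#include <stdio.h>
#include <stdbool.h>

#define EIO                     5
#define AOP_TRUNCATED_PAGE      0x80001 /* assumed value for the demo */

struct page { bool uptodate; };

/* Stub ->readpage(): pretend IO completed but failed to fill the page. */
static int readpage(struct page *page)
{
        page->uptodate = false;
        return 0;
}

static int read_and_check(struct page *page)
{
        int error = readpage(page);

        if (!error) {
                /* wait_on_page_locked(page) would go here */
                if (!page->uptodate)
                        error = -EIO;
        }
        return error;
}

int main(void)
{
        struct page page = { .uptodate = false };
        int error = read_and_check(&page);

        if (!error || error == AOP_TRUNCATED_PAGE)
                printf("goto retry_find\n");
        else
                printf("SIGBUS (error %d)\n", error);
        return 0;
}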
@@ -2622,6 +2668,18 @@ page_not_uptodate:
         /* Things didn't work out. Return zero to tell the mm layer so. */
         shrink_readahead_size_eio(file, ra);
         return VM_FAULT_SIGBUS;
+
+out_retry:
+        /*
+         * We dropped the mmap_sem, we need to return to the fault handler to
+         * re-find the vma and come back and find our hopefully still populated
+         * page.
+         */
+        if (page)
+                put_page(page);
+        if (fpin)
+                fput(fpin);
+        return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);
 
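
The pin taken via get_file() is what keeps the file, and with it the address_space, alive once the mmap_sem is dropped and the VMA can no longer be trusted; out_retry releases that pin with fput(). A toy refcount model of the lifecycle (stub types, not kernel code):

/* Toy refcount model of the fpin lifecycle across a dropped mmap_sem. */
#include <stdio.h>

struct file { int refcount; };

static struct file *get_file(struct file *f) { f->refcount++; return f; }
static void fput(struct file *f) { f->refcount--; }

int main(void)
{
        struct file f = { .refcount = 1 };      /* reference held by the VMA */
        struct file *fpin = NULL;

        /* maybe_unlock_mmap_for_io(): pin before dropping the mmap_sem */
        fpin = get_file(&f);
        /* ... mmap_sem dropped, IO issued, VM_FAULT_RETRY prepared ... */

        /* out_retry: release the pin taken above */
        if (fpin)
                fput(fpin);
        printf("file refcount back to %d\n", f.refcount);
        return 0;
}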