aboutsummaryrefslogtreecommitdiffstats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c175
1 files changed, 103 insertions, 72 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a49702445ce0..8c88e186a773 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -130,17 +130,8 @@ static int page_cache_tree_insert(struct address_space *mapping,
130 return -EEXIST; 130 return -EEXIST;
131 131
132 mapping->nrexceptional--; 132 mapping->nrexceptional--;
133 if (!dax_mapping(mapping)) { 133 if (shadowp)
134 if (shadowp) 134 *shadowp = p;
135 *shadowp = p;
136 } else {
137 /* DAX can replace empty locked entry with a hole */
138 WARN_ON_ONCE(p !=
139 dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
140 /* Wakeup waiters for exceptional entry lock */
141 dax_wake_mapping_entry_waiter(mapping, page->index, p,
142 true);
143 }
144 } 135 }
145 __radix_tree_replace(&mapping->page_tree, node, slot, page, 136 __radix_tree_replace(&mapping->page_tree, node, slot, page,
146 workingset_update_node, mapping); 137 workingset_update_node, mapping);
@@ -402,8 +393,7 @@ bool filemap_range_has_page(struct address_space *mapping,
402{ 393{
403 pgoff_t index = start_byte >> PAGE_SHIFT; 394 pgoff_t index = start_byte >> PAGE_SHIFT;
404 pgoff_t end = end_byte >> PAGE_SHIFT; 395 pgoff_t end = end_byte >> PAGE_SHIFT;
405 struct pagevec pvec; 396 struct page *page;
406 bool ret;
407 397
408 if (end_byte < start_byte) 398 if (end_byte < start_byte)
409 return false; 399 return false;
@@ -411,12 +401,10 @@ bool filemap_range_has_page(struct address_space *mapping,
411 if (mapping->nrpages == 0) 401 if (mapping->nrpages == 0)
412 return false; 402 return false;
413 403
414 pagevec_init(&pvec, 0); 404 if (!find_get_pages_range(mapping, &index, end, 1, &page))
415 if (!pagevec_lookup(&pvec, mapping, index, 1))
416 return false; 405 return false;
417 ret = (pvec.pages[0]->index <= end); 406 put_page(page);
418 pagevec_release(&pvec); 407 return true;
419 return ret;
420} 408}
421EXPORT_SYMBOL(filemap_range_has_page); 409EXPORT_SYMBOL(filemap_range_has_page);
422 410
@@ -476,6 +464,29 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
476EXPORT_SYMBOL(filemap_fdatawait_range); 464EXPORT_SYMBOL(filemap_fdatawait_range);
477 465
478/** 466/**
467 * file_fdatawait_range - wait for writeback to complete
468 * @file: file pointing to address space structure to wait for
469 * @start_byte: offset in bytes where the range starts
470 * @end_byte: offset in bytes where the range ends (inclusive)
471 *
472 * Walk the list of under-writeback pages of the address space that file
473 * refers to, in the given range and wait for all of them. Check error
474 * status of the address space vs. the file->f_wb_err cursor and return it.
475 *
476 * Since the error status of the file is advanced by this function,
477 * callers are responsible for checking the return value and handling and/or
478 * reporting the error.
479 */
480int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
481{
482 struct address_space *mapping = file->f_mapping;
483
484 __filemap_fdatawait_range(mapping, start_byte, end_byte);
485 return file_check_and_advance_wb_err(file);
486}
487EXPORT_SYMBOL(file_fdatawait_range);
488
489/**
479 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors 490 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
480 * @mapping: address space structure to wait for 491 * @mapping: address space structure to wait for
481 * 492 *
@@ -489,45 +500,22 @@ EXPORT_SYMBOL(filemap_fdatawait_range);
489 */ 500 */
490int filemap_fdatawait_keep_errors(struct address_space *mapping) 501int filemap_fdatawait_keep_errors(struct address_space *mapping)
491{ 502{
492 loff_t i_size = i_size_read(mapping->host); 503 __filemap_fdatawait_range(mapping, 0, LLONG_MAX);
493
494 if (i_size == 0)
495 return 0;
496
497 __filemap_fdatawait_range(mapping, 0, i_size - 1);
498 return filemap_check_and_keep_errors(mapping); 504 return filemap_check_and_keep_errors(mapping);
499} 505}
500EXPORT_SYMBOL(filemap_fdatawait_keep_errors); 506EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
501 507
502/** 508static bool mapping_needs_writeback(struct address_space *mapping)
503 * filemap_fdatawait - wait for all under-writeback pages to complete
504 * @mapping: address space structure to wait for
505 *
506 * Walk the list of under-writeback pages of the given address space
507 * and wait for all of them. Check error status of the address space
508 * and return it.
509 *
510 * Since the error status of the address space is cleared by this function,
511 * callers are responsible for checking the return value and handling and/or
512 * reporting the error.
513 */
514int filemap_fdatawait(struct address_space *mapping)
515{ 509{
516 loff_t i_size = i_size_read(mapping->host); 510 return (!dax_mapping(mapping) && mapping->nrpages) ||
517 511 (dax_mapping(mapping) && mapping->nrexceptional);
518 if (i_size == 0)
519 return 0;
520
521 return filemap_fdatawait_range(mapping, 0, i_size - 1);
522} 512}
523EXPORT_SYMBOL(filemap_fdatawait);
524 513
525int filemap_write_and_wait(struct address_space *mapping) 514int filemap_write_and_wait(struct address_space *mapping)
526{ 515{
527 int err = 0; 516 int err = 0;
528 517
529 if ((!dax_mapping(mapping) && mapping->nrpages) || 518 if (mapping_needs_writeback(mapping)) {
530 (dax_mapping(mapping) && mapping->nrexceptional)) {
531 err = filemap_fdatawrite(mapping); 519 err = filemap_fdatawrite(mapping);
532 /* 520 /*
533 * Even if the above returned error, the pages may be 521 * Even if the above returned error, the pages may be
@@ -566,8 +554,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
566{ 554{
567 int err = 0; 555 int err = 0;
568 556
569 if ((!dax_mapping(mapping) && mapping->nrpages) || 557 if (mapping_needs_writeback(mapping)) {
570 (dax_mapping(mapping) && mapping->nrexceptional)) {
571 err = __filemap_fdatawrite_range(mapping, lstart, lend, 558 err = __filemap_fdatawrite_range(mapping, lstart, lend,
572 WB_SYNC_ALL); 559 WB_SYNC_ALL);
573 /* See comment of filemap_write_and_wait() */ 560 /* See comment of filemap_write_and_wait() */
@@ -589,7 +576,7 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
589 576
590void __filemap_set_wb_err(struct address_space *mapping, int err) 577void __filemap_set_wb_err(struct address_space *mapping, int err)
591{ 578{
592 errseq_t eseq = __errseq_set(&mapping->wb_err, err); 579 errseq_t eseq = errseq_set(&mapping->wb_err, err);
593 580
594 trace_filemap_set_wb_err(mapping, eseq); 581 trace_filemap_set_wb_err(mapping, eseq);
595} 582}
@@ -656,8 +643,7 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
656 int err = 0, err2; 643 int err = 0, err2;
657 struct address_space *mapping = file->f_mapping; 644 struct address_space *mapping = file->f_mapping;
658 645
659 if ((!dax_mapping(mapping) && mapping->nrpages) || 646 if (mapping_needs_writeback(mapping)) {
660 (dax_mapping(mapping) && mapping->nrexceptional)) {
661 err = __filemap_fdatawrite_range(mapping, lstart, lend, 647 err = __filemap_fdatawrite_range(mapping, lstart, lend,
662 WB_SYNC_ALL); 648 WB_SYNC_ALL);
663 /* See comment of filemap_write_and_wait() */ 649 /* See comment of filemap_write_and_wait() */
@@ -885,6 +871,7 @@ void __init pagecache_init(void)
885 page_writeback_init(); 871 page_writeback_init();
886} 872}
887 873
874/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
888struct wait_page_key { 875struct wait_page_key {
889 struct page *page; 876 struct page *page;
890 int bit_nr; 877 int bit_nr;
@@ -909,8 +896,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
909 896
910 if (wait_page->bit_nr != key->bit_nr) 897 if (wait_page->bit_nr != key->bit_nr)
911 return 0; 898 return 0;
899
900 /* Stop walking if it's locked */
912 if (test_bit(key->bit_nr, &key->page->flags)) 901 if (test_bit(key->bit_nr, &key->page->flags))
913 return 0; 902 return -1;
914 903
915 return autoremove_wake_function(wait, mode, sync, key); 904 return autoremove_wake_function(wait, mode, sync, key);
916} 905}
@@ -920,13 +909,33 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
920 wait_queue_head_t *q = page_waitqueue(page); 909 wait_queue_head_t *q = page_waitqueue(page);
921 struct wait_page_key key; 910 struct wait_page_key key;
922 unsigned long flags; 911 unsigned long flags;
912 wait_queue_entry_t bookmark;
923 913
924 key.page = page; 914 key.page = page;
925 key.bit_nr = bit_nr; 915 key.bit_nr = bit_nr;
926 key.page_match = 0; 916 key.page_match = 0;
927 917
918 bookmark.flags = 0;
919 bookmark.private = NULL;
920 bookmark.func = NULL;
921 INIT_LIST_HEAD(&bookmark.entry);
922
928 spin_lock_irqsave(&q->lock, flags); 923 spin_lock_irqsave(&q->lock, flags);
929 __wake_up_locked_key(q, TASK_NORMAL, &key); 924 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
925
926 while (bookmark.flags & WQ_FLAG_BOOKMARK) {
927 /*
928 * Take a breather from holding the lock,
929 * allow pages that finish wake up asynchronously
930 * to acquire the lock and remove themselves
931 * from wait queue
932 */
933 spin_unlock_irqrestore(&q->lock, flags);
934 cpu_relax();
935 spin_lock_irqsave(&q->lock, flags);
936 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
937 }
938
930 /* 939 /*
931 * It is possible for other pages to have collided on the waitqueue 940 * It is possible for other pages to have collided on the waitqueue
932 * hash, so in that case check for a page match. That prevents a long- 941 * hash, so in that case check for a page match. That prevents a long-
@@ -964,6 +973,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
964 int ret = 0; 973 int ret = 0;
965 974
966 init_wait(wait); 975 init_wait(wait);
976 wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
967 wait->func = wake_page_function; 977 wait->func = wake_page_function;
968 wait_page.page = page; 978 wait_page.page = page;
969 wait_page.bit_nr = bit_nr; 979 wait_page.bit_nr = bit_nr;
@@ -972,10 +982,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
972 spin_lock_irq(&q->lock); 982 spin_lock_irq(&q->lock);
973 983
974 if (likely(list_empty(&wait->entry))) { 984 if (likely(list_empty(&wait->entry))) {
975 if (lock) 985 __add_wait_queue_entry_tail(q, wait);
976 __add_wait_queue_entry_tail_exclusive(q, wait);
977 else
978 __add_wait_queue(q, wait);
979 SetPageWaiters(page); 986 SetPageWaiters(page);
980 } 987 }
981 988
@@ -985,10 +992,6 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
985 992
986 if (likely(test_bit(bit_nr, &page->flags))) { 993 if (likely(test_bit(bit_nr, &page->flags))) {
987 io_schedule(); 994 io_schedule();
988 if (unlikely(signal_pending_state(state, current))) {
989 ret = -EINTR;
990 break;
991 }
992 } 995 }
993 996
994 if (lock) { 997 if (lock) {
@@ -998,6 +1001,11 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
998 if (!test_bit(bit_nr, &page->flags)) 1001 if (!test_bit(bit_nr, &page->flags))
999 break; 1002 break;
1000 } 1003 }
1004
1005 if (unlikely(signal_pending_state(state, current))) {
1006 ret = -EINTR;
1007 break;
1008 }
1001 } 1009 }
1002 1010
1003 finish_wait(q, wait); 1011 finish_wait(q, wait);
@@ -1039,7 +1047,7 @@ void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
1039 unsigned long flags; 1047 unsigned long flags;
1040 1048
1041 spin_lock_irqsave(&q->lock, flags); 1049 spin_lock_irqsave(&q->lock, flags);
1042 __add_wait_queue(q, waiter); 1050 __add_wait_queue_entry_tail(q, waiter);
1043 SetPageWaiters(page); 1051 SetPageWaiters(page);
1044 spin_unlock_irqrestore(&q->lock, flags); 1052 spin_unlock_irqrestore(&q->lock, flags);
1045} 1053}
@@ -1564,23 +1572,29 @@ export:
1564} 1572}
1565 1573
1566/** 1574/**
1567 * find_get_pages - gang pagecache lookup 1575 * find_get_pages_range - gang pagecache lookup
1568 * @mapping: The address_space to search 1576 * @mapping: The address_space to search
1569 * @start: The starting page index 1577 * @start: The starting page index
1578 * @end: The final page index (inclusive)
1570 * @nr_pages: The maximum number of pages 1579 * @nr_pages: The maximum number of pages
1571 * @pages: Where the resulting pages are placed 1580 * @pages: Where the resulting pages are placed
1572 * 1581 *
1573 * find_get_pages() will search for and return a group of up to 1582 * find_get_pages_range() will search for and return a group of up to @nr_pages
1574 * @nr_pages pages in the mapping. The pages are placed at @pages. 1583 * pages in the mapping starting at index @start and up to index @end
1575 * find_get_pages() takes a reference against the returned pages. 1584 * (inclusive). The pages are placed at @pages. find_get_pages_range() takes
1585 * a reference against the returned pages.
1576 * 1586 *
1577 * The search returns a group of mapping-contiguous pages with ascending 1587 * The search returns a group of mapping-contiguous pages with ascending
1578 * indexes. There may be holes in the indices due to not-present pages. 1588 * indexes. There may be holes in the indices due to not-present pages.
1589 * We also update @start to index the next page for the traversal.
1579 * 1590 *
1580 * find_get_pages() returns the number of pages which were found. 1591 * find_get_pages_range() returns the number of pages which were found. If this
1592 * number is smaller than @nr_pages, the end of specified range has been
1593 * reached.
1581 */ 1594 */
1582unsigned find_get_pages(struct address_space *mapping, pgoff_t start, 1595unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
1583 unsigned int nr_pages, struct page **pages) 1596 pgoff_t end, unsigned int nr_pages,
1597 struct page **pages)
1584{ 1598{
1585 struct radix_tree_iter iter; 1599 struct radix_tree_iter iter;
1586 void **slot; 1600 void **slot;
@@ -1590,8 +1604,11 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
1590 return 0; 1604 return 0;
1591 1605
1592 rcu_read_lock(); 1606 rcu_read_lock();
1593 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { 1607 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, *start) {
1594 struct page *head, *page; 1608 struct page *head, *page;
1609
1610 if (iter.index > end)
1611 break;
1595repeat: 1612repeat:
1596 page = radix_tree_deref_slot(slot); 1613 page = radix_tree_deref_slot(slot);
1597 if (unlikely(!page)) 1614 if (unlikely(!page))
@@ -1627,11 +1644,25 @@ repeat:
1627 } 1644 }
1628 1645
1629 pages[ret] = page; 1646 pages[ret] = page;
1630 if (++ret == nr_pages) 1647 if (++ret == nr_pages) {
1631 break; 1648 *start = pages[ret - 1]->index + 1;
1649 goto out;
1650 }
1632 } 1651 }
1633 1652
1653 /*
1654 * We come here when there is no page beyond @end. We take care to not
1655 * overflow the index @start as it confuses some of the callers. This
1656 * breaks the iteration when there is page at index -1 but that is
1657 * already broken anyway.
1658 */
1659 if (end == (pgoff_t)-1)
1660 *start = (pgoff_t)-1;
1661 else
1662 *start = end + 1;
1663out:
1634 rcu_read_unlock(); 1664 rcu_read_unlock();
1665
1635 return ret; 1666 return ret;
1636} 1667}
1637 1668