Diffstat (limited to 'mm/filemap.c')

 -rw-r--r--  mm/filemap.c | 175
 1 file changed, 103 insertions(+), 72 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index a49702445ce0..8c88e186a773 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -130,17 +130,8 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			return -EEXIST;
 
 		mapping->nrexceptional--;
-		if (!dax_mapping(mapping)) {
-			if (shadowp)
-				*shadowp = p;
-		} else {
-			/* DAX can replace empty locked entry with a hole */
-			WARN_ON_ONCE(p !=
-					dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
-			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index, p,
-						      true);
-		}
+		if (shadowp)
+			*shadowp = p;
 	}
 	__radix_tree_replace(&mapping->page_tree, node, slot, page,
 			     workingset_update_node, mapping);
@@ -402,8 +393,7 @@ bool filemap_range_has_page(struct address_space *mapping,
 {
 	pgoff_t index = start_byte >> PAGE_SHIFT;
 	pgoff_t end = end_byte >> PAGE_SHIFT;
-	struct pagevec pvec;
-	bool ret;
+	struct page *page;
 
 	if (end_byte < start_byte)
 		return false;
@@ -411,12 +401,10 @@ bool filemap_range_has_page(struct address_space *mapping,
 	if (mapping->nrpages == 0)
 		return false;
 
-	pagevec_init(&pvec, 0);
-	if (!pagevec_lookup(&pvec, mapping, index, 1))
+	if (!find_get_pages_range(mapping, &index, end, 1, &page))
 		return false;
-	ret = (pvec.pages[0]->index <= end);
-	pagevec_release(&pvec);
-	return ret;
+	put_page(page);
+	return true;
 }
 EXPORT_SYMBOL(filemap_range_has_page);
 
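Reviewer note: the predicate now probes the range directly, letting find_get_pages_range() enforce the index <= end bound, and drops the page reference immediately since only existence matters. As a hedged usage sketch of the exported predicate (flush_if_cached() is a hypothetical caller, not part of this diff):

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Hypothetical caller: skip an expensive flush when nothing is cached. */
static int flush_if_cached(struct file *file, loff_t pos, size_t len)
{
	struct address_space *mapping = file->f_mapping;

	if (!filemap_range_has_page(mapping, pos, pos + len - 1))
		return 0;	/* nothing cached in the byte range */

	return filemap_write_and_wait_range(mapping, pos, pos + len - 1);
}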
@@ -476,6 +464,29 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
 EXPORT_SYMBOL(filemap_fdatawait_range);
 
 /**
+ * file_fdatawait_range - wait for writeback to complete
+ * @file: file pointing to address space structure to wait for
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Walk the list of under-writeback pages of the address space that file
+ * refers to, in the given range and wait for all of them.  Check error
+ * status of the address space vs. the file->f_wb_err cursor and return it.
+ *
+ * Since the error status of the file is advanced by this function,
+ * callers are responsible for checking the return value and handling and/or
+ * reporting the error.
+ */
+int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	__filemap_fdatawait_range(mapping, start_byte, end_byte);
+	return file_check_and_advance_wb_err(file);
+}
+EXPORT_SYMBOL(file_fdatawait_range);
+
+/**
  * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
  * @mapping: address space structure to wait for
  *
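Reviewer note: the new helper reports writeback errors against this file's f_wb_err cursor via file_check_and_advance_wb_err() rather than clearing mapping-wide flags, so one file's sync cannot swallow another's error. A hedged sketch of a per-file sync path built on it (my_fsync() is hypothetical, not from this diff):

#include <linux/fs.h>
#include <linux/pagemap.h>

static int my_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	int err;

	/* Kick off writeback for the byte range first. */
	err = filemap_fdatawrite_range(file->f_mapping, start, end);
	if (err)
		return err;

	/* Wait for it and advance file->f_wb_err to report errors. */
	return file_fdatawait_range(file, start, end);
}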
@@ -489,45 +500,22 @@ EXPORT_SYMBOL(filemap_fdatawait_range);
  */
 int filemap_fdatawait_keep_errors(struct address_space *mapping)
 {
-	loff_t i_size = i_size_read(mapping->host);
-
-	if (i_size == 0)
-		return 0;
-
-	__filemap_fdatawait_range(mapping, 0, i_size - 1);
+	__filemap_fdatawait_range(mapping, 0, LLONG_MAX);
 	return filemap_check_and_keep_errors(mapping);
 }
 EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
 
-/**
- * filemap_fdatawait - wait for all under-writeback pages to complete
- * @mapping: address space structure to wait for
- *
- * Walk the list of under-writeback pages of the given address space
- * and wait for all of them.  Check error status of the address space
- * and return it.
- *
- * Since the error status of the address space is cleared by this function,
- * callers are responsible for checking the return value and handling and/or
- * reporting the error.
- */
-int filemap_fdatawait(struct address_space *mapping)
+static bool mapping_needs_writeback(struct address_space *mapping)
 {
-	loff_t i_size = i_size_read(mapping->host);
-
-	if (i_size == 0)
-		return 0;
-
-	return filemap_fdatawait_range(mapping, 0, i_size - 1);
+	return (!dax_mapping(mapping) && mapping->nrpages) ||
+	    (dax_mapping(mapping) && mapping->nrexceptional);
 }
-EXPORT_SYMBOL(filemap_fdatawait);
 
 int filemap_write_and_wait(struct address_space *mapping)
 {
 	int err = 0;
 
-	if ((!dax_mapping(mapping) && mapping->nrpages) ||
-	    (dax_mapping(mapping) && mapping->nrexceptional)) {
+	if (mapping_needs_writeback(mapping)) {
 		err = filemap_fdatawrite(mapping);
 		/*
 		 * Even if the above returned error, the pages may be
@@ -566,8 +554,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
 	int err = 0;
 
-	if ((!dax_mapping(mapping) && mapping->nrpages) ||
-	    (dax_mapping(mapping) && mapping->nrexceptional)) {
+	if (mapping_needs_writeback(mapping)) {
 		err = __filemap_fdatawrite_range(mapping, lstart, lend,
 						 WB_SYNC_ALL);
 		/* See comment of filemap_write_and_wait() */
@@ -589,7 +576,7 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
 
 void __filemap_set_wb_err(struct address_space *mapping, int err)
 {
-	errseq_t eseq = __errseq_set(&mapping->wb_err, err);
+	errseq_t eseq = errseq_set(&mapping->wb_err, err);
 
 	trace_filemap_set_wb_err(mapping, eseq);
 }
@@ -656,8 +643,7 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
 	int err = 0, err2;
 	struct address_space *mapping = file->f_mapping;
 
-	if ((!dax_mapping(mapping) && mapping->nrpages) ||
-	    (dax_mapping(mapping) && mapping->nrexceptional)) {
+	if (mapping_needs_writeback(mapping)) {
 		err = __filemap_fdatawrite_range(mapping, lstart, lend,
 						 WB_SYNC_ALL);
 		/* See comment of filemap_write_and_wait() */
@@ -885,6 +871,7 @@ void __init pagecache_init(void)
 	page_writeback_init();
 }
 
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
 struct wait_page_key {
 	struct page *page;
 	int bit_nr;
@@ -909,8 +896,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 
 	if (wait_page->bit_nr != key->bit_nr)
 		return 0;
+
+	/* Stop walking if it's locked */
 	if (test_bit(key->bit_nr, &key->page->flags))
-		return 0;
+		return -1;
 
 	return autoremove_wake_function(wait, mode, sync, key);
 }
@@ -920,13 +909,33 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 	wait_queue_head_t *q = page_waitqueue(page);
 	struct wait_page_key key;
 	unsigned long flags;
+	wait_queue_entry_t bookmark;
 
 	key.page = page;
 	key.bit_nr = bit_nr;
 	key.page_match = 0;
 
+	bookmark.flags = 0;
+	bookmark.private = NULL;
+	bookmark.func = NULL;
+	INIT_LIST_HEAD(&bookmark.entry);
+
 	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_locked_key(q, TASK_NORMAL, &key);
+	__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+
+	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
+		/*
+		 * Take a breather from holding the lock,
+		 * allow pages that finish wake up asynchronously
+		 * to acquire the lock and remove themselves
+		 * from wait queue
+		 */
+		spin_unlock_irqrestore(&q->lock, flags);
+		cpu_relax();
+		spin_lock_irqsave(&q->lock, flags);
+		__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+	}
+
 	/*
 	 * It is possible for other pages to have collided on the waitqueue
 	 * hash, so in that case check for a page match. That prevents a long-
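Reviewer note: two pieces cooperate in this walk. wake_page_function() returning -1 aborts the scan once the page has been locked again, and the bookmark entry (func == NULL, so it is never woken itself) records where a bounded batch stopped so the scan can resume after the lock is dropped. Below is an abbreviated sketch of how the waker side fits together, simplified from __wake_up_common() in kernel/sched/wait.c; exclusive-waiter accounting is omitted and WALK_BREAK_CNT stands in for the upstream WAITQUEUE_WALK_BREAK_CNT limit, so treat the details as assumptions rather than the verbatim kernel loop:

#include <linux/wait.h>

#define WALK_BREAK_CNT	64	/* stand-in for WAITQUEUE_WALK_BREAK_CNT */

static void walk_waiters(struct wait_queue_head *wq_head, unsigned int mode,
			 void *key, wait_queue_entry_t *bookmark)
{
	wait_queue_entry_t *curr, *next;
	int cnt = 0;

	if (bookmark->flags & WQ_FLAG_BOOKMARK) {
		/* Resume right after where the previous batch stopped. */
		curr = list_next_entry(bookmark, entry);
		list_del(&bookmark->entry);
		bookmark->flags = 0;
	} else {
		curr = list_first_entry(&wq_head->head,
					wait_queue_entry_t, entry);
	}

	list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
		if (curr->flags & WQ_FLAG_BOOKMARK)
			continue;	/* skip other walkers' bookmarks */

		/* wake_page_function() returns -1 once the page is locked. */
		if (curr->func(curr, mode, 0, key) < 0)
			break;

		if (++cnt > WALK_BREAK_CNT && &next->entry != &wq_head->head) {
			/* Park the bookmark; the caller drops the lock and retries. */
			bookmark->flags = WQ_FLAG_BOOKMARK;
			list_add_tail(&bookmark->entry, &next->entry);
			break;
		}
	}
}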
@@ -964,6 +973,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	int ret = 0;
 
 	init_wait(wait);
+	wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
 	wait->func = wake_page_function;
 	wait_page.page = page;
 	wait_page.bit_nr = bit_nr;
@@ -972,10 +982,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	spin_lock_irq(&q->lock);
 
 	if (likely(list_empty(&wait->entry))) {
-		if (lock)
-			__add_wait_queue_entry_tail_exclusive(q, wait);
-		else
-			__add_wait_queue(q, wait);
+		__add_wait_queue_entry_tail(q, wait);
 		SetPageWaiters(page);
 	}
 
@@ -985,10 +992,6 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 
 		if (likely(test_bit(bit_nr, &page->flags))) {
 			io_schedule();
-			if (unlikely(signal_pending_state(state, current))) {
-				ret = -EINTR;
-				break;
-			}
 		}
 
 		if (lock) {
@@ -998,6 +1001,11 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 			if (!test_bit(bit_nr, &page->flags))
 				break;
 		}
+
+		if (unlikely(signal_pending_state(state, current))) {
+			ret = -EINTR;
+			break;
+		}
 	}
 
 	finish_wait(q, wait);
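Reviewer note: moving the signal check to the end of the loop means it runs on every iteration, not only after an actual io_schedule(), and only after a lock waiter has had its chance to take the bit. For callers nothing changes: the killable variants built on wait_on_page_bit_common() still surface -EINTR. A hedged usage sketch (touch_page_data() is a hypothetical caller, not from this diff):

#include <linux/pagemap.h>

static int touch_page_data(struct page *page)
{
	int err = lock_page_killable(page);	/* -EINTR on fatal signal */

	if (err)
		return err;
	/* ... the page is locked here; do the work ... */
	unlock_page(page);
	return 0;
}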
@@ -1039,7 +1047,7 @@ void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
 	unsigned long flags;
 
 	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, waiter);
+	__add_wait_queue_entry_tail(q, waiter);
 	SetPageWaiters(page);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
@@ -1564,23 +1572,29 @@ export:
 }
 
 /**
- * find_get_pages - gang pagecache lookup
+ * find_get_pages_range - gang pagecache lookup
  * @mapping:	The address_space to search
  * @start:	The starting page index
+ * @end:	The final page index (inclusive)
  * @nr_pages:	The maximum number of pages
  * @pages:	Where the resulting pages are placed
  *
- * find_get_pages() will search for and return a group of up to
- * @nr_pages pages in the mapping.  The pages are placed at @pages.
- * find_get_pages() takes a reference against the returned pages.
+ * find_get_pages_range() will search for and return a group of up to @nr_pages
+ * pages in the mapping starting at index @start and up to index @end
+ * (inclusive).  The pages are placed at @pages.  find_get_pages_range() takes
+ * a reference against the returned pages.
  *
  * The search returns a group of mapping-contiguous pages with ascending
  * indexes.  There may be holes in the indices due to not-present pages.
+ * We also update @start to index the next page for the traversal.
  *
- * find_get_pages() returns the number of pages which were found.
+ * find_get_pages_range() returns the number of pages which were found. If this
+ * number is smaller than @nr_pages, the end of specified range has been
+ * reached.
  */
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
-			unsigned int nr_pages, struct page **pages)
+unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
+			pgoff_t end, unsigned int nr_pages,
+			struct page **pages)
 {
 	struct radix_tree_iter iter;
 	void **slot;
@@ -1590,8 +1604,11 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, *start) {
 		struct page *head, *page;
+
+		if (iter.index > end)
+			break;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1627,11 +1644,25 @@ repeat:
 	}
 
 		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
+		if (++ret == nr_pages) {
+			*start = pages[ret - 1]->index + 1;
+			goto out;
+		}
 	}
 
+	/*
+	 * We come here when there is no page beyond @end. We take care to not
+	 * overflow the index @start as it confuses some of the callers. This
+	 * breaks the iteration when there is page at index -1 but that is
+	 * already broken anyway.
+	 */
+	if (end == (pgoff_t)-1)
+		*start = (pgoff_t)-1;
+	else
+		*start = end + 1;
+out:
 	rcu_read_unlock();
+
 	return ret;
 }
 
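Reviewer note: because the new API advances @start itself, and saturates it at the end of the range rather than letting it overflow, batch callers need no manual index arithmetic. A hedged caller sketch (walk_cached_range() is hypothetical; PAGEVEC_SIZE is the usual lookup batch size):

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>

static void walk_cached_range(struct address_space *mapping,
			      pgoff_t index, pgoff_t end)
{
	struct page *pages[PAGEVEC_SIZE];
	unsigned int i, nr;

	while ((nr = find_get_pages_range(mapping, &index, end,
					  PAGEVEC_SIZE, pages))) {
		for (i = 0; i < nr; i++) {
			/* ... inspect pages[i] while holding its reference ... */
			put_page(pages[i]);
		}
		if (nr < PAGEVEC_SIZE)
			break;	/* short batch: end of range reached */
	}
}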