diff options
Diffstat (limited to 'mm/filemap.c')
| -rw-r--r-- | mm/filemap.c | 228 |
1 files changed, 161 insertions, 67 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 2d3ec1ffc66e..5de7633e1dbe 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -109,7 +109,7 @@ | |||
| 109 | /* | 109 | /* |
| 110 | * Remove a page from the page cache and free it. Caller has to make | 110 | * Remove a page from the page cache and free it. Caller has to make |
| 111 | * sure the page is locked and that nobody else uses it - or that usage | 111 | * sure the page is locked and that nobody else uses it - or that usage |
| 112 | * is safe. The caller must hold a write_lock on the mapping's tree_lock. | 112 | * is safe. The caller must hold the mapping's tree_lock. |
| 113 | */ | 113 | */ |
| 114 | void __remove_from_page_cache(struct page *page) | 114 | void __remove_from_page_cache(struct page *page) |
| 115 | { | 115 | { |
| @@ -141,9 +141,9 @@ void remove_from_page_cache(struct page *page) | |||
| 141 | 141 | ||
| 142 | BUG_ON(!PageLocked(page)); | 142 | BUG_ON(!PageLocked(page)); |
| 143 | 143 | ||
| 144 | write_lock_irq(&mapping->tree_lock); | 144 | spin_lock_irq(&mapping->tree_lock); |
| 145 | __remove_from_page_cache(page); | 145 | __remove_from_page_cache(page); |
| 146 | write_unlock_irq(&mapping->tree_lock); | 146 | spin_unlock_irq(&mapping->tree_lock); |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | static int sync_page(void *word) | 149 | static int sync_page(void *word) |
| @@ -442,48 +442,52 @@ int filemap_write_and_wait_range(struct address_space *mapping, | |||
| 442 | } | 442 | } |
| 443 | 443 | ||
| 444 | /** | 444 | /** |
| 445 | * add_to_page_cache - add newly allocated pagecache pages | 445 | * add_to_page_cache_locked - add a locked page to the pagecache |
| 446 | * @page: page to add | 446 | * @page: page to add |
| 447 | * @mapping: the page's address_space | 447 | * @mapping: the page's address_space |
| 448 | * @offset: page index | 448 | * @offset: page index |
| 449 | * @gfp_mask: page allocation mode | 449 | * @gfp_mask: page allocation mode |
| 450 | * | 450 | * |
| 451 | * This function is used to add newly allocated pagecache pages; | 451 | * This function is used to add a page to the pagecache. It must be locked. |
| 452 | * the page is new, so we can just run SetPageLocked() against it. | ||
| 453 | * The other page state flags were set by rmqueue(). | ||
| 454 | * | ||
| 455 | * This function does not add the page to the LRU. The caller must do that. | 452 | * This function does not add the page to the LRU. The caller must do that. |
| 456 | */ | 453 | */ |
| 457 | int add_to_page_cache(struct page *page, struct address_space *mapping, | 454 | int add_to_page_cache_locked(struct page *page, struct address_space *mapping, |
| 458 | pgoff_t offset, gfp_t gfp_mask) | 455 | pgoff_t offset, gfp_t gfp_mask) |
| 459 | { | 456 | { |
| 460 | int error = mem_cgroup_cache_charge(page, current->mm, | 457 | int error; |
| 458 | |||
| 459 | VM_BUG_ON(!PageLocked(page)); | ||
| 460 | |||
| 461 | error = mem_cgroup_cache_charge(page, current->mm, | ||
| 461 | gfp_mask & ~__GFP_HIGHMEM); | 462 | gfp_mask & ~__GFP_HIGHMEM); |
| 462 | if (error) | 463 | if (error) |
| 463 | goto out; | 464 | goto out; |
| 464 | 465 | ||
| 465 | error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); | 466 | error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); |
| 466 | if (error == 0) { | 467 | if (error == 0) { |
| 467 | write_lock_irq(&mapping->tree_lock); | 468 | page_cache_get(page); |
| 469 | page->mapping = mapping; | ||
| 470 | page->index = offset; | ||
| 471 | |||
| 472 | spin_lock_irq(&mapping->tree_lock); | ||
| 468 | error = radix_tree_insert(&mapping->page_tree, offset, page); | 473 | error = radix_tree_insert(&mapping->page_tree, offset, page); |
| 469 | if (!error) { | 474 | if (likely(!error)) { |
| 470 | page_cache_get(page); | ||
| 471 | SetPageLocked(page); | ||
| 472 | page->mapping = mapping; | ||
| 473 | page->index = offset; | ||
| 474 | mapping->nrpages++; | 475 | mapping->nrpages++; |
| 475 | __inc_zone_page_state(page, NR_FILE_PAGES); | 476 | __inc_zone_page_state(page, NR_FILE_PAGES); |
| 476 | } else | 477 | } else { |
| 478 | page->mapping = NULL; | ||
| 477 | mem_cgroup_uncharge_cache_page(page); | 479 | mem_cgroup_uncharge_cache_page(page); |
| 480 | page_cache_release(page); | ||
| 481 | } | ||
| 478 | 482 | ||
| 479 | write_unlock_irq(&mapping->tree_lock); | 483 | spin_unlock_irq(&mapping->tree_lock); |
| 480 | radix_tree_preload_end(); | 484 | radix_tree_preload_end(); |
| 481 | } else | 485 | } else |
| 482 | mem_cgroup_uncharge_cache_page(page); | 486 | mem_cgroup_uncharge_cache_page(page); |
| 483 | out: | 487 | out: |
| 484 | return error; | 488 | return error; |
| 485 | } | 489 | } |
| 486 | EXPORT_SYMBOL(add_to_page_cache); | 490 | EXPORT_SYMBOL(add_to_page_cache_locked); |
| 487 | 491 | ||
| 488 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, | 492 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, |
| 489 | pgoff_t offset, gfp_t gfp_mask) | 493 | pgoff_t offset, gfp_t gfp_mask) |
| @@ -633,15 +637,35 @@ void __lock_page_nosync(struct page *page) | |||
| 633 | * Is there a pagecache struct page at the given (mapping, offset) tuple? | 637 | * Is there a pagecache struct page at the given (mapping, offset) tuple? |
| 634 | * If yes, increment its refcount and return it; if no, return NULL. | 638 | * If yes, increment its refcount and return it; if no, return NULL. |
| 635 | */ | 639 | */ |
| 636 | struct page * find_get_page(struct address_space *mapping, pgoff_t offset) | 640 | struct page *find_get_page(struct address_space *mapping, pgoff_t offset) |
| 637 | { | 641 | { |
| 642 | void **pagep; | ||
| 638 | struct page *page; | 643 | struct page *page; |
| 639 | 644 | ||
| 640 | read_lock_irq(&mapping->tree_lock); | 645 | rcu_read_lock(); |
| 641 | page = radix_tree_lookup(&mapping->page_tree, offset); | 646 | repeat: |
| 642 | if (page) | 647 | page = NULL; |
| 643 | page_cache_get(page); | 648 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); |
| 644 | read_unlock_irq(&mapping->tree_lock); | 649 | if (pagep) { |
| 650 | page = radix_tree_deref_slot(pagep); | ||
| 651 | if (unlikely(!page || page == RADIX_TREE_RETRY)) | ||
| 652 | goto repeat; | ||
| 653 | |||
| 654 | if (!page_cache_get_speculative(page)) | ||
| 655 | goto repeat; | ||
| 656 | |||
| 657 | /* | ||
| 658 | * Has the page moved? | ||
| 659 | * This is part of the lockless pagecache protocol. See | ||
| 660 | * include/linux/pagemap.h for details. | ||
| 661 | */ | ||
| 662 | if (unlikely(page != *pagep)) { | ||
| 663 | page_cache_release(page); | ||
| 664 | goto repeat; | ||
| 665 | } | ||
| 666 | } | ||
| 667 | rcu_read_unlock(); | ||
| 668 | |||
| 645 | return page; | 669 | return page; |
| 646 | } | 670 | } |
| 647 | EXPORT_SYMBOL(find_get_page); | 671 | EXPORT_SYMBOL(find_get_page); |
| @@ -656,32 +680,22 @@ EXPORT_SYMBOL(find_get_page); | |||
| 656 | * | 680 | * |
| 657 | * Returns zero if the page was not present. find_lock_page() may sleep. | 681 | * Returns zero if the page was not present. find_lock_page() may sleep. |
| 658 | */ | 682 | */ |
| 659 | struct page *find_lock_page(struct address_space *mapping, | 683 | struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) |
| 660 | pgoff_t offset) | ||
| 661 | { | 684 | { |
| 662 | struct page *page; | 685 | struct page *page; |
| 663 | 686 | ||
| 664 | repeat: | 687 | repeat: |
| 665 | read_lock_irq(&mapping->tree_lock); | 688 | page = find_get_page(mapping, offset); |
| 666 | page = radix_tree_lookup(&mapping->page_tree, offset); | ||
| 667 | if (page) { | 689 | if (page) { |
| 668 | page_cache_get(page); | 690 | lock_page(page); |
| 669 | if (TestSetPageLocked(page)) { | 691 | /* Has the page been truncated? */ |
| 670 | read_unlock_irq(&mapping->tree_lock); | 692 | if (unlikely(page->mapping != mapping)) { |
| 671 | __lock_page(page); | 693 | unlock_page(page); |
| 672 | 694 | page_cache_release(page); | |
| 673 | /* Has the page been truncated while we slept? */ | 695 | goto repeat; |
| 674 | if (unlikely(page->mapping != mapping)) { | ||
| 675 | unlock_page(page); | ||
| 676 | page_cache_release(page); | ||
| 677 | goto repeat; | ||
| 678 | } | ||
| 679 | VM_BUG_ON(page->index != offset); | ||
| 680 | goto out; | ||
| 681 | } | 696 | } |
| 697 | VM_BUG_ON(page->index != offset); | ||
| 682 | } | 698 | } |
| 683 | read_unlock_irq(&mapping->tree_lock); | ||
| 684 | out: | ||
| 685 | return page; | 699 | return page; |
| 686 | } | 700 | } |
| 687 | EXPORT_SYMBOL(find_lock_page); | 701 | EXPORT_SYMBOL(find_lock_page); |
| @@ -747,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, | |||
| 747 | { | 761 | { |
| 748 | unsigned int i; | 762 | unsigned int i; |
| 749 | unsigned int ret; | 763 | unsigned int ret; |
| 764 | unsigned int nr_found; | ||
| 765 | |||
| 766 | rcu_read_lock(); | ||
| 767 | restart: | ||
| 768 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
| 769 | (void ***)pages, start, nr_pages); | ||
| 770 | ret = 0; | ||
| 771 | for (i = 0; i < nr_found; i++) { | ||
| 772 | struct page *page; | ||
| 773 | repeat: | ||
| 774 | page = radix_tree_deref_slot((void **)pages[i]); | ||
| 775 | if (unlikely(!page)) | ||
| 776 | continue; | ||
| 777 | /* | ||
| 778 | * this can only trigger if nr_found == 1, making livelock | ||
| 779 | * a non issue. | ||
| 780 | */ | ||
| 781 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
| 782 | goto restart; | ||
| 750 | 783 | ||
| 751 | read_lock_irq(&mapping->tree_lock); | 784 | if (!page_cache_get_speculative(page)) |
| 752 | ret = radix_tree_gang_lookup(&mapping->page_tree, | 785 | goto repeat; |
| 753 | (void **)pages, start, nr_pages); | 786 | |
| 754 | for (i = 0; i < ret; i++) | 787 | /* Has the page moved? */ |
| 755 | page_cache_get(pages[i]); | 788 | if (unlikely(page != *((void **)pages[i]))) { |
| 756 | read_unlock_irq(&mapping->tree_lock); | 789 | page_cache_release(page); |
| 790 | goto repeat; | ||
| 791 | } | ||
| 792 | |||
| 793 | pages[ret] = page; | ||
| 794 | ret++; | ||
| 795 | } | ||
| 796 | rcu_read_unlock(); | ||
| 757 | return ret; | 797 | return ret; |
| 758 | } | 798 | } |
| 759 | 799 | ||
| @@ -774,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, | |||
| 774 | { | 814 | { |
| 775 | unsigned int i; | 815 | unsigned int i; |
| 776 | unsigned int ret; | 816 | unsigned int ret; |
| 817 | unsigned int nr_found; | ||
| 818 | |||
| 819 | rcu_read_lock(); | ||
| 820 | restart: | ||
| 821 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
| 822 | (void ***)pages, index, nr_pages); | ||
| 823 | ret = 0; | ||
| 824 | for (i = 0; i < nr_found; i++) { | ||
| 825 | struct page *page; | ||
| 826 | repeat: | ||
| 827 | page = radix_tree_deref_slot((void **)pages[i]); | ||
| 828 | if (unlikely(!page)) | ||
| 829 | continue; | ||
| 830 | /* | ||
| 831 | * this can only trigger if nr_found == 1, making livelock | ||
| 832 | * a non issue. | ||
| 833 | */ | ||
| 834 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
| 835 | goto restart; | ||
| 777 | 836 | ||
| 778 | read_lock_irq(&mapping->tree_lock); | 837 | if (page->mapping == NULL || page->index != index) |
| 779 | ret = radix_tree_gang_lookup(&mapping->page_tree, | ||
| 780 | (void **)pages, index, nr_pages); | ||
| 781 | for (i = 0; i < ret; i++) { | ||
| 782 | if (pages[i]->mapping == NULL || pages[i]->index != index) | ||
| 783 | break; | 838 | break; |
| 784 | 839 | ||
| 785 | page_cache_get(pages[i]); | 840 | if (!page_cache_get_speculative(page)) |
| 841 | goto repeat; | ||
| 842 | |||
| 843 | /* Has the page moved? */ | ||
| 844 | if (unlikely(page != *((void **)pages[i]))) { | ||
| 845 | page_cache_release(page); | ||
| 846 | goto repeat; | ||
| 847 | } | ||
| 848 | |||
| 849 | pages[ret] = page; | ||
| 850 | ret++; | ||
| 786 | index++; | 851 | index++; |
| 787 | } | 852 | } |
| 788 | read_unlock_irq(&mapping->tree_lock); | 853 | rcu_read_unlock(); |
| 789 | return i; | 854 | return ret; |
| 790 | } | 855 | } |
| 791 | EXPORT_SYMBOL(find_get_pages_contig); | 856 | EXPORT_SYMBOL(find_get_pages_contig); |
| 792 | 857 | ||
| @@ -806,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | |||
| 806 | { | 871 | { |
| 807 | unsigned int i; | 872 | unsigned int i; |
| 808 | unsigned int ret; | 873 | unsigned int ret; |
| 874 | unsigned int nr_found; | ||
| 875 | |||
| 876 | rcu_read_lock(); | ||
| 877 | restart: | ||
| 878 | nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree, | ||
| 879 | (void ***)pages, *index, nr_pages, tag); | ||
| 880 | ret = 0; | ||
| 881 | for (i = 0; i < nr_found; i++) { | ||
| 882 | struct page *page; | ||
| 883 | repeat: | ||
| 884 | page = radix_tree_deref_slot((void **)pages[i]); | ||
| 885 | if (unlikely(!page)) | ||
| 886 | continue; | ||
| 887 | /* | ||
| 888 | * this can only trigger if nr_found == 1, making livelock | ||
| 889 | * a non issue. | ||
| 890 | */ | ||
| 891 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
| 892 | goto restart; | ||
| 893 | |||
| 894 | if (!page_cache_get_speculative(page)) | ||
| 895 | goto repeat; | ||
| 896 | |||
| 897 | /* Has the page moved? */ | ||
| 898 | if (unlikely(page != *((void **)pages[i]))) { | ||
| 899 | page_cache_release(page); | ||
| 900 | goto repeat; | ||
| 901 | } | ||
| 902 | |||
| 903 | pages[ret] = page; | ||
| 904 | ret++; | ||
| 905 | } | ||
| 906 | rcu_read_unlock(); | ||
| 809 | 907 | ||
| 810 | read_lock_irq(&mapping->tree_lock); | ||
| 811 | ret = radix_tree_gang_lookup_tag(&mapping->page_tree, | ||
| 812 | (void **)pages, *index, nr_pages, tag); | ||
| 813 | for (i = 0; i < ret; i++) | ||
| 814 | page_cache_get(pages[i]); | ||
| 815 | if (ret) | 908 | if (ret) |
| 816 | *index = pages[ret - 1]->index + 1; | 909 | *index = pages[ret - 1]->index + 1; |
| 817 | read_unlock_irq(&mapping->tree_lock); | 910 | |
| 818 | return ret; | 911 | return ret; |
| 819 | } | 912 | } |
| 820 | EXPORT_SYMBOL(find_get_pages_tag); | 913 | EXPORT_SYMBOL(find_get_pages_tag); |
| @@ -1665,8 +1758,9 @@ static int __remove_suid(struct dentry *dentry, int kill) | |||
| 1665 | return notify_change(dentry, &newattrs); | 1758 | return notify_change(dentry, &newattrs); |
| 1666 | } | 1759 | } |
| 1667 | 1760 | ||
| 1668 | int remove_suid(struct dentry *dentry) | 1761 | int file_remove_suid(struct file *file) |
| 1669 | { | 1762 | { |
| 1763 | struct dentry *dentry = file->f_path.dentry; | ||
| 1670 | int killsuid = should_remove_suid(dentry); | 1764 | int killsuid = should_remove_suid(dentry); |
| 1671 | int killpriv = security_inode_need_killpriv(dentry); | 1765 | int killpriv = security_inode_need_killpriv(dentry); |
| 1672 | int error = 0; | 1766 | int error = 0; |
| @@ -1680,7 +1774,7 @@ int remove_suid(struct dentry *dentry) | |||
| 1680 | 1774 | ||
| 1681 | return error; | 1775 | return error; |
| 1682 | } | 1776 | } |
| 1683 | EXPORT_SYMBOL(remove_suid); | 1777 | EXPORT_SYMBOL(file_remove_suid); |
| 1684 | 1778 | ||
| 1685 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, | 1779 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, |
| 1686 | const struct iovec *iov, size_t base, size_t bytes) | 1780 | const struct iovec *iov, size_t base, size_t bytes) |
| @@ -2436,7 +2530,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | |||
| 2436 | if (count == 0) | 2530 | if (count == 0) |
| 2437 | goto out; | 2531 | goto out; |
| 2438 | 2532 | ||
| 2439 | err = remove_suid(file->f_path.dentry); | 2533 | err = file_remove_suid(file); |
| 2440 | if (err) | 2534 | if (err) |
| 2441 | goto out; | 2535 | goto out; |
| 2442 | 2536 | ||
