diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 228 |
1 files changed, 161 insertions, 67 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 2d3ec1ffc66e..5de7633e1dbe 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -109,7 +109,7 @@ | |||
109 | /* | 109 | /* |
110 | * Remove a page from the page cache and free it. Caller has to make | 110 | * Remove a page from the page cache and free it. Caller has to make |
111 | * sure the page is locked and that nobody else uses it - or that usage | 111 | * sure the page is locked and that nobody else uses it - or that usage |
112 | * is safe. The caller must hold a write_lock on the mapping's tree_lock. | 112 | * is safe. The caller must hold the mapping's tree_lock. |
113 | */ | 113 | */ |
114 | void __remove_from_page_cache(struct page *page) | 114 | void __remove_from_page_cache(struct page *page) |
115 | { | 115 | { |
@@ -141,9 +141,9 @@ void remove_from_page_cache(struct page *page) | |||
141 | 141 | ||
142 | BUG_ON(!PageLocked(page)); | 142 | BUG_ON(!PageLocked(page)); |
143 | 143 | ||
144 | write_lock_irq(&mapping->tree_lock); | 144 | spin_lock_irq(&mapping->tree_lock); |
145 | __remove_from_page_cache(page); | 145 | __remove_from_page_cache(page); |
146 | write_unlock_irq(&mapping->tree_lock); | 146 | spin_unlock_irq(&mapping->tree_lock); |
147 | } | 147 | } |
148 | 148 | ||
149 | static int sync_page(void *word) | 149 | static int sync_page(void *word) |
@@ -442,48 +442,52 @@ int filemap_write_and_wait_range(struct address_space *mapping, | |||
442 | } | 442 | } |
443 | 443 | ||
444 | /** | 444 | /** |
445 | * add_to_page_cache - add newly allocated pagecache pages | 445 | * add_to_page_cache_locked - add a locked page to the pagecache |
446 | * @page: page to add | 446 | * @page: page to add |
447 | * @mapping: the page's address_space | 447 | * @mapping: the page's address_space |
448 | * @offset: page index | 448 | * @offset: page index |
449 | * @gfp_mask: page allocation mode | 449 | * @gfp_mask: page allocation mode |
450 | * | 450 | * |
451 | * This function is used to add newly allocated pagecache pages; | 451 | * This function is used to add a page to the pagecache. It must be locked. |
452 | * the page is new, so we can just run SetPageLocked() against it. | ||
453 | * The other page state flags were set by rmqueue(). | ||
454 | * | ||
455 | * This function does not add the page to the LRU. The caller must do that. | 452 | * This function does not add the page to the LRU. The caller must do that. |
456 | */ | 453 | */ |
457 | int add_to_page_cache(struct page *page, struct address_space *mapping, | 454 | int add_to_page_cache_locked(struct page *page, struct address_space *mapping, |
458 | pgoff_t offset, gfp_t gfp_mask) | 455 | pgoff_t offset, gfp_t gfp_mask) |
459 | { | 456 | { |
460 | int error = mem_cgroup_cache_charge(page, current->mm, | 457 | int error; |
458 | |||
459 | VM_BUG_ON(!PageLocked(page)); | ||
460 | |||
461 | error = mem_cgroup_cache_charge(page, current->mm, | ||
461 | gfp_mask & ~__GFP_HIGHMEM); | 462 | gfp_mask & ~__GFP_HIGHMEM); |
462 | if (error) | 463 | if (error) |
463 | goto out; | 464 | goto out; |
464 | 465 | ||
465 | error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); | 466 | error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); |
466 | if (error == 0) { | 467 | if (error == 0) { |
467 | write_lock_irq(&mapping->tree_lock); | 468 | page_cache_get(page); |
469 | page->mapping = mapping; | ||
470 | page->index = offset; | ||
471 | |||
472 | spin_lock_irq(&mapping->tree_lock); | ||
468 | error = radix_tree_insert(&mapping->page_tree, offset, page); | 473 | error = radix_tree_insert(&mapping->page_tree, offset, page); |
469 | if (!error) { | 474 | if (likely(!error)) { |
470 | page_cache_get(page); | ||
471 | SetPageLocked(page); | ||
472 | page->mapping = mapping; | ||
473 | page->index = offset; | ||
474 | mapping->nrpages++; | 475 | mapping->nrpages++; |
475 | __inc_zone_page_state(page, NR_FILE_PAGES); | 476 | __inc_zone_page_state(page, NR_FILE_PAGES); |
476 | } else | 477 | } else { |
478 | page->mapping = NULL; | ||
477 | mem_cgroup_uncharge_cache_page(page); | 479 | mem_cgroup_uncharge_cache_page(page); |
480 | page_cache_release(page); | ||
481 | } | ||
478 | 482 | ||
479 | write_unlock_irq(&mapping->tree_lock); | 483 | spin_unlock_irq(&mapping->tree_lock); |
480 | radix_tree_preload_end(); | 484 | radix_tree_preload_end(); |
481 | } else | 485 | } else |
482 | mem_cgroup_uncharge_cache_page(page); | 486 | mem_cgroup_uncharge_cache_page(page); |
483 | out: | 487 | out: |
484 | return error; | 488 | return error; |
485 | } | 489 | } |
486 | EXPORT_SYMBOL(add_to_page_cache); | 490 | EXPORT_SYMBOL(add_to_page_cache_locked); |
487 | 491 | ||
488 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, | 492 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, |
489 | pgoff_t offset, gfp_t gfp_mask) | 493 | pgoff_t offset, gfp_t gfp_mask) |
@@ -633,15 +637,35 @@ void __lock_page_nosync(struct page *page) | |||
633 | * Is there a pagecache struct page at the given (mapping, offset) tuple? | 637 | * Is there a pagecache struct page at the given (mapping, offset) tuple? |
634 | * If yes, increment its refcount and return it; if no, return NULL. | 638 | * If yes, increment its refcount and return it; if no, return NULL. |
635 | */ | 639 | */ |
636 | struct page * find_get_page(struct address_space *mapping, pgoff_t offset) | 640 | struct page *find_get_page(struct address_space *mapping, pgoff_t offset) |
637 | { | 641 | { |
642 | void **pagep; | ||
638 | struct page *page; | 643 | struct page *page; |
639 | 644 | ||
640 | read_lock_irq(&mapping->tree_lock); | 645 | rcu_read_lock(); |
641 | page = radix_tree_lookup(&mapping->page_tree, offset); | 646 | repeat: |
642 | if (page) | 647 | page = NULL; |
643 | page_cache_get(page); | 648 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); |
644 | read_unlock_irq(&mapping->tree_lock); | 649 | if (pagep) { |
650 | page = radix_tree_deref_slot(pagep); | ||
651 | if (unlikely(!page || page == RADIX_TREE_RETRY)) | ||
652 | goto repeat; | ||
653 | |||
654 | if (!page_cache_get_speculative(page)) | ||
655 | goto repeat; | ||
656 | |||
657 | /* | ||
658 | * Has the page moved? | ||
659 | * This is part of the lockless pagecache protocol. See | ||
660 | * include/linux/pagemap.h for details. | ||
661 | */ | ||
662 | if (unlikely(page != *pagep)) { | ||
663 | page_cache_release(page); | ||
664 | goto repeat; | ||
665 | } | ||
666 | } | ||
667 | rcu_read_unlock(); | ||
668 | |||
645 | return page; | 669 | return page; |
646 | } | 670 | } |
647 | EXPORT_SYMBOL(find_get_page); | 671 | EXPORT_SYMBOL(find_get_page); |
@@ -656,32 +680,22 @@ EXPORT_SYMBOL(find_get_page); | |||
656 | * | 680 | * |
657 | * Returns zero if the page was not present. find_lock_page() may sleep. | 681 | * Returns zero if the page was not present. find_lock_page() may sleep. |
658 | */ | 682 | */ |
659 | struct page *find_lock_page(struct address_space *mapping, | 683 | struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) |
660 | pgoff_t offset) | ||
661 | { | 684 | { |
662 | struct page *page; | 685 | struct page *page; |
663 | 686 | ||
664 | repeat: | 687 | repeat: |
665 | read_lock_irq(&mapping->tree_lock); | 688 | page = find_get_page(mapping, offset); |
666 | page = radix_tree_lookup(&mapping->page_tree, offset); | ||
667 | if (page) { | 689 | if (page) { |
668 | page_cache_get(page); | 690 | lock_page(page); |
669 | if (TestSetPageLocked(page)) { | 691 | /* Has the page been truncated? */ |
670 | read_unlock_irq(&mapping->tree_lock); | 692 | if (unlikely(page->mapping != mapping)) { |
671 | __lock_page(page); | 693 | unlock_page(page); |
672 | 694 | page_cache_release(page); | |
673 | /* Has the page been truncated while we slept? */ | 695 | goto repeat; |
674 | if (unlikely(page->mapping != mapping)) { | ||
675 | unlock_page(page); | ||
676 | page_cache_release(page); | ||
677 | goto repeat; | ||
678 | } | ||
679 | VM_BUG_ON(page->index != offset); | ||
680 | goto out; | ||
681 | } | 696 | } |
697 | VM_BUG_ON(page->index != offset); | ||
682 | } | 698 | } |
683 | read_unlock_irq(&mapping->tree_lock); | ||
684 | out: | ||
685 | return page; | 699 | return page; |
686 | } | 700 | } |
687 | EXPORT_SYMBOL(find_lock_page); | 701 | EXPORT_SYMBOL(find_lock_page); |
@@ -747,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, | |||
747 | { | 761 | { |
748 | unsigned int i; | 762 | unsigned int i; |
749 | unsigned int ret; | 763 | unsigned int ret; |
764 | unsigned int nr_found; | ||
765 | |||
766 | rcu_read_lock(); | ||
767 | restart: | ||
768 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
769 | (void ***)pages, start, nr_pages); | ||
770 | ret = 0; | ||
771 | for (i = 0; i < nr_found; i++) { | ||
772 | struct page *page; | ||
773 | repeat: | ||
774 | page = radix_tree_deref_slot((void **)pages[i]); | ||
775 | if (unlikely(!page)) | ||
776 | continue; | ||
777 | /* | ||
778 | * this can only trigger if nr_found == 1, making livelock | ||
779 | * a non issue. | ||
780 | */ | ||
781 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
782 | goto restart; | ||
750 | 783 | ||
751 | read_lock_irq(&mapping->tree_lock); | 784 | if (!page_cache_get_speculative(page)) |
752 | ret = radix_tree_gang_lookup(&mapping->page_tree, | 785 | goto repeat; |
753 | (void **)pages, start, nr_pages); | 786 | |
754 | for (i = 0; i < ret; i++) | 787 | /* Has the page moved? */ |
755 | page_cache_get(pages[i]); | 788 | if (unlikely(page != *((void **)pages[i]))) { |
756 | read_unlock_irq(&mapping->tree_lock); | 789 | page_cache_release(page); |
790 | goto repeat; | ||
791 | } | ||
792 | |||
793 | pages[ret] = page; | ||
794 | ret++; | ||
795 | } | ||
796 | rcu_read_unlock(); | ||
757 | return ret; | 797 | return ret; |
758 | } | 798 | } |
759 | 799 | ||
@@ -774,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, | |||
774 | { | 814 | { |
775 | unsigned int i; | 815 | unsigned int i; |
776 | unsigned int ret; | 816 | unsigned int ret; |
817 | unsigned int nr_found; | ||
818 | |||
819 | rcu_read_lock(); | ||
820 | restart: | ||
821 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
822 | (void ***)pages, index, nr_pages); | ||
823 | ret = 0; | ||
824 | for (i = 0; i < nr_found; i++) { | ||
825 | struct page *page; | ||
826 | repeat: | ||
827 | page = radix_tree_deref_slot((void **)pages[i]); | ||
828 | if (unlikely(!page)) | ||
829 | continue; | ||
830 | /* | ||
831 | * this can only trigger if nr_found == 1, making livelock | ||
832 | * a non issue. | ||
833 | */ | ||
834 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
835 | goto restart; | ||
777 | 836 | ||
778 | read_lock_irq(&mapping->tree_lock); | 837 | if (page->mapping == NULL || page->index != index) |
779 | ret = radix_tree_gang_lookup(&mapping->page_tree, | ||
780 | (void **)pages, index, nr_pages); | ||
781 | for (i = 0; i < ret; i++) { | ||
782 | if (pages[i]->mapping == NULL || pages[i]->index != index) | ||
783 | break; | 838 | break; |
784 | 839 | ||
785 | page_cache_get(pages[i]); | 840 | if (!page_cache_get_speculative(page)) |
841 | goto repeat; | ||
842 | |||
843 | /* Has the page moved? */ | ||
844 | if (unlikely(page != *((void **)pages[i]))) { | ||
845 | page_cache_release(page); | ||
846 | goto repeat; | ||
847 | } | ||
848 | |||
849 | pages[ret] = page; | ||
850 | ret++; | ||
786 | index++; | 851 | index++; |
787 | } | 852 | } |
788 | read_unlock_irq(&mapping->tree_lock); | 853 | rcu_read_unlock(); |
789 | return i; | 854 | return ret; |
790 | } | 855 | } |
791 | EXPORT_SYMBOL(find_get_pages_contig); | 856 | EXPORT_SYMBOL(find_get_pages_contig); |
792 | 857 | ||
@@ -806,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | |||
806 | { | 871 | { |
807 | unsigned int i; | 872 | unsigned int i; |
808 | unsigned int ret; | 873 | unsigned int ret; |
874 | unsigned int nr_found; | ||
875 | |||
876 | rcu_read_lock(); | ||
877 | restart: | ||
878 | nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree, | ||
879 | (void ***)pages, *index, nr_pages, tag); | ||
880 | ret = 0; | ||
881 | for (i = 0; i < nr_found; i++) { | ||
882 | struct page *page; | ||
883 | repeat: | ||
884 | page = radix_tree_deref_slot((void **)pages[i]); | ||
885 | if (unlikely(!page)) | ||
886 | continue; | ||
887 | /* | ||
888 | * this can only trigger if nr_found == 1, making livelock | ||
889 | * a non issue. | ||
890 | */ | ||
891 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
892 | goto restart; | ||
893 | |||
894 | if (!page_cache_get_speculative(page)) | ||
895 | goto repeat; | ||
896 | |||
897 | /* Has the page moved? */ | ||
898 | if (unlikely(page != *((void **)pages[i]))) { | ||
899 | page_cache_release(page); | ||
900 | goto repeat; | ||
901 | } | ||
902 | |||
903 | pages[ret] = page; | ||
904 | ret++; | ||
905 | } | ||
906 | rcu_read_unlock(); | ||
809 | 907 | ||
810 | read_lock_irq(&mapping->tree_lock); | ||
811 | ret = radix_tree_gang_lookup_tag(&mapping->page_tree, | ||
812 | (void **)pages, *index, nr_pages, tag); | ||
813 | for (i = 0; i < ret; i++) | ||
814 | page_cache_get(pages[i]); | ||
815 | if (ret) | 908 | if (ret) |
816 | *index = pages[ret - 1]->index + 1; | 909 | *index = pages[ret - 1]->index + 1; |
817 | read_unlock_irq(&mapping->tree_lock); | 910 | |
818 | return ret; | 911 | return ret; |
819 | } | 912 | } |
820 | EXPORT_SYMBOL(find_get_pages_tag); | 913 | EXPORT_SYMBOL(find_get_pages_tag); |
@@ -1665,8 +1758,9 @@ static int __remove_suid(struct dentry *dentry, int kill) | |||
1665 | return notify_change(dentry, &newattrs); | 1758 | return notify_change(dentry, &newattrs); |
1666 | } | 1759 | } |
1667 | 1760 | ||
1668 | int remove_suid(struct dentry *dentry) | 1761 | int file_remove_suid(struct file *file) |
1669 | { | 1762 | { |
1763 | struct dentry *dentry = file->f_path.dentry; | ||
1670 | int killsuid = should_remove_suid(dentry); | 1764 | int killsuid = should_remove_suid(dentry); |
1671 | int killpriv = security_inode_need_killpriv(dentry); | 1765 | int killpriv = security_inode_need_killpriv(dentry); |
1672 | int error = 0; | 1766 | int error = 0; |
@@ -1680,7 +1774,7 @@ int remove_suid(struct dentry *dentry) | |||
1680 | 1774 | ||
1681 | return error; | 1775 | return error; |
1682 | } | 1776 | } |
1683 | EXPORT_SYMBOL(remove_suid); | 1777 | EXPORT_SYMBOL(file_remove_suid); |
1684 | 1778 | ||
1685 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, | 1779 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, |
1686 | const struct iovec *iov, size_t base, size_t bytes) | 1780 | const struct iovec *iov, size_t base, size_t bytes) |
@@ -2436,7 +2530,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | |||
2436 | if (count == 0) | 2530 | if (count == 0) |
2437 | goto out; | 2531 | goto out; |
2438 | 2532 | ||
2439 | err = remove_suid(file->f_path.dentry); | 2533 | err = file_remove_suid(file); |
2440 | if (err) | 2534 | if (err) |
2441 | goto out; | 2535 | goto out; |
2442 | 2536 | ||