diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-28 15:14:43 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-28 15:14:43 -0400 |
commit | 414f746d232d41ed6ae8632c4495ae795373c44b (patch) | |
tree | 167f9bc8f139c6e82e6732b38c7a938b8a9d31cd /mm | |
parent | 5a7a201c51c324876d00a54e7208af6af12d1ca4 (diff) | |
parent | c9272c4f9fbe2087beb3392f526dc5b19efaa56b (diff) |
Merge branch 'linus' into cpus4096
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 3 | ||||
-rw-r--r-- | mm/allocpercpu.c | 20 | ||||
-rw-r--r-- | mm/filemap.c | 228 | ||||
-rw-r--r-- | mm/filemap_xip.c | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 24 | ||||
-rw-r--r-- | mm/memory.c | 3 | ||||
-rw-r--r-- | mm/migrate.c | 29 | ||||
-rw-r--r-- | mm/nommu.c | 4 | ||||
-rw-r--r-- | mm/page-writeback.c | 12 | ||||
-rw-r--r-- | mm/readahead.c | 6 | ||||
-rw-r--r-- | mm/rmap.c | 2 | ||||
-rw-r--r-- | mm/shmem.c | 8 | ||||
-rw-r--r-- | mm/shmem_acl.c | 2 | ||||
-rw-r--r-- | mm/slab.c | 11 | ||||
-rw-r--r-- | mm/slob.c | 7 | ||||
-rw-r--r-- | mm/slub.c | 13 | ||||
-rw-r--r-- | mm/sparse.c | 2 | ||||
-rw-r--r-- | mm/swap_state.c | 30 | ||||
-rw-r--r-- | mm/swapfile.c | 10 | ||||
-rw-r--r-- | mm/truncate.c | 6 | ||||
-rw-r--r-- | mm/util.c | 55 | ||||
-rw-r--r-- | mm/vmalloc.c | 6 | ||||
-rw-r--r-- | mm/vmscan.c | 80 |
23 files changed, 375 insertions, 188 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index aa799007a11b..efee5d379df4 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -77,6 +77,9 @@ config FLAT_NODE_MEM_MAP | |||
77 | def_bool y | 77 | def_bool y |
78 | depends on !SPARSEMEM | 78 | depends on !SPARSEMEM |
79 | 79 | ||
80 | config HAVE_GET_USER_PAGES_FAST | ||
81 | bool | ||
82 | |||
80 | # | 83 | # |
81 | # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's | 84 | # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's |
82 | # to represent different areas of memory. This variable allows | 85 | # to represent different areas of memory. This variable allows |
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index 843364594e23..4297bc41bfd2 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c | |||
@@ -18,27 +18,28 @@ | |||
18 | * Depopulating per-cpu data for a cpu going offline would be a typical | 18 | * Depopulating per-cpu data for a cpu going offline would be a typical |
19 | * use case. You need to register a cpu hotplug handler for that purpose. | 19 | * use case. You need to register a cpu hotplug handler for that purpose. |
20 | */ | 20 | */ |
21 | void percpu_depopulate(void *__pdata, int cpu) | 21 | static void percpu_depopulate(void *__pdata, int cpu) |
22 | { | 22 | { |
23 | struct percpu_data *pdata = __percpu_disguise(__pdata); | 23 | struct percpu_data *pdata = __percpu_disguise(__pdata); |
24 | 24 | ||
25 | kfree(pdata->ptrs[cpu]); | 25 | kfree(pdata->ptrs[cpu]); |
26 | pdata->ptrs[cpu] = NULL; | 26 | pdata->ptrs[cpu] = NULL; |
27 | } | 27 | } |
28 | EXPORT_SYMBOL_GPL(percpu_depopulate); | ||
29 | 28 | ||
30 | /** | 29 | /** |
31 | * percpu_depopulate_mask - depopulate per-cpu data for some cpu's | 30 | * percpu_depopulate_mask - depopulate per-cpu data for some cpu's |
32 | * @__pdata: per-cpu data to depopulate | 31 | * @__pdata: per-cpu data to depopulate |
33 | * @mask: depopulate per-cpu data for cpu's selected through mask bits | 32 | * @mask: depopulate per-cpu data for cpu's selected through mask bits |
34 | */ | 33 | */ |
35 | void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) | 34 | static void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) |
36 | { | 35 | { |
37 | int cpu; | 36 | int cpu; |
38 | for_each_cpu_mask_nr(cpu, *mask) | 37 | for_each_cpu_mask_nr(cpu, *mask) |
39 | percpu_depopulate(__pdata, cpu); | 38 | percpu_depopulate(__pdata, cpu); |
40 | } | 39 | } |
41 | EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); | 40 | |
41 | #define percpu_depopulate_mask(__pdata, mask) \ | ||
42 | __percpu_depopulate_mask((__pdata), &(mask)) | ||
42 | 43 | ||
43 | /** | 44 | /** |
44 | * percpu_populate - populate per-cpu data for given cpu | 45 | * percpu_populate - populate per-cpu data for given cpu |
@@ -51,7 +52,7 @@ EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); | |||
51 | * use case. You need to register a cpu hotplug handler for that purpose. | 52 | * use case. You need to register a cpu hotplug handler for that purpose. |
52 | * Per-cpu object is populated with zeroed buffer. | 53 | * Per-cpu object is populated with zeroed buffer. |
53 | */ | 54 | */ |
54 | void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) | 55 | static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) |
55 | { | 56 | { |
56 | struct percpu_data *pdata = __percpu_disguise(__pdata); | 57 | struct percpu_data *pdata = __percpu_disguise(__pdata); |
57 | int node = cpu_to_node(cpu); | 58 | int node = cpu_to_node(cpu); |
@@ -68,7 +69,6 @@ void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) | |||
68 | pdata->ptrs[cpu] = kzalloc(size, gfp); | 69 | pdata->ptrs[cpu] = kzalloc(size, gfp); |
69 | return pdata->ptrs[cpu]; | 70 | return pdata->ptrs[cpu]; |
70 | } | 71 | } |
71 | EXPORT_SYMBOL_GPL(percpu_populate); | ||
72 | 72 | ||
73 | /** | 73 | /** |
74 | * percpu_populate_mask - populate per-cpu data for more cpu's | 74 | * percpu_populate_mask - populate per-cpu data for more cpu's |
@@ -79,8 +79,8 @@ EXPORT_SYMBOL_GPL(percpu_populate); | |||
79 | * | 79 | * |
80 | * Per-cpu objects are populated with zeroed buffers. | 80 | * Per-cpu objects are populated with zeroed buffers. |
81 | */ | 81 | */ |
82 | int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, | 82 | static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, |
83 | cpumask_t *mask) | 83 | cpumask_t *mask) |
84 | { | 84 | { |
85 | cpumask_t populated; | 85 | cpumask_t populated; |
86 | int cpu; | 86 | int cpu; |
@@ -94,7 +94,9 @@ int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, | |||
94 | cpu_set(cpu, populated); | 94 | cpu_set(cpu, populated); |
95 | return 0; | 95 | return 0; |
96 | } | 96 | } |
97 | EXPORT_SYMBOL_GPL(__percpu_populate_mask); | 97 | |
98 | #define percpu_populate_mask(__pdata, size, gfp, mask) \ | ||
99 | __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) | ||
98 | 100 | ||
99 | /** | 101 | /** |
100 | * percpu_alloc_mask - initial setup of per-cpu data | 102 | * percpu_alloc_mask - initial setup of per-cpu data |
diff --git a/mm/filemap.c b/mm/filemap.c index 2d3ec1ffc66e..5de7633e1dbe 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -109,7 +109,7 @@ | |||
109 | /* | 109 | /* |
110 | * Remove a page from the page cache and free it. Caller has to make | 110 | * Remove a page from the page cache and free it. Caller has to make |
111 | * sure the page is locked and that nobody else uses it - or that usage | 111 | * sure the page is locked and that nobody else uses it - or that usage |
112 | * is safe. The caller must hold a write_lock on the mapping's tree_lock. | 112 | * is safe. The caller must hold the mapping's tree_lock. |
113 | */ | 113 | */ |
114 | void __remove_from_page_cache(struct page *page) | 114 | void __remove_from_page_cache(struct page *page) |
115 | { | 115 | { |
@@ -141,9 +141,9 @@ void remove_from_page_cache(struct page *page) | |||
141 | 141 | ||
142 | BUG_ON(!PageLocked(page)); | 142 | BUG_ON(!PageLocked(page)); |
143 | 143 | ||
144 | write_lock_irq(&mapping->tree_lock); | 144 | spin_lock_irq(&mapping->tree_lock); |
145 | __remove_from_page_cache(page); | 145 | __remove_from_page_cache(page); |
146 | write_unlock_irq(&mapping->tree_lock); | 146 | spin_unlock_irq(&mapping->tree_lock); |
147 | } | 147 | } |
148 | 148 | ||
149 | static int sync_page(void *word) | 149 | static int sync_page(void *word) |
@@ -442,48 +442,52 @@ int filemap_write_and_wait_range(struct address_space *mapping, | |||
442 | } | 442 | } |
443 | 443 | ||
444 | /** | 444 | /** |
445 | * add_to_page_cache - add newly allocated pagecache pages | 445 | * add_to_page_cache_locked - add a locked page to the pagecache |
446 | * @page: page to add | 446 | * @page: page to add |
447 | * @mapping: the page's address_space | 447 | * @mapping: the page's address_space |
448 | * @offset: page index | 448 | * @offset: page index |
449 | * @gfp_mask: page allocation mode | 449 | * @gfp_mask: page allocation mode |
450 | * | 450 | * |
451 | * This function is used to add newly allocated pagecache pages; | 451 | * This function is used to add a page to the pagecache. It must be locked. |
452 | * the page is new, so we can just run SetPageLocked() against it. | ||
453 | * The other page state flags were set by rmqueue(). | ||
454 | * | ||
455 | * This function does not add the page to the LRU. The caller must do that. | 452 | * This function does not add the page to the LRU. The caller must do that. |
456 | */ | 453 | */ |
457 | int add_to_page_cache(struct page *page, struct address_space *mapping, | 454 | int add_to_page_cache_locked(struct page *page, struct address_space *mapping, |
458 | pgoff_t offset, gfp_t gfp_mask) | 455 | pgoff_t offset, gfp_t gfp_mask) |
459 | { | 456 | { |
460 | int error = mem_cgroup_cache_charge(page, current->mm, | 457 | int error; |
458 | |||
459 | VM_BUG_ON(!PageLocked(page)); | ||
460 | |||
461 | error = mem_cgroup_cache_charge(page, current->mm, | ||
461 | gfp_mask & ~__GFP_HIGHMEM); | 462 | gfp_mask & ~__GFP_HIGHMEM); |
462 | if (error) | 463 | if (error) |
463 | goto out; | 464 | goto out; |
464 | 465 | ||
465 | error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); | 466 | error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); |
466 | if (error == 0) { | 467 | if (error == 0) { |
467 | write_lock_irq(&mapping->tree_lock); | 468 | page_cache_get(page); |
469 | page->mapping = mapping; | ||
470 | page->index = offset; | ||
471 | |||
472 | spin_lock_irq(&mapping->tree_lock); | ||
468 | error = radix_tree_insert(&mapping->page_tree, offset, page); | 473 | error = radix_tree_insert(&mapping->page_tree, offset, page); |
469 | if (!error) { | 474 | if (likely(!error)) { |
470 | page_cache_get(page); | ||
471 | SetPageLocked(page); | ||
472 | page->mapping = mapping; | ||
473 | page->index = offset; | ||
474 | mapping->nrpages++; | 475 | mapping->nrpages++; |
475 | __inc_zone_page_state(page, NR_FILE_PAGES); | 476 | __inc_zone_page_state(page, NR_FILE_PAGES); |
476 | } else | 477 | } else { |
478 | page->mapping = NULL; | ||
477 | mem_cgroup_uncharge_cache_page(page); | 479 | mem_cgroup_uncharge_cache_page(page); |
480 | page_cache_release(page); | ||
481 | } | ||
478 | 482 | ||
479 | write_unlock_irq(&mapping->tree_lock); | 483 | spin_unlock_irq(&mapping->tree_lock); |
480 | radix_tree_preload_end(); | 484 | radix_tree_preload_end(); |
481 | } else | 485 | } else |
482 | mem_cgroup_uncharge_cache_page(page); | 486 | mem_cgroup_uncharge_cache_page(page); |
483 | out: | 487 | out: |
484 | return error; | 488 | return error; |
485 | } | 489 | } |
486 | EXPORT_SYMBOL(add_to_page_cache); | 490 | EXPORT_SYMBOL(add_to_page_cache_locked); |
487 | 491 | ||
488 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, | 492 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, |
489 | pgoff_t offset, gfp_t gfp_mask) | 493 | pgoff_t offset, gfp_t gfp_mask) |
@@ -633,15 +637,35 @@ void __lock_page_nosync(struct page *page) | |||
633 | * Is there a pagecache struct page at the given (mapping, offset) tuple? | 637 | * Is there a pagecache struct page at the given (mapping, offset) tuple? |
634 | * If yes, increment its refcount and return it; if no, return NULL. | 638 | * If yes, increment its refcount and return it; if no, return NULL. |
635 | */ | 639 | */ |
636 | struct page * find_get_page(struct address_space *mapping, pgoff_t offset) | 640 | struct page *find_get_page(struct address_space *mapping, pgoff_t offset) |
637 | { | 641 | { |
642 | void **pagep; | ||
638 | struct page *page; | 643 | struct page *page; |
639 | 644 | ||
640 | read_lock_irq(&mapping->tree_lock); | 645 | rcu_read_lock(); |
641 | page = radix_tree_lookup(&mapping->page_tree, offset); | 646 | repeat: |
642 | if (page) | 647 | page = NULL; |
643 | page_cache_get(page); | 648 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); |
644 | read_unlock_irq(&mapping->tree_lock); | 649 | if (pagep) { |
650 | page = radix_tree_deref_slot(pagep); | ||
651 | if (unlikely(!page || page == RADIX_TREE_RETRY)) | ||
652 | goto repeat; | ||
653 | |||
654 | if (!page_cache_get_speculative(page)) | ||
655 | goto repeat; | ||
656 | |||
657 | /* | ||
658 | * Has the page moved? | ||
659 | * This is part of the lockless pagecache protocol. See | ||
660 | * include/linux/pagemap.h for details. | ||
661 | */ | ||
662 | if (unlikely(page != *pagep)) { | ||
663 | page_cache_release(page); | ||
664 | goto repeat; | ||
665 | } | ||
666 | } | ||
667 | rcu_read_unlock(); | ||
668 | |||
645 | return page; | 669 | return page; |
646 | } | 670 | } |
647 | EXPORT_SYMBOL(find_get_page); | 671 | EXPORT_SYMBOL(find_get_page); |
@@ -656,32 +680,22 @@ EXPORT_SYMBOL(find_get_page); | |||
656 | * | 680 | * |
657 | * Returns zero if the page was not present. find_lock_page() may sleep. | 681 | * Returns zero if the page was not present. find_lock_page() may sleep. |
658 | */ | 682 | */ |
659 | struct page *find_lock_page(struct address_space *mapping, | 683 | struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) |
660 | pgoff_t offset) | ||
661 | { | 684 | { |
662 | struct page *page; | 685 | struct page *page; |
663 | 686 | ||
664 | repeat: | 687 | repeat: |
665 | read_lock_irq(&mapping->tree_lock); | 688 | page = find_get_page(mapping, offset); |
666 | page = radix_tree_lookup(&mapping->page_tree, offset); | ||
667 | if (page) { | 689 | if (page) { |
668 | page_cache_get(page); | 690 | lock_page(page); |
669 | if (TestSetPageLocked(page)) { | 691 | /* Has the page been truncated? */ |
670 | read_unlock_irq(&mapping->tree_lock); | 692 | if (unlikely(page->mapping != mapping)) { |
671 | __lock_page(page); | 693 | unlock_page(page); |
672 | 694 | page_cache_release(page); | |
673 | /* Has the page been truncated while we slept? */ | 695 | goto repeat; |
674 | if (unlikely(page->mapping != mapping)) { | ||
675 | unlock_page(page); | ||
676 | page_cache_release(page); | ||
677 | goto repeat; | ||
678 | } | ||
679 | VM_BUG_ON(page->index != offset); | ||
680 | goto out; | ||
681 | } | 696 | } |
697 | VM_BUG_ON(page->index != offset); | ||
682 | } | 698 | } |
683 | read_unlock_irq(&mapping->tree_lock); | ||
684 | out: | ||
685 | return page; | 699 | return page; |
686 | } | 700 | } |
687 | EXPORT_SYMBOL(find_lock_page); | 701 | EXPORT_SYMBOL(find_lock_page); |
@@ -747,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, | |||
747 | { | 761 | { |
748 | unsigned int i; | 762 | unsigned int i; |
749 | unsigned int ret; | 763 | unsigned int ret; |
764 | unsigned int nr_found; | ||
765 | |||
766 | rcu_read_lock(); | ||
767 | restart: | ||
768 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
769 | (void ***)pages, start, nr_pages); | ||
770 | ret = 0; | ||
771 | for (i = 0; i < nr_found; i++) { | ||
772 | struct page *page; | ||
773 | repeat: | ||
774 | page = radix_tree_deref_slot((void **)pages[i]); | ||
775 | if (unlikely(!page)) | ||
776 | continue; | ||
777 | /* | ||
778 | * this can only trigger if nr_found == 1, making livelock | ||
779 | * a non issue. | ||
780 | */ | ||
781 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
782 | goto restart; | ||
750 | 783 | ||
751 | read_lock_irq(&mapping->tree_lock); | 784 | if (!page_cache_get_speculative(page)) |
752 | ret = radix_tree_gang_lookup(&mapping->page_tree, | 785 | goto repeat; |
753 | (void **)pages, start, nr_pages); | 786 | |
754 | for (i = 0; i < ret; i++) | 787 | /* Has the page moved? */ |
755 | page_cache_get(pages[i]); | 788 | if (unlikely(page != *((void **)pages[i]))) { |
756 | read_unlock_irq(&mapping->tree_lock); | 789 | page_cache_release(page); |
790 | goto repeat; | ||
791 | } | ||
792 | |||
793 | pages[ret] = page; | ||
794 | ret++; | ||
795 | } | ||
796 | rcu_read_unlock(); | ||
757 | return ret; | 797 | return ret; |
758 | } | 798 | } |
759 | 799 | ||
@@ -774,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, | |||
774 | { | 814 | { |
775 | unsigned int i; | 815 | unsigned int i; |
776 | unsigned int ret; | 816 | unsigned int ret; |
817 | unsigned int nr_found; | ||
818 | |||
819 | rcu_read_lock(); | ||
820 | restart: | ||
821 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
822 | (void ***)pages, index, nr_pages); | ||
823 | ret = 0; | ||
824 | for (i = 0; i < nr_found; i++) { | ||
825 | struct page *page; | ||
826 | repeat: | ||
827 | page = radix_tree_deref_slot((void **)pages[i]); | ||
828 | if (unlikely(!page)) | ||
829 | continue; | ||
830 | /* | ||
831 | * this can only trigger if nr_found == 1, making livelock | ||
832 | * a non issue. | ||
833 | */ | ||
834 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
835 | goto restart; | ||
777 | 836 | ||
778 | read_lock_irq(&mapping->tree_lock); | 837 | if (page->mapping == NULL || page->index != index) |
779 | ret = radix_tree_gang_lookup(&mapping->page_tree, | ||
780 | (void **)pages, index, nr_pages); | ||
781 | for (i = 0; i < ret; i++) { | ||
782 | if (pages[i]->mapping == NULL || pages[i]->index != index) | ||
783 | break; | 838 | break; |
784 | 839 | ||
785 | page_cache_get(pages[i]); | 840 | if (!page_cache_get_speculative(page)) |
841 | goto repeat; | ||
842 | |||
843 | /* Has the page moved? */ | ||
844 | if (unlikely(page != *((void **)pages[i]))) { | ||
845 | page_cache_release(page); | ||
846 | goto repeat; | ||
847 | } | ||
848 | |||
849 | pages[ret] = page; | ||
850 | ret++; | ||
786 | index++; | 851 | index++; |
787 | } | 852 | } |
788 | read_unlock_irq(&mapping->tree_lock); | 853 | rcu_read_unlock(); |
789 | return i; | 854 | return ret; |
790 | } | 855 | } |
791 | EXPORT_SYMBOL(find_get_pages_contig); | 856 | EXPORT_SYMBOL(find_get_pages_contig); |
792 | 857 | ||
@@ -806,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | |||
806 | { | 871 | { |
807 | unsigned int i; | 872 | unsigned int i; |
808 | unsigned int ret; | 873 | unsigned int ret; |
874 | unsigned int nr_found; | ||
875 | |||
876 | rcu_read_lock(); | ||
877 | restart: | ||
878 | nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree, | ||
879 | (void ***)pages, *index, nr_pages, tag); | ||
880 | ret = 0; | ||
881 | for (i = 0; i < nr_found; i++) { | ||
882 | struct page *page; | ||
883 | repeat: | ||
884 | page = radix_tree_deref_slot((void **)pages[i]); | ||
885 | if (unlikely(!page)) | ||
886 | continue; | ||
887 | /* | ||
888 | * this can only trigger if nr_found == 1, making livelock | ||
889 | * a non issue. | ||
890 | */ | ||
891 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
892 | goto restart; | ||
893 | |||
894 | if (!page_cache_get_speculative(page)) | ||
895 | goto repeat; | ||
896 | |||
897 | /* Has the page moved? */ | ||
898 | if (unlikely(page != *((void **)pages[i]))) { | ||
899 | page_cache_release(page); | ||
900 | goto repeat; | ||
901 | } | ||
902 | |||
903 | pages[ret] = page; | ||
904 | ret++; | ||
905 | } | ||
906 | rcu_read_unlock(); | ||
809 | 907 | ||
810 | read_lock_irq(&mapping->tree_lock); | ||
811 | ret = radix_tree_gang_lookup_tag(&mapping->page_tree, | ||
812 | (void **)pages, *index, nr_pages, tag); | ||
813 | for (i = 0; i < ret; i++) | ||
814 | page_cache_get(pages[i]); | ||
815 | if (ret) | 908 | if (ret) |
816 | *index = pages[ret - 1]->index + 1; | 909 | *index = pages[ret - 1]->index + 1; |
817 | read_unlock_irq(&mapping->tree_lock); | 910 | |
818 | return ret; | 911 | return ret; |
819 | } | 912 | } |
820 | EXPORT_SYMBOL(find_get_pages_tag); | 913 | EXPORT_SYMBOL(find_get_pages_tag); |
@@ -1665,8 +1758,9 @@ static int __remove_suid(struct dentry *dentry, int kill) | |||
1665 | return notify_change(dentry, &newattrs); | 1758 | return notify_change(dentry, &newattrs); |
1666 | } | 1759 | } |
1667 | 1760 | ||
1668 | int remove_suid(struct dentry *dentry) | 1761 | int file_remove_suid(struct file *file) |
1669 | { | 1762 | { |
1763 | struct dentry *dentry = file->f_path.dentry; | ||
1670 | int killsuid = should_remove_suid(dentry); | 1764 | int killsuid = should_remove_suid(dentry); |
1671 | int killpriv = security_inode_need_killpriv(dentry); | 1765 | int killpriv = security_inode_need_killpriv(dentry); |
1672 | int error = 0; | 1766 | int error = 0; |
@@ -1680,7 +1774,7 @@ int remove_suid(struct dentry *dentry) | |||
1680 | 1774 | ||
1681 | return error; | 1775 | return error; |
1682 | } | 1776 | } |
1683 | EXPORT_SYMBOL(remove_suid); | 1777 | EXPORT_SYMBOL(file_remove_suid); |
1684 | 1778 | ||
1685 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, | 1779 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, |
1686 | const struct iovec *iov, size_t base, size_t bytes) | 1780 | const struct iovec *iov, size_t base, size_t bytes) |
@@ -2436,7 +2530,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | |||
2436 | if (count == 0) | 2530 | if (count == 0) |
2437 | goto out; | 2531 | goto out; |
2438 | 2532 | ||
2439 | err = remove_suid(file->f_path.dentry); | 2533 | err = file_remove_suid(file); |
2440 | if (err) | 2534 | if (err) |
2441 | goto out; | 2535 | goto out; |
2442 | 2536 | ||
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 3e744abcce9d..98a3f31ccd6a 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c | |||
@@ -380,7 +380,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, | |||
380 | if (count == 0) | 380 | if (count == 0) |
381 | goto out_backing; | 381 | goto out_backing; |
382 | 382 | ||
383 | ret = remove_suid(filp->f_path.dentry); | 383 | ret = file_remove_suid(filp); |
384 | if (ret) | 384 | if (ret) |
385 | goto out_backing; | 385 | goto out_backing; |
386 | 386 | ||
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a8bf4ab01f86..3be79dc18c5c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -1026,18 +1026,6 @@ static void __init report_hugepages(void) | |||
1026 | } | 1026 | } |
1027 | } | 1027 | } |
1028 | 1028 | ||
1029 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
1030 | { | ||
1031 | int node; | ||
1032 | unsigned int nr = 0; | ||
1033 | |||
1034 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
1035 | nr += array[node]; | ||
1036 | |||
1037 | return nr; | ||
1038 | } | ||
1039 | |||
1040 | #ifdef CONFIG_SYSCTL | ||
1041 | #ifdef CONFIG_HIGHMEM | 1029 | #ifdef CONFIG_HIGHMEM |
1042 | static void try_to_free_low(struct hstate *h, unsigned long count) | 1030 | static void try_to_free_low(struct hstate *h, unsigned long count) |
1043 | { | 1031 | { |
@@ -1386,6 +1374,18 @@ static int __init hugetlb_default_setup(char *s) | |||
1386 | } | 1374 | } |
1387 | __setup("default_hugepagesz=", hugetlb_default_setup); | 1375 | __setup("default_hugepagesz=", hugetlb_default_setup); |
1388 | 1376 | ||
1377 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
1378 | { | ||
1379 | int node; | ||
1380 | unsigned int nr = 0; | ||
1381 | |||
1382 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
1383 | nr += array[node]; | ||
1384 | |||
1385 | return nr; | ||
1386 | } | ||
1387 | |||
1388 | #ifdef CONFIG_SYSCTL | ||
1389 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, | 1389 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, |
1390 | struct file *file, void __user *buffer, | 1390 | struct file *file, void __user *buffer, |
1391 | size_t *length, loff_t *ppos) | 1391 | size_t *length, loff_t *ppos) |
diff --git a/mm/memory.c b/mm/memory.c index 262e3eb6601a..a8ca04faaea6 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -374,7 +374,8 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) | |||
374 | * | 374 | * |
375 | * The calling function must still handle the error. | 375 | * The calling function must still handle the error. |
376 | */ | 376 | */ |
377 | void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr) | 377 | static void print_bad_pte(struct vm_area_struct *vma, pte_t pte, |
378 | unsigned long vaddr) | ||
378 | { | 379 | { |
379 | printk(KERN_ERR "Bad pte = %08llx, process = %s, " | 380 | printk(KERN_ERR "Bad pte = %08llx, process = %s, " |
380 | "vm_flags = %lx, vaddr = %lx\n", | 381 | "vm_flags = %lx, vaddr = %lx\n", |
diff --git a/mm/migrate.c b/mm/migrate.c index d8c65a65c61d..153572fb60b8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, | |||
285 | 285 | ||
286 | page = migration_entry_to_page(entry); | 286 | page = migration_entry_to_page(entry); |
287 | 287 | ||
288 | get_page(page); | 288 | /* |
289 | * Once radix-tree replacement of page migration started, page_count | ||
290 | * *must* be zero. And, we don't want to call wait_on_page_locked() | ||
291 | * against a page without get_page(). | ||
292 | * So, we use get_page_unless_zero(), here. Even failed, page fault | ||
293 | * will occur again. | ||
294 | */ | ||
295 | if (!get_page_unless_zero(page)) | ||
296 | goto out; | ||
289 | pte_unmap_unlock(ptep, ptl); | 297 | pte_unmap_unlock(ptep, ptl); |
290 | wait_on_page_locked(page); | 298 | wait_on_page_locked(page); |
291 | put_page(page); | 299 | put_page(page); |
@@ -305,6 +313,7 @@ out: | |||
305 | static int migrate_page_move_mapping(struct address_space *mapping, | 313 | static int migrate_page_move_mapping(struct address_space *mapping, |
306 | struct page *newpage, struct page *page) | 314 | struct page *newpage, struct page *page) |
307 | { | 315 | { |
316 | int expected_count; | ||
308 | void **pslot; | 317 | void **pslot; |
309 | 318 | ||
310 | if (!mapping) { | 319 | if (!mapping) { |
@@ -314,14 +323,20 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
314 | return 0; | 323 | return 0; |
315 | } | 324 | } |
316 | 325 | ||
317 | write_lock_irq(&mapping->tree_lock); | 326 | spin_lock_irq(&mapping->tree_lock); |
318 | 327 | ||
319 | pslot = radix_tree_lookup_slot(&mapping->page_tree, | 328 | pslot = radix_tree_lookup_slot(&mapping->page_tree, |
320 | page_index(page)); | 329 | page_index(page)); |
321 | 330 | ||
322 | if (page_count(page) != 2 + !!PagePrivate(page) || | 331 | expected_count = 2 + !!PagePrivate(page); |
332 | if (page_count(page) != expected_count || | ||
323 | (struct page *)radix_tree_deref_slot(pslot) != page) { | 333 | (struct page *)radix_tree_deref_slot(pslot) != page) { |
324 | write_unlock_irq(&mapping->tree_lock); | 334 | spin_unlock_irq(&mapping->tree_lock); |
335 | return -EAGAIN; | ||
336 | } | ||
337 | |||
338 | if (!page_freeze_refs(page, expected_count)) { | ||
339 | spin_unlock_irq(&mapping->tree_lock); | ||
325 | return -EAGAIN; | 340 | return -EAGAIN; |
326 | } | 341 | } |
327 | 342 | ||
@@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
338 | 353 | ||
339 | radix_tree_replace_slot(pslot, newpage); | 354 | radix_tree_replace_slot(pslot, newpage); |
340 | 355 | ||
356 | page_unfreeze_refs(page, expected_count); | ||
341 | /* | 357 | /* |
342 | * Drop cache reference from old page. | 358 | * Drop cache reference from old page. |
343 | * We know this isn't the last reference. | 359 | * We know this isn't the last reference. |
@@ -357,10 +373,9 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
357 | __dec_zone_page_state(page, NR_FILE_PAGES); | 373 | __dec_zone_page_state(page, NR_FILE_PAGES); |
358 | __inc_zone_page_state(newpage, NR_FILE_PAGES); | 374 | __inc_zone_page_state(newpage, NR_FILE_PAGES); |
359 | 375 | ||
360 | write_unlock_irq(&mapping->tree_lock); | 376 | spin_unlock_irq(&mapping->tree_lock); |
361 | if (!PageSwapCache(newpage)) { | 377 | if (!PageSwapCache(newpage)) |
362 | mem_cgroup_uncharge_cache_page(page); | 378 | mem_cgroup_uncharge_cache_page(page); |
363 | } | ||
364 | 379 | ||
365 | return 0; | 380 | return 0; |
366 | } | 381 | } |
diff --git a/mm/nommu.c b/mm/nommu.c index 4462b6a3fcb9..5edccd9c9218 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/vmalloc.h> | 24 | #include <linux/vmalloc.h> |
25 | #include <linux/ptrace.h> | 25 | #include <linux/tracehook.h> |
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
@@ -745,7 +745,7 @@ static unsigned long determine_vm_flags(struct file *file, | |||
745 | * it's being traced - otherwise breakpoints set in it may interfere | 745 | * it's being traced - otherwise breakpoints set in it may interfere |
746 | * with another untraced process | 746 | * with another untraced process |
747 | */ | 747 | */ |
748 | if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED)) | 748 | if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current)) |
749 | vm_flags &= ~VM_MAYSHARE; | 749 | vm_flags &= ~VM_MAYSHARE; |
750 | 750 | ||
751 | return vm_flags; | 751 | return vm_flags; |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 94c6d8988ab3..24de8b65fdbd 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -1088,7 +1088,7 @@ int __set_page_dirty_nobuffers(struct page *page) | |||
1088 | if (!mapping) | 1088 | if (!mapping) |
1089 | return 1; | 1089 | return 1; |
1090 | 1090 | ||
1091 | write_lock_irq(&mapping->tree_lock); | 1091 | spin_lock_irq(&mapping->tree_lock); |
1092 | mapping2 = page_mapping(page); | 1092 | mapping2 = page_mapping(page); |
1093 | if (mapping2) { /* Race with truncate? */ | 1093 | if (mapping2) { /* Race with truncate? */ |
1094 | BUG_ON(mapping2 != mapping); | 1094 | BUG_ON(mapping2 != mapping); |
@@ -1102,7 +1102,7 @@ int __set_page_dirty_nobuffers(struct page *page) | |||
1102 | radix_tree_tag_set(&mapping->page_tree, | 1102 | radix_tree_tag_set(&mapping->page_tree, |
1103 | page_index(page), PAGECACHE_TAG_DIRTY); | 1103 | page_index(page), PAGECACHE_TAG_DIRTY); |
1104 | } | 1104 | } |
1105 | write_unlock_irq(&mapping->tree_lock); | 1105 | spin_unlock_irq(&mapping->tree_lock); |
1106 | if (mapping->host) { | 1106 | if (mapping->host) { |
1107 | /* !PageAnon && !swapper_space */ | 1107 | /* !PageAnon && !swapper_space */ |
1108 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 1108 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
@@ -1258,7 +1258,7 @@ int test_clear_page_writeback(struct page *page) | |||
1258 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 1258 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
1259 | unsigned long flags; | 1259 | unsigned long flags; |
1260 | 1260 | ||
1261 | write_lock_irqsave(&mapping->tree_lock, flags); | 1261 | spin_lock_irqsave(&mapping->tree_lock, flags); |
1262 | ret = TestClearPageWriteback(page); | 1262 | ret = TestClearPageWriteback(page); |
1263 | if (ret) { | 1263 | if (ret) { |
1264 | radix_tree_tag_clear(&mapping->page_tree, | 1264 | radix_tree_tag_clear(&mapping->page_tree, |
@@ -1269,7 +1269,7 @@ int test_clear_page_writeback(struct page *page) | |||
1269 | __bdi_writeout_inc(bdi); | 1269 | __bdi_writeout_inc(bdi); |
1270 | } | 1270 | } |
1271 | } | 1271 | } |
1272 | write_unlock_irqrestore(&mapping->tree_lock, flags); | 1272 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
1273 | } else { | 1273 | } else { |
1274 | ret = TestClearPageWriteback(page); | 1274 | ret = TestClearPageWriteback(page); |
1275 | } | 1275 | } |
@@ -1287,7 +1287,7 @@ int test_set_page_writeback(struct page *page) | |||
1287 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 1287 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
1288 | unsigned long flags; | 1288 | unsigned long flags; |
1289 | 1289 | ||
1290 | write_lock_irqsave(&mapping->tree_lock, flags); | 1290 | spin_lock_irqsave(&mapping->tree_lock, flags); |
1291 | ret = TestSetPageWriteback(page); | 1291 | ret = TestSetPageWriteback(page); |
1292 | if (!ret) { | 1292 | if (!ret) { |
1293 | radix_tree_tag_set(&mapping->page_tree, | 1293 | radix_tree_tag_set(&mapping->page_tree, |
@@ -1300,7 +1300,7 @@ int test_set_page_writeback(struct page *page) | |||
1300 | radix_tree_tag_clear(&mapping->page_tree, | 1300 | radix_tree_tag_clear(&mapping->page_tree, |
1301 | page_index(page), | 1301 | page_index(page), |
1302 | PAGECACHE_TAG_DIRTY); | 1302 | PAGECACHE_TAG_DIRTY); |
1303 | write_unlock_irqrestore(&mapping->tree_lock, flags); | 1303 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
1304 | } else { | 1304 | } else { |
1305 | ret = TestSetPageWriteback(page); | 1305 | ret = TestSetPageWriteback(page); |
1306 | } | 1306 | } |
diff --git a/mm/readahead.c b/mm/readahead.c index d8723a5f6496..77e8ddf945e9 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -382,9 +382,9 @@ ondemand_readahead(struct address_space *mapping, | |||
382 | if (hit_readahead_marker) { | 382 | if (hit_readahead_marker) { |
383 | pgoff_t start; | 383 | pgoff_t start; |
384 | 384 | ||
385 | read_lock_irq(&mapping->tree_lock); | 385 | rcu_read_lock(); |
386 | start = radix_tree_next_hole(&mapping->page_tree, offset, max+1); | 386 | start = radix_tree_next_hole(&mapping->page_tree, offset,max+1); |
387 | read_unlock_irq(&mapping->tree_lock); | 387 | rcu_read_unlock(); |
388 | 388 | ||
389 | if (!start || start - offset > max) | 389 | if (!start || start - offset > max) |
390 | return 0; | 390 | return 0; |
@@ -138,7 +138,7 @@ void anon_vma_unlink(struct vm_area_struct *vma) | |||
138 | anon_vma_free(anon_vma); | 138 | anon_vma_free(anon_vma); |
139 | } | 139 | } |
140 | 140 | ||
141 | static void anon_vma_ctor(struct kmem_cache *cachep, void *data) | 141 | static void anon_vma_ctor(void *data) |
142 | { | 142 | { |
143 | struct anon_vma *anon_vma = data; | 143 | struct anon_vma *anon_vma = data; |
144 | 144 | ||
diff --git a/mm/shmem.c b/mm/shmem.c index f92fea94d037..952d361774bb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -936,7 +936,7 @@ found: | |||
936 | spin_lock(&info->lock); | 936 | spin_lock(&info->lock); |
937 | ptr = shmem_swp_entry(info, idx, NULL); | 937 | ptr = shmem_swp_entry(info, idx, NULL); |
938 | if (ptr && ptr->val == entry.val) { | 938 | if (ptr && ptr->val == entry.val) { |
939 | error = add_to_page_cache(page, inode->i_mapping, | 939 | error = add_to_page_cache_locked(page, inode->i_mapping, |
940 | idx, GFP_NOWAIT); | 940 | idx, GFP_NOWAIT); |
941 | /* does mem_cgroup_uncharge_cache_page on error */ | 941 | /* does mem_cgroup_uncharge_cache_page on error */ |
942 | } else /* we must compensate for our precharge above */ | 942 | } else /* we must compensate for our precharge above */ |
@@ -1301,8 +1301,8 @@ repeat: | |||
1301 | SetPageUptodate(filepage); | 1301 | SetPageUptodate(filepage); |
1302 | set_page_dirty(filepage); | 1302 | set_page_dirty(filepage); |
1303 | swap_free(swap); | 1303 | swap_free(swap); |
1304 | } else if (!(error = add_to_page_cache( | 1304 | } else if (!(error = add_to_page_cache_locked(swappage, mapping, |
1305 | swappage, mapping, idx, GFP_NOWAIT))) { | 1305 | idx, GFP_NOWAIT))) { |
1306 | info->flags |= SHMEM_PAGEIN; | 1306 | info->flags |= SHMEM_PAGEIN; |
1307 | shmem_swp_set(info, entry, 0); | 1307 | shmem_swp_set(info, entry, 0); |
1308 | shmem_swp_unmap(entry); | 1308 | shmem_swp_unmap(entry); |
@@ -2352,7 +2352,7 @@ static void shmem_destroy_inode(struct inode *inode) | |||
2352 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); | 2352 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); |
2353 | } | 2353 | } |
2354 | 2354 | ||
2355 | static void init_once(struct kmem_cache *cachep, void *foo) | 2355 | static void init_once(void *foo) |
2356 | { | 2356 | { |
2357 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; | 2357 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; |
2358 | 2358 | ||
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c index f5664c5b9eb1..8e5aadd7dcd6 100644 --- a/mm/shmem_acl.c +++ b/mm/shmem_acl.c | |||
@@ -191,7 +191,7 @@ shmem_check_acl(struct inode *inode, int mask) | |||
191 | * shmem_permission - permission() inode operation | 191 | * shmem_permission - permission() inode operation |
192 | */ | 192 | */ |
193 | int | 193 | int |
194 | shmem_permission(struct inode *inode, int mask, struct nameidata *nd) | 194 | shmem_permission(struct inode *inode, int mask) |
195 | { | 195 | { |
196 | return generic_permission(inode, mask, shmem_check_acl); | 196 | return generic_permission(inode, mask, shmem_check_acl); |
197 | } | 197 | } |
@@ -406,7 +406,7 @@ struct kmem_cache { | |||
406 | unsigned int dflags; /* dynamic flags */ | 406 | unsigned int dflags; /* dynamic flags */ |
407 | 407 | ||
408 | /* constructor func */ | 408 | /* constructor func */ |
409 | void (*ctor)(struct kmem_cache *, void *); | 409 | void (*ctor)(void *obj); |
410 | 410 | ||
411 | /* 5) cache creation/removal */ | 411 | /* 5) cache creation/removal */ |
412 | const char *name; | 412 | const char *name; |
@@ -2137,8 +2137,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) | |||
2137 | */ | 2137 | */ |
2138 | struct kmem_cache * | 2138 | struct kmem_cache * |
2139 | kmem_cache_create (const char *name, size_t size, size_t align, | 2139 | kmem_cache_create (const char *name, size_t size, size_t align, |
2140 | unsigned long flags, | 2140 | unsigned long flags, void (*ctor)(void *)) |
2141 | void (*ctor)(struct kmem_cache *, void *)) | ||
2142 | { | 2141 | { |
2143 | size_t left_over, slab_size, ralign; | 2142 | size_t left_over, slab_size, ralign; |
2144 | struct kmem_cache *cachep = NULL, *pc; | 2143 | struct kmem_cache *cachep = NULL, *pc; |
@@ -2653,7 +2652,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2653 | * They must also be threaded. | 2652 | * They must also be threaded. |
2654 | */ | 2653 | */ |
2655 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) | 2654 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) |
2656 | cachep->ctor(cachep, objp + obj_offset(cachep)); | 2655 | cachep->ctor(objp + obj_offset(cachep)); |
2657 | 2656 | ||
2658 | if (cachep->flags & SLAB_RED_ZONE) { | 2657 | if (cachep->flags & SLAB_RED_ZONE) { |
2659 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) | 2658 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) |
@@ -2669,7 +2668,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2669 | cachep->buffer_size / PAGE_SIZE, 0); | 2668 | cachep->buffer_size / PAGE_SIZE, 0); |
2670 | #else | 2669 | #else |
2671 | if (cachep->ctor) | 2670 | if (cachep->ctor) |
2672 | cachep->ctor(cachep, objp); | 2671 | cachep->ctor(objp); |
2673 | #endif | 2672 | #endif |
2674 | slab_bufctl(slabp)[i] = i + 1; | 2673 | slab_bufctl(slabp)[i] = i + 1; |
2675 | } | 2674 | } |
@@ -3093,7 +3092,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
3093 | #endif | 3092 | #endif |
3094 | objp += obj_offset(cachep); | 3093 | objp += obj_offset(cachep); |
3095 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 3094 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
3096 | cachep->ctor(cachep, objp); | 3095 | cachep->ctor(objp); |
3097 | #if ARCH_SLAB_MINALIGN | 3096 | #if ARCH_SLAB_MINALIGN |
3098 | if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { | 3097 | if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { |
3099 | printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", | 3098 | printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", |
@@ -525,12 +525,11 @@ struct kmem_cache { | |||
525 | unsigned int size, align; | 525 | unsigned int size, align; |
526 | unsigned long flags; | 526 | unsigned long flags; |
527 | const char *name; | 527 | const char *name; |
528 | void (*ctor)(struct kmem_cache *, void *); | 528 | void (*ctor)(void *); |
529 | }; | 529 | }; |
530 | 530 | ||
531 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, | 531 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, |
532 | size_t align, unsigned long flags, | 532 | size_t align, unsigned long flags, void (*ctor)(void *)) |
533 | void (*ctor)(struct kmem_cache *, void *)) | ||
534 | { | 533 | { |
535 | struct kmem_cache *c; | 534 | struct kmem_cache *c; |
536 | 535 | ||
@@ -575,7 +574,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
575 | b = slob_new_page(flags, get_order(c->size), node); | 574 | b = slob_new_page(flags, get_order(c->size), node); |
576 | 575 | ||
577 | if (c->ctor) | 576 | if (c->ctor) |
578 | c->ctor(c, b); | 577 | c->ctor(b); |
579 | 578 | ||
580 | return b; | 579 | return b; |
581 | } | 580 | } |
@@ -1012,7 +1012,7 @@ __setup("slub_debug", setup_slub_debug); | |||
1012 | 1012 | ||
1013 | static unsigned long kmem_cache_flags(unsigned long objsize, | 1013 | static unsigned long kmem_cache_flags(unsigned long objsize, |
1014 | unsigned long flags, const char *name, | 1014 | unsigned long flags, const char *name, |
1015 | void (*ctor)(struct kmem_cache *, void *)) | 1015 | void (*ctor)(void *)) |
1016 | { | 1016 | { |
1017 | /* | 1017 | /* |
1018 | * Enable debugging if selected on the kernel commandline. | 1018 | * Enable debugging if selected on the kernel commandline. |
@@ -1040,7 +1040,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, | |||
1040 | static inline void add_full(struct kmem_cache_node *n, struct page *page) {} | 1040 | static inline void add_full(struct kmem_cache_node *n, struct page *page) {} |
1041 | static inline unsigned long kmem_cache_flags(unsigned long objsize, | 1041 | static inline unsigned long kmem_cache_flags(unsigned long objsize, |
1042 | unsigned long flags, const char *name, | 1042 | unsigned long flags, const char *name, |
1043 | void (*ctor)(struct kmem_cache *, void *)) | 1043 | void (*ctor)(void *)) |
1044 | { | 1044 | { |
1045 | return flags; | 1045 | return flags; |
1046 | } | 1046 | } |
@@ -1103,7 +1103,7 @@ static void setup_object(struct kmem_cache *s, struct page *page, | |||
1103 | { | 1103 | { |
1104 | setup_object_debug(s, page, object); | 1104 | setup_object_debug(s, page, object); |
1105 | if (unlikely(s->ctor)) | 1105 | if (unlikely(s->ctor)) |
1106 | s->ctor(s, object); | 1106 | s->ctor(object); |
1107 | } | 1107 | } |
1108 | 1108 | ||
1109 | static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | 1109 | static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) |
@@ -2286,7 +2286,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2286 | static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | 2286 | static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, |
2287 | const char *name, size_t size, | 2287 | const char *name, size_t size, |
2288 | size_t align, unsigned long flags, | 2288 | size_t align, unsigned long flags, |
2289 | void (*ctor)(struct kmem_cache *, void *)) | 2289 | void (*ctor)(void *)) |
2290 | { | 2290 | { |
2291 | memset(s, 0, kmem_size); | 2291 | memset(s, 0, kmem_size); |
2292 | s->name = name; | 2292 | s->name = name; |
@@ -3042,7 +3042,7 @@ static int slab_unmergeable(struct kmem_cache *s) | |||
3042 | 3042 | ||
3043 | static struct kmem_cache *find_mergeable(size_t size, | 3043 | static struct kmem_cache *find_mergeable(size_t size, |
3044 | size_t align, unsigned long flags, const char *name, | 3044 | size_t align, unsigned long flags, const char *name, |
3045 | void (*ctor)(struct kmem_cache *, void *)) | 3045 | void (*ctor)(void *)) |
3046 | { | 3046 | { |
3047 | struct kmem_cache *s; | 3047 | struct kmem_cache *s; |
3048 | 3048 | ||
@@ -3082,8 +3082,7 @@ static struct kmem_cache *find_mergeable(size_t size, | |||
3082 | } | 3082 | } |
3083 | 3083 | ||
3084 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, | 3084 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, |
3085 | size_t align, unsigned long flags, | 3085 | size_t align, unsigned long flags, void (*ctor)(void *)) |
3086 | void (*ctor)(struct kmem_cache *, void *)) | ||
3087 | { | 3086 | { |
3088 | struct kmem_cache *s; | 3087 | struct kmem_cache *s; |
3089 | 3088 | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 8ffc08990008..5d9dbbb9d39e 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -377,7 +377,7 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) | |||
377 | } | 377 | } |
378 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 378 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
379 | 379 | ||
380 | struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | 380 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) |
381 | { | 381 | { |
382 | struct page *map; | 382 | struct page *map; |
383 | struct mem_section *ms = __nr_to_section(pnum); | 383 | struct mem_section *ms = __nr_to_section(pnum); |
diff --git a/mm/swap_state.c b/mm/swap_state.c index d8aadaf2a0ba..b8035b055129 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -39,7 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = { | |||
39 | 39 | ||
40 | struct address_space swapper_space = { | 40 | struct address_space swapper_space = { |
41 | .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), | 41 | .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), |
42 | .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock), | 42 | .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), |
43 | .a_ops = &swap_aops, | 43 | .a_ops = &swap_aops, |
44 | .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), | 44 | .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), |
45 | .backing_dev_info = &swap_backing_dev_info, | 45 | .backing_dev_info = &swap_backing_dev_info, |
@@ -56,7 +56,8 @@ static struct { | |||
56 | 56 | ||
57 | void show_swap_cache_info(void) | 57 | void show_swap_cache_info(void) |
58 | { | 58 | { |
59 | printk("Swap cache: add %lu, delete %lu, find %lu/%lu\n", | 59 | printk("%lu pages in swap cache\n", total_swapcache_pages); |
60 | printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", | ||
60 | swap_cache_info.add_total, swap_cache_info.del_total, | 61 | swap_cache_info.add_total, swap_cache_info.del_total, |
61 | swap_cache_info.find_success, swap_cache_info.find_total); | 62 | swap_cache_info.find_success, swap_cache_info.find_total); |
62 | printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); | 63 | printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); |
@@ -64,7 +65,7 @@ void show_swap_cache_info(void) | |||
64 | } | 65 | } |
65 | 66 | ||
66 | /* | 67 | /* |
67 | * add_to_swap_cache resembles add_to_page_cache on swapper_space, | 68 | * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, |
68 | * but sets SwapCache flag and private instead of mapping and index. | 69 | * but sets SwapCache flag and private instead of mapping and index. |
69 | */ | 70 | */ |
70 | int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) | 71 | int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) |
@@ -76,19 +77,26 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) | |||
76 | BUG_ON(PagePrivate(page)); | 77 | BUG_ON(PagePrivate(page)); |
77 | error = radix_tree_preload(gfp_mask); | 78 | error = radix_tree_preload(gfp_mask); |
78 | if (!error) { | 79 | if (!error) { |
79 | write_lock_irq(&swapper_space.tree_lock); | 80 | page_cache_get(page); |
81 | SetPageSwapCache(page); | ||
82 | set_page_private(page, entry.val); | ||
83 | |||
84 | spin_lock_irq(&swapper_space.tree_lock); | ||
80 | error = radix_tree_insert(&swapper_space.page_tree, | 85 | error = radix_tree_insert(&swapper_space.page_tree, |
81 | entry.val, page); | 86 | entry.val, page); |
82 | if (!error) { | 87 | if (likely(!error)) { |
83 | page_cache_get(page); | ||
84 | SetPageSwapCache(page); | ||
85 | set_page_private(page, entry.val); | ||
86 | total_swapcache_pages++; | 88 | total_swapcache_pages++; |
87 | __inc_zone_page_state(page, NR_FILE_PAGES); | 89 | __inc_zone_page_state(page, NR_FILE_PAGES); |
88 | INC_CACHE_INFO(add_total); | 90 | INC_CACHE_INFO(add_total); |
89 | } | 91 | } |
90 | write_unlock_irq(&swapper_space.tree_lock); | 92 | spin_unlock_irq(&swapper_space.tree_lock); |
91 | radix_tree_preload_end(); | 93 | radix_tree_preload_end(); |
94 | |||
95 | if (unlikely(error)) { | ||
96 | set_page_private(page, 0UL); | ||
97 | ClearPageSwapCache(page); | ||
98 | page_cache_release(page); | ||
99 | } | ||
92 | } | 100 | } |
93 | return error; | 101 | return error; |
94 | } | 102 | } |
@@ -175,9 +183,9 @@ void delete_from_swap_cache(struct page *page) | |||
175 | 183 | ||
176 | entry.val = page_private(page); | 184 | entry.val = page_private(page); |
177 | 185 | ||
178 | write_lock_irq(&swapper_space.tree_lock); | 186 | spin_lock_irq(&swapper_space.tree_lock); |
179 | __delete_from_swap_cache(page); | 187 | __delete_from_swap_cache(page); |
180 | write_unlock_irq(&swapper_space.tree_lock); | 188 | spin_unlock_irq(&swapper_space.tree_lock); |
181 | 189 | ||
182 | swap_free(entry); | 190 | swap_free(entry); |
183 | page_cache_release(page); | 191 | page_cache_release(page); |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 2f33edb8bee9..6beb6251e99d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -33,8 +33,8 @@ | |||
33 | #include <asm/tlbflush.h> | 33 | #include <asm/tlbflush.h> |
34 | #include <linux/swapops.h> | 34 | #include <linux/swapops.h> |
35 | 35 | ||
36 | DEFINE_SPINLOCK(swap_lock); | 36 | static DEFINE_SPINLOCK(swap_lock); |
37 | unsigned int nr_swapfiles; | 37 | static unsigned int nr_swapfiles; |
38 | long total_swap_pages; | 38 | long total_swap_pages; |
39 | static int swap_overflow; | 39 | static int swap_overflow; |
40 | static int least_priority; | 40 | static int least_priority; |
@@ -44,7 +44,7 @@ static const char Unused_file[] = "Unused swap file entry "; | |||
44 | static const char Bad_offset[] = "Bad swap offset entry "; | 44 | static const char Bad_offset[] = "Bad swap offset entry "; |
45 | static const char Unused_offset[] = "Unused swap offset entry "; | 45 | static const char Unused_offset[] = "Unused swap offset entry "; |
46 | 46 | ||
47 | struct swap_list_t swap_list = {-1, -1}; | 47 | static struct swap_list_t swap_list = {-1, -1}; |
48 | 48 | ||
49 | static struct swap_info_struct swap_info[MAX_SWAPFILES]; | 49 | static struct swap_info_struct swap_info[MAX_SWAPFILES]; |
50 | 50 | ||
@@ -369,13 +369,13 @@ int remove_exclusive_swap_page(struct page *page) | |||
369 | retval = 0; | 369 | retval = 0; |
370 | if (p->swap_map[swp_offset(entry)] == 1) { | 370 | if (p->swap_map[swp_offset(entry)] == 1) { |
371 | /* Recheck the page count with the swapcache lock held.. */ | 371 | /* Recheck the page count with the swapcache lock held.. */ |
372 | write_lock_irq(&swapper_space.tree_lock); | 372 | spin_lock_irq(&swapper_space.tree_lock); |
373 | if ((page_count(page) == 2) && !PageWriteback(page)) { | 373 | if ((page_count(page) == 2) && !PageWriteback(page)) { |
374 | __delete_from_swap_cache(page); | 374 | __delete_from_swap_cache(page); |
375 | SetPageDirty(page); | 375 | SetPageDirty(page); |
376 | retval = 1; | 376 | retval = 1; |
377 | } | 377 | } |
378 | write_unlock_irq(&swapper_space.tree_lock); | 378 | spin_unlock_irq(&swapper_space.tree_lock); |
379 | } | 379 | } |
380 | spin_unlock(&swap_lock); | 380 | spin_unlock(&swap_lock); |
381 | 381 | ||
diff --git a/mm/truncate.c b/mm/truncate.c index b8961cb63414..e68443d74567 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -349,18 +349,18 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
349 | if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) | 349 | if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) |
350 | return 0; | 350 | return 0; |
351 | 351 | ||
352 | write_lock_irq(&mapping->tree_lock); | 352 | spin_lock_irq(&mapping->tree_lock); |
353 | if (PageDirty(page)) | 353 | if (PageDirty(page)) |
354 | goto failed; | 354 | goto failed; |
355 | 355 | ||
356 | BUG_ON(PagePrivate(page)); | 356 | BUG_ON(PagePrivate(page)); |
357 | __remove_from_page_cache(page); | 357 | __remove_from_page_cache(page); |
358 | write_unlock_irq(&mapping->tree_lock); | 358 | spin_unlock_irq(&mapping->tree_lock); |
359 | ClearPageUptodate(page); | 359 | ClearPageUptodate(page); |
360 | page_cache_release(page); /* pagecache ref */ | 360 | page_cache_release(page); /* pagecache ref */ |
361 | return 1; | 361 | return 1; |
362 | failed: | 362 | failed: |
363 | write_unlock_irq(&mapping->tree_lock); | 363 | spin_unlock_irq(&mapping->tree_lock); |
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
@@ -1,7 +1,9 @@ | |||
1 | #include <linux/mm.h> | ||
1 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
2 | #include <linux/string.h> | 3 | #include <linux/string.h> |
3 | #include <linux/module.h> | 4 | #include <linux/module.h> |
4 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/sched.h> | ||
5 | #include <asm/uaccess.h> | 7 | #include <asm/uaccess.h> |
6 | 8 | ||
7 | /** | 9 | /** |
@@ -68,25 +70,22 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp) | |||
68 | EXPORT_SYMBOL(kmemdup); | 70 | EXPORT_SYMBOL(kmemdup); |
69 | 71 | ||
70 | /** | 72 | /** |
71 | * krealloc - reallocate memory. The contents will remain unchanged. | 73 | * __krealloc - like krealloc() but don't free @p. |
72 | * @p: object to reallocate memory for. | 74 | * @p: object to reallocate memory for. |
73 | * @new_size: how many bytes of memory are required. | 75 | * @new_size: how many bytes of memory are required. |
74 | * @flags: the type of memory to allocate. | 76 | * @flags: the type of memory to allocate. |
75 | * | 77 | * |
76 | * The contents of the object pointed to are preserved up to the | 78 | * This function is like krealloc() except it never frees the originally |
77 | * lesser of the new and old sizes. If @p is %NULL, krealloc() | 79 | * allocated buffer. Use this if you don't want to free the buffer immediately |
78 | * behaves exactly like kmalloc(). If @size is 0 and @p is not a | 80 | * like, for example, with RCU. |
79 | * %NULL pointer, the object pointed to is freed. | ||
80 | */ | 81 | */ |
81 | void *krealloc(const void *p, size_t new_size, gfp_t flags) | 82 | void *__krealloc(const void *p, size_t new_size, gfp_t flags) |
82 | { | 83 | { |
83 | void *ret; | 84 | void *ret; |
84 | size_t ks = 0; | 85 | size_t ks = 0; |
85 | 86 | ||
86 | if (unlikely(!new_size)) { | 87 | if (unlikely(!new_size)) |
87 | kfree(p); | ||
88 | return ZERO_SIZE_PTR; | 88 | return ZERO_SIZE_PTR; |
89 | } | ||
90 | 89 | ||
91 | if (p) | 90 | if (p) |
92 | ks = ksize(p); | 91 | ks = ksize(p); |
@@ -95,10 +94,37 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) | |||
95 | return (void *)p; | 94 | return (void *)p; |
96 | 95 | ||
97 | ret = kmalloc_track_caller(new_size, flags); | 96 | ret = kmalloc_track_caller(new_size, flags); |
98 | if (ret && p) { | 97 | if (ret && p) |
99 | memcpy(ret, p, ks); | 98 | memcpy(ret, p, ks); |
99 | |||
100 | return ret; | ||
101 | } | ||
102 | EXPORT_SYMBOL(__krealloc); | ||
103 | |||
104 | /** | ||
105 | * krealloc - reallocate memory. The contents will remain unchanged. | ||
106 | * @p: object to reallocate memory for. | ||
107 | * @new_size: how many bytes of memory are required. | ||
108 | * @flags: the type of memory to allocate. | ||
109 | * | ||
110 | * The contents of the object pointed to are preserved up to the | ||
111 | * lesser of the new and old sizes. If @p is %NULL, krealloc() | ||
112 | * behaves exactly like kmalloc(). If @size is 0 and @p is not a | ||
113 | * %NULL pointer, the object pointed to is freed. | ||
114 | */ | ||
115 | void *krealloc(const void *p, size_t new_size, gfp_t flags) | ||
116 | { | ||
117 | void *ret; | ||
118 | |||
119 | if (unlikely(!new_size)) { | ||
100 | kfree(p); | 120 | kfree(p); |
121 | return ZERO_SIZE_PTR; | ||
101 | } | 122 | } |
123 | |||
124 | ret = __krealloc(p, new_size, flags); | ||
125 | if (ret && p != ret) | ||
126 | kfree(p); | ||
127 | |||
102 | return ret; | 128 | return ret; |
103 | } | 129 | } |
104 | EXPORT_SYMBOL(krealloc); | 130 | EXPORT_SYMBOL(krealloc); |
@@ -136,3 +162,12 @@ char *strndup_user(const char __user *s, long n) | |||
136 | return p; | 162 | return p; |
137 | } | 163 | } |
138 | EXPORT_SYMBOL(strndup_user); | 164 | EXPORT_SYMBOL(strndup_user); |
165 | |||
166 | #ifndef HAVE_ARCH_PICK_MMAP_LAYOUT | ||
167 | void arch_pick_mmap_layout(struct mm_struct *mm) | ||
168 | { | ||
169 | mm->mmap_base = TASK_UNMAPPED_BASE; | ||
170 | mm->get_unmapped_area = arch_get_unmapped_area; | ||
171 | mm->unmap_area = arch_unmap_area; | ||
172 | } | ||
173 | #endif | ||
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 35f293816294..85b9a0d2c877 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -381,16 +381,14 @@ static void __vunmap(const void *addr, int deallocate_pages) | |||
381 | return; | 381 | return; |
382 | 382 | ||
383 | if ((PAGE_SIZE-1) & (unsigned long)addr) { | 383 | if ((PAGE_SIZE-1) & (unsigned long)addr) { |
384 | printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr); | 384 | WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr); |
385 | WARN_ON(1); | ||
386 | return; | 385 | return; |
387 | } | 386 | } |
388 | 387 | ||
389 | area = remove_vm_area(addr); | 388 | area = remove_vm_area(addr); |
390 | if (unlikely(!area)) { | 389 | if (unlikely(!area)) { |
391 | printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", | 390 | WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", |
392 | addr); | 391 | addr); |
393 | WARN_ON(1); | ||
394 | return; | 392 | return; |
395 | } | 393 | } |
396 | 394 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index 26672c6cd3ce..8f71761bc4b7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -391,17 +391,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
391 | } | 391 | } |
392 | 392 | ||
393 | /* | 393 | /* |
394 | * Attempt to detach a locked page from its ->mapping. If it is dirty or if | 394 | * Same as remove_mapping, but if the page is removed from the mapping, it |
395 | * someone else has a ref on the page, abort and return 0. If it was | 395 | * gets returned with a refcount of 0. |
396 | * successfully detached, return 1. Assumes the caller has a single ref on | ||
397 | * this page. | ||
398 | */ | 396 | */ |
399 | int remove_mapping(struct address_space *mapping, struct page *page) | 397 | static int __remove_mapping(struct address_space *mapping, struct page *page) |
400 | { | 398 | { |
401 | BUG_ON(!PageLocked(page)); | 399 | BUG_ON(!PageLocked(page)); |
402 | BUG_ON(mapping != page_mapping(page)); | 400 | BUG_ON(mapping != page_mapping(page)); |
403 | 401 | ||
404 | write_lock_irq(&mapping->tree_lock); | 402 | spin_lock_irq(&mapping->tree_lock); |
405 | /* | 403 | /* |
406 | * The non racy check for a busy page. | 404 | * The non racy check for a busy page. |
407 | * | 405 | * |
@@ -427,28 +425,48 @@ int remove_mapping(struct address_space *mapping, struct page *page) | |||
427 | * Note that if SetPageDirty is always performed via set_page_dirty, | 425 | * Note that if SetPageDirty is always performed via set_page_dirty, |
428 | * and thus under tree_lock, then this ordering is not required. | 426 | * and thus under tree_lock, then this ordering is not required. |
429 | */ | 427 | */ |
430 | if (unlikely(page_count(page) != 2)) | 428 | if (!page_freeze_refs(page, 2)) |
431 | goto cannot_free; | 429 | goto cannot_free; |
432 | smp_rmb(); | 430 | /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */ |
433 | if (unlikely(PageDirty(page))) | 431 | if (unlikely(PageDirty(page))) { |
432 | page_unfreeze_refs(page, 2); | ||
434 | goto cannot_free; | 433 | goto cannot_free; |
434 | } | ||
435 | 435 | ||
436 | if (PageSwapCache(page)) { | 436 | if (PageSwapCache(page)) { |
437 | swp_entry_t swap = { .val = page_private(page) }; | 437 | swp_entry_t swap = { .val = page_private(page) }; |
438 | __delete_from_swap_cache(page); | 438 | __delete_from_swap_cache(page); |
439 | write_unlock_irq(&mapping->tree_lock); | 439 | spin_unlock_irq(&mapping->tree_lock); |
440 | swap_free(swap); | 440 | swap_free(swap); |
441 | __put_page(page); /* The pagecache ref */ | 441 | } else { |
442 | return 1; | 442 | __remove_from_page_cache(page); |
443 | spin_unlock_irq(&mapping->tree_lock); | ||
443 | } | 444 | } |
444 | 445 | ||
445 | __remove_from_page_cache(page); | ||
446 | write_unlock_irq(&mapping->tree_lock); | ||
447 | __put_page(page); | ||
448 | return 1; | 446 | return 1; |
449 | 447 | ||
450 | cannot_free: | 448 | cannot_free: |
451 | write_unlock_irq(&mapping->tree_lock); | 449 | spin_unlock_irq(&mapping->tree_lock); |
450 | return 0; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * Attempt to detach a locked page from its ->mapping. If it is dirty or if | ||
455 | * someone else has a ref on the page, abort and return 0. If it was | ||
456 | * successfully detached, return 1. Assumes the caller has a single ref on | ||
457 | * this page. | ||
458 | */ | ||
459 | int remove_mapping(struct address_space *mapping, struct page *page) | ||
460 | { | ||
461 | if (__remove_mapping(mapping, page)) { | ||
462 | /* | ||
463 | * Unfreezing the refcount with 1 rather than 2 effectively | ||
464 | * drops the pagecache ref for us without requiring another | ||
465 | * atomic operation. | ||
466 | */ | ||
467 | page_unfreeze_refs(page, 1); | ||
468 | return 1; | ||
469 | } | ||
452 | return 0; | 470 | return 0; |
453 | } | 471 | } |
454 | 472 | ||
@@ -598,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
598 | if (PagePrivate(page)) { | 616 | if (PagePrivate(page)) { |
599 | if (!try_to_release_page(page, sc->gfp_mask)) | 617 | if (!try_to_release_page(page, sc->gfp_mask)) |
600 | goto activate_locked; | 618 | goto activate_locked; |
601 | if (!mapping && page_count(page) == 1) | 619 | if (!mapping && page_count(page) == 1) { |
602 | goto free_it; | 620 | unlock_page(page); |
621 | if (put_page_testzero(page)) | ||
622 | goto free_it; | ||
623 | else { | ||
624 | /* | ||
625 | * rare race with speculative reference. | ||
626 | * the speculative reference will free | ||
627 | * this page shortly, so we may | ||
628 | * increment nr_reclaimed here (and | ||
629 | * leave it off the LRU). | ||
630 | */ | ||
631 | nr_reclaimed++; | ||
632 | continue; | ||
633 | } | ||
634 | } | ||
603 | } | 635 | } |
604 | 636 | ||
605 | if (!mapping || !remove_mapping(mapping, page)) | 637 | if (!mapping || !__remove_mapping(mapping, page)) |
606 | goto keep_locked; | 638 | goto keep_locked; |
607 | 639 | ||
608 | free_it: | ||
609 | unlock_page(page); | 640 | unlock_page(page); |
641 | free_it: | ||
610 | nr_reclaimed++; | 642 | nr_reclaimed++; |
611 | if (!pagevec_add(&freed_pvec, page)) | 643 | if (!pagevec_add(&freed_pvec, page)) { |
612 | __pagevec_release_nonlru(&freed_pvec); | 644 | __pagevec_free(&freed_pvec); |
645 | pagevec_reinit(&freed_pvec); | ||
646 | } | ||
613 | continue; | 647 | continue; |
614 | 648 | ||
615 | activate_locked: | 649 | activate_locked: |
@@ -623,7 +657,7 @@ keep: | |||
623 | } | 657 | } |
624 | list_splice(&ret_pages, page_list); | 658 | list_splice(&ret_pages, page_list); |
625 | if (pagevec_count(&freed_pvec)) | 659 | if (pagevec_count(&freed_pvec)) |
626 | __pagevec_release_nonlru(&freed_pvec); | 660 | __pagevec_free(&freed_pvec); |
627 | count_vm_events(PGACTIVATE, pgactivate); | 661 | count_vm_events(PGACTIVATE, pgactivate); |
628 | return nr_reclaimed; | 662 | return nr_reclaimed; |
629 | } | 663 | } |