path: root/mm
author    Ingo Molnar <mingo@elte.hu>    2008-07-28 15:14:43 -0400
committer Ingo Molnar <mingo@elte.hu>    2008-07-28 15:14:43 -0400
commit    414f746d232d41ed6ae8632c4495ae795373c44b (patch)
tree      167f9bc8f139c6e82e6732b38c7a938b8a9d31cd /mm
parent    5a7a201c51c324876d00a54e7208af6af12d1ca4 (diff)
parent    c9272c4f9fbe2087beb3392f526dc5b19efaa56b (diff)
Merge branch 'linus' into cpus4096
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            3
-rw-r--r--  mm/allocpercpu.c     20
-rw-r--r--  mm/filemap.c        228
-rw-r--r--  mm/filemap_xip.c      2
-rw-r--r--  mm/hugetlb.c         24
-rw-r--r--  mm/memory.c           3
-rw-r--r--  mm/migrate.c         29
-rw-r--r--  mm/nommu.c            4
-rw-r--r--  mm/page-writeback.c  12
-rw-r--r--  mm/readahead.c        6
-rw-r--r--  mm/rmap.c             2
-rw-r--r--  mm/shmem.c            8
-rw-r--r--  mm/shmem_acl.c        2
-rw-r--r--  mm/slab.c            11
-rw-r--r--  mm/slob.c             7
-rw-r--r--  mm/slub.c            13
-rw-r--r--  mm/sparse.c           2
-rw-r--r--  mm/swap_state.c      30
-rw-r--r--  mm/swapfile.c        10
-rw-r--r--  mm/truncate.c         6
-rw-r--r--  mm/util.c            55
-rw-r--r--  mm/vmalloc.c          6
-rw-r--r--  mm/vmscan.c          80
23 files changed, 375 insertions, 188 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index aa799007a11b..efee5d379df4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -77,6 +77,9 @@ config FLAT_NODE_MEM_MAP
 	def_bool y
 	depends on !SPARSEMEM
 
+config HAVE_GET_USER_PAGES_FAST
+	bool
+
 #
 # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
 # to represent different areas of memory. This variable allows
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index 843364594e23..4297bc41bfd2 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -18,27 +18,28 @@
  * Depopulating per-cpu data for a cpu going offline would be a typical
  * use case. You need to register a cpu hotplug handler for that purpose.
  */
-void percpu_depopulate(void *__pdata, int cpu)
+static void percpu_depopulate(void *__pdata, int cpu)
 {
 	struct percpu_data *pdata = __percpu_disguise(__pdata);
 
 	kfree(pdata->ptrs[cpu]);
 	pdata->ptrs[cpu] = NULL;
 }
-EXPORT_SYMBOL_GPL(percpu_depopulate);
 
 /**
  * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
  * @__pdata: per-cpu data to depopulate
  * @mask: depopulate per-cpu data for cpu's selected through mask bits
  */
-void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+static void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
 {
 	int cpu;
 	for_each_cpu_mask_nr(cpu, *mask)
 		percpu_depopulate(__pdata, cpu);
 }
-EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
+
+#define percpu_depopulate_mask(__pdata, mask) \
+	__percpu_depopulate_mask((__pdata), &(mask))
 
 /**
  * percpu_populate - populate per-cpu data for given cpu
@@ -51,7 +52,7 @@ EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
  * use case. You need to register a cpu hotplug handler for that purpose.
  * Per-cpu object is populated with zeroed buffer.
  */
-void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
 {
 	struct percpu_data *pdata = __percpu_disguise(__pdata);
 	int node = cpu_to_node(cpu);
@@ -68,7 +69,6 @@ void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
 	pdata->ptrs[cpu] = kzalloc(size, gfp);
 	return pdata->ptrs[cpu];
 }
-EXPORT_SYMBOL_GPL(percpu_populate);
 
 /**
  * percpu_populate_mask - populate per-cpu data for more cpu's
@@ -79,8 +79,8 @@ EXPORT_SYMBOL_GPL(percpu_populate);
  *
  * Per-cpu objects are populated with zeroed buffers.
  */
-int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
 			cpumask_t *mask)
 {
 	cpumask_t populated;
 	int cpu;
@@ -94,7 +94,9 @@ int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
 		cpu_set(cpu, populated);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(__percpu_populate_mask);
+
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
 
 /**
  * percpu_alloc_mask - initial setup of per-cpu data
diff --git a/mm/filemap.c b/mm/filemap.c
index 2d3ec1ffc66e..5de7633e1dbe 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -109,7 +109,7 @@
 /*
  * Remove a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold a write_lock on the mapping's tree_lock.
+ * is safe. The caller must hold the mapping's tree_lock.
  */
 void __remove_from_page_cache(struct page *page)
 {
@@ -141,9 +141,9 @@ void remove_from_page_cache(struct page *page)
 
 	BUG_ON(!PageLocked(page));
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 }
 
 static int sync_page(void *word)
@@ -442,48 +442,52 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 }
 
 /**
- * add_to_page_cache - add newly allocated pagecache pages
+ * add_to_page_cache_locked - add a locked page to the pagecache
  * @page: page to add
  * @mapping: the page's address_space
  * @offset: page index
  * @gfp_mask: page allocation mode
  *
- * This function is used to add newly allocated pagecache pages;
- * the page is new, so we can just run SetPageLocked() against it.
- * The other page state flags were set by rmqueue().
- *
+ * This function is used to add a page to the pagecache. It must be locked.
  * This function does not add the page to the LRU. The caller must do that.
  */
-int add_to_page_cache(struct page *page, struct address_space *mapping,
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
 {
-	int error = mem_cgroup_cache_charge(page, current->mm,
+	int error;
+
+	VM_BUG_ON(!PageLocked(page));
+
+	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & ~__GFP_HIGHMEM);
 	if (error)
 		goto out;
 
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error == 0) {
-		write_lock_irq(&mapping->tree_lock);
+		page_cache_get(page);
+		page->mapping = mapping;
+		page->index = offset;
+
+		spin_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
-		if (!error) {
-			page_cache_get(page);
-			SetPageLocked(page);
-			page->mapping = mapping;
-			page->index = offset;
+		if (likely(!error)) {
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
-		} else
+		} else {
+			page->mapping = NULL;
 			mem_cgroup_uncharge_cache_page(page);
+			page_cache_release(page);
+		}
 
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	} else
 		mem_cgroup_uncharge_cache_page(page);
 out:
 	return error;
 }
-EXPORT_SYMBOL(add_to_page_cache);
+EXPORT_SYMBOL(add_to_page_cache_locked);
 
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
@@ -633,15 +637,35 @@ void __lock_page_nosync(struct page *page)
  * Is there a pagecache struct page at the given (mapping, offset) tuple?
  * If yes, increment its refcount and return it; if no, return NULL.
  */
-struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
 {
+	void **pagep;
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
-	page = radix_tree_lookup(&mapping->page_tree, offset);
-	if (page)
-		page_cache_get(page);
-	read_unlock_irq(&mapping->tree_lock);
+	rcu_read_lock();
+repeat:
+	page = NULL;
+	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+	if (pagep) {
+		page = radix_tree_deref_slot(pagep);
+		if (unlikely(!page || page == RADIX_TREE_RETRY))
+			goto repeat;
+
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/*
+		 * Has the page moved?
+		 * This is part of the lockless pagecache protocol. See
+		 * include/linux/pagemap.h for details.
+		 */
+		if (unlikely(page != *pagep)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	rcu_read_unlock();
+
 	return page;
 }
 EXPORT_SYMBOL(find_get_page);
@@ -656,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
  *
  * Returns zero if the page was not present. find_lock_page() may sleep.
  */
-struct page *find_lock_page(struct address_space *mapping,
-				pgoff_t offset)
+struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 {
 	struct page *page;
 
 repeat:
-	read_lock_irq(&mapping->tree_lock);
-	page = radix_tree_lookup(&mapping->page_tree, offset);
+	page = find_get_page(mapping, offset);
 	if (page) {
-		page_cache_get(page);
-		if (TestSetPageLocked(page)) {
-			read_unlock_irq(&mapping->tree_lock);
-			__lock_page(page);
-
-			/* Has the page been truncated while we slept? */
-			if (unlikely(page->mapping != mapping)) {
-				unlock_page(page);
-				page_cache_release(page);
-				goto repeat;
-			}
-			VM_BUG_ON(page->index != offset);
-			goto out;
+		lock_page(page);
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping)) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto repeat;
 		}
+		VM_BUG_ON(page->index != offset);
 	}
-	read_unlock_irq(&mapping->tree_lock);
-out:
 	return page;
 }
 EXPORT_SYMBOL(find_lock_page);
@@ -747,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
-				(void **)pages, start, nr_pages);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
-	read_unlock_irq(&mapping->tree_lock);
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
+	}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -774,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, index, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
-				(void **)pages, index, nr_pages);
-	for (i = 0; i < ret; i++) {
-		if (pages[i]->mapping == NULL || pages[i]->index != index)
+		if (page->mapping == NULL || page->index != index)
 			break;
 
-		page_cache_get(pages[i]);
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
 		index++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
-	return i;
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(find_get_pages_contig);
 
@@ -806,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
+				(void ***)pages, *index, nr_pages, tag);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
+
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
+	}
+	rcu_read_unlock();
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
-				(void **)pages, *index, nr_pages, tag);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
 	if (ret)
 		*index = pages[ret - 1]->index + 1;
-	read_unlock_irq(&mapping->tree_lock);
+
 	return ret;
 }
 EXPORT_SYMBOL(find_get_pages_tag);
@@ -1665,8 +1758,9 @@ static int __remove_suid(struct dentry *dentry, int kill)
 	return notify_change(dentry, &newattrs);
 }
 
-int remove_suid(struct dentry *dentry)
+int file_remove_suid(struct file *file)
 {
+	struct dentry *dentry = file->f_path.dentry;
 	int killsuid = should_remove_suid(dentry);
 	int killpriv = security_inode_need_killpriv(dentry);
 	int error = 0;
@@ -1680,7 +1774,7 @@ int remove_suid(struct dentry *dentry)
 
 	return error;
 }
-EXPORT_SYMBOL(remove_suid);
+EXPORT_SYMBOL(file_remove_suid);
 
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
@@ -2436,7 +2530,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 	if (count == 0)
 		goto out;
 
-	err = remove_suid(file->f_path.dentry);
+	err = file_remove_suid(file);
 	if (err)
 		goto out;
 
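The find_get_page() and find_get_pages*() hunks above all follow the same lockless pattern: look the slot up under rcu_read_lock(), take a speculative reference, then re-check that the slot still points at the same page before trusting it. Below is a minimal userspace C11 sketch of that get-speculative-then-recheck idea; the names (get_unless_zero, lookup_get, struct obj) and the single atomic slot are illustrative stand-ins, not the kernel's radix-tree or struct page APIs.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	_Atomic int refcount;	/* 0 means "being freed or replaced" */
	int data;
};

/* Analogue of page_cache_get_speculative(): only take a ref if it isn't 0. */
static bool get_unless_zero(struct obj *o)
{
	int c = atomic_load(&o->refcount);
	while (c != 0) {
		if (atomic_compare_exchange_weak(&o->refcount, &c, c + 1))
			return true;
	}
	return false;
}

/* Analogue of find_get_page(): deref the slot, ref speculatively, re-check. */
static struct obj *lookup_get(struct obj *_Atomic *slot)
{
	struct obj *o;
repeat:
	o = atomic_load(slot);
	if (!o)
		return NULL;
	if (!get_unless_zero(o))
		goto repeat;		/* object was being torn down */
	if (o != atomic_load(slot)) {	/* has the slot moved under us? */
		atomic_fetch_sub(&o->refcount, 1);
		goto repeat;
	}
	return o;
}

int main(void)
{
	static struct obj page = { .refcount = 1, .data = 42 };
	struct obj *_Atomic slot = &page;
	struct obj *o = lookup_get(&slot);

	printf("data=%d refcount=%d\n", o->data, atomic_load(&o->refcount));
	atomic_fetch_sub(&o->refcount, 1);	/* drop our speculative ref */
	return 0;
}

The re-check against the slot is what lets the lookup run without the tree_lock: if the page was removed or reused between the dereference and the reference grab, the pointer no longer matches and the caller simply retries.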
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 3e744abcce9d..98a3f31ccd6a 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -380,7 +380,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
 	if (count == 0)
 		goto out_backing;
 
-	ret = remove_suid(filp->f_path.dentry);
+	ret = file_remove_suid(filp);
 	if (ret)
 		goto out_backing;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a8bf4ab01f86..3be79dc18c5c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1026,18 +1026,6 @@ static void __init report_hugepages(void)
 	}
 }
 
-static unsigned int cpuset_mems_nr(unsigned int *array)
-{
-	int node;
-	unsigned int nr = 0;
-
-	for_each_node_mask(node, cpuset_current_mems_allowed)
-		nr += array[node];
-
-	return nr;
-}
-
-#ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(struct hstate *h, unsigned long count)
 {
@@ -1386,6 +1374,18 @@ static int __init hugetlb_default_setup(char *s)
 }
 __setup("default_hugepagesz=", hugetlb_default_setup);
 
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+	int node;
+	unsigned int nr = 0;
+
+	for_each_node_mask(node, cpuset_current_mems_allowed)
+		nr += array[node];
+
+	return nr;
+}
+
+#ifdef CONFIG_SYSCTL
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)
diff --git a/mm/memory.c b/mm/memory.c
index 262e3eb6601a..a8ca04faaea6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -374,7 +374,8 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
  *
  * The calling function must still handle the error.
  */
-void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+static void print_bad_pte(struct vm_area_struct *vma, pte_t pte,
+			  unsigned long vaddr)
 {
 	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
 			"vm_flags = %lx, vaddr = %lx\n",
diff --git a/mm/migrate.c b/mm/migrate.c
index d8c65a65c61d..153572fb60b8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 
 	page = migration_entry_to_page(entry);
 
-	get_page(page);
+	/*
+	 * Once radix-tree replacement of page migration started, page_count
+	 * *must* be zero. And, we don't want to call wait_on_page_locked()
+	 * against a page without get_page().
+	 * So, we use get_page_unless_zero(), here. Even failed, page fault
+	 * will occur again.
+	 */
+	if (!get_page_unless_zero(page))
+		goto out;
 	pte_unmap_unlock(ptep, ptl);
 	wait_on_page_locked(page);
 	put_page(page);
@@ -305,6 +313,7 @@ out:
 static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page)
 {
+	int expected_count;
 	void **pslot;
 
 	if (!mapping) {
@@ -314,14 +323,20 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 		return 0;
 	}
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
 					page_index(page));
 
-	if (page_count(page) != 2 + !!PagePrivate(page) ||
+	expected_count = 2 + !!PagePrivate(page);
+	if (page_count(page) != expected_count ||
 	    (struct page *)radix_tree_deref_slot(pslot) != page) {
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
 	}
 
@@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
 	radix_tree_replace_slot(pslot, newpage);
 
+	page_unfreeze_refs(page, expected_count);
 	/*
 	 * Drop cache reference from old page.
 	 * We know this isn't the last reference.
@@ -357,10 +373,9 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
 
-	write_unlock_irq(&mapping->tree_lock);
-	if (!PageSwapCache(newpage)) {
+	spin_unlock_irq(&mapping->tree_lock);
+	if (!PageSwapCache(newpage))
 		mem_cgroup_uncharge_cache_page(page);
-	}
 
 	return 0;
 }
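The migrate_page_move_mapping() hunks above rely on freezing the page's reference count at the expected value before the radix-tree slot is replaced, and unfreezing it afterwards; while frozen (count == 0), speculative lookups like the ones shown earlier cannot take a new reference. A small userspace C11 analogue of the freeze/unfreeze idea follows; freeze_refs and unfreeze_refs are stand-in names modelled on the kernel's page_freeze_refs/page_unfreeze_refs, implemented here with a plain atomic compare-exchange.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Freeze a reference count: atomically replace it with 0, but only if it
 * currently equals the count we expect. While frozen, get-unless-zero style
 * lookups fail, so the object cannot gain new references behind our back.
 */
static bool freeze_refs(_Atomic int *refcount, int expected)
{
	return atomic_compare_exchange_strong(refcount, &expected, 0);
}

/* Thaw the count to whatever value the caller wants it to end up at. */
static void unfreeze_refs(_Atomic int *refcount, int count)
{
	atomic_store(refcount, count);
}

int main(void)
{
	_Atomic int refs = 2;	/* e.g. one cache reference + one caller reference */

	if (!freeze_refs(&refs, 2)) {
		puts("someone else holds a reference, back off (-EAGAIN)");
		return 1;
	}
	/* ... replace the slot while no new references can be taken ... */
	unfreeze_refs(&refs, 2);
	printf("refs=%d\n", atomic_load(&refs));
	return 0;
}

If the compare-exchange fails, another user has a reference (or has already frozen the count), which is exactly the -EAGAIN case in the hunk above.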
diff --git a/mm/nommu.c b/mm/nommu.c
index 4462b6a3fcb9..5edccd9c9218 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -22,7 +22,7 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/mount.h>
@@ -745,7 +745,7 @@ static unsigned long determine_vm_flags(struct file *file,
 	 * it's being traced - otherwise breakpoints set in it may interfere
 	 * with another untraced process
 	 */
-	if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
+	if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current))
 		vm_flags &= ~VM_MAYSHARE;
 
 	return vm_flags;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 94c6d8988ab3..24de8b65fdbd 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1088,7 +1088,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 	if (!mapping)
 		return 1;
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	mapping2 = page_mapping(page);
 	if (mapping2) { /* Race with truncate? */
 		BUG_ON(mapping2 != mapping);
@@ -1102,7 +1102,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		if (mapping->host) {
 			/* !PageAnon && !swapper_space */
 			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1258,7 +1258,7 @@ int test_clear_page_writeback(struct page *page)
 		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		unsigned long flags;
 
-		write_lock_irqsave(&mapping->tree_lock, flags);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestClearPageWriteback(page);
 		if (ret) {
 			radix_tree_tag_clear(&mapping->page_tree,
@@ -1269,7 +1269,7 @@ int test_clear_page_writeback(struct page *page)
 				__bdi_writeout_inc(bdi);
 			}
 		}
-		write_unlock_irqrestore(&mapping->tree_lock, flags);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
@@ -1287,7 +1287,7 @@ int test_set_page_writeback(struct page *page)
 		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		unsigned long flags;
 
-		write_lock_irqsave(&mapping->tree_lock, flags);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestSetPageWriteback(page);
 		if (!ret) {
 			radix_tree_tag_set(&mapping->page_tree,
@@ -1300,7 +1300,7 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		write_unlock_irqrestore(&mapping->tree_lock, flags);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
diff --git a/mm/readahead.c b/mm/readahead.c
index d8723a5f6496..77e8ddf945e9 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -382,9 +382,9 @@ ondemand_readahead(struct address_space *mapping,
 	if (hit_readahead_marker) {
 		pgoff_t start;
 
-		read_lock_irq(&mapping->tree_lock);
-		start = radix_tree_next_hole(&mapping->page_tree, offset, max+1);
-		read_unlock_irq(&mapping->tree_lock);
+		rcu_read_lock();
+		start = radix_tree_next_hole(&mapping->page_tree, offset,max+1);
+		rcu_read_unlock();
 
 		if (!start || start - offset > max)
 			return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index abbd29f7c43f..39ae5a9bf382 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -138,7 +138,7 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 	anon_vma_free(anon_vma);
 }
 
-static void anon_vma_ctor(struct kmem_cache *cachep, void *data)
+static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index f92fea94d037..952d361774bb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -936,7 +936,7 @@ found:
 	spin_lock(&info->lock);
 	ptr = shmem_swp_entry(info, idx, NULL);
 	if (ptr && ptr->val == entry.val) {
-		error = add_to_page_cache(page, inode->i_mapping,
+		error = add_to_page_cache_locked(page, inode->i_mapping,
 						idx, GFP_NOWAIT);
 		/* does mem_cgroup_uncharge_cache_page on error */
 	} else	/* we must compensate for our precharge above */
@@ -1301,8 +1301,8 @@ repeat:
 			SetPageUptodate(filepage);
 			set_page_dirty(filepage);
 			swap_free(swap);
-		} else if (!(error = add_to_page_cache(
-					swappage, mapping, idx, GFP_NOWAIT))) {
+		} else if (!(error = add_to_page_cache_locked(swappage, mapping,
+					idx, GFP_NOWAIT))) {
 			info->flags |= SHMEM_PAGEIN;
 			shmem_swp_set(info, entry, 0);
 			shmem_swp_unmap(entry);
@@ -2352,7 +2352,7 @@ static void shmem_destroy_inode(struct inode *inode)
 	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
 
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index f5664c5b9eb1..8e5aadd7dcd6 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -191,7 +191,7 @@ shmem_check_acl(struct inode *inode, int mask)
  * shmem_permission  -  permission() inode operation
  */
 int
-shmem_permission(struct inode *inode, int mask, struct nameidata *nd)
+shmem_permission(struct inode *inode, int mask)
 {
 	return generic_permission(inode, mask, shmem_check_acl);
 }
diff --git a/mm/slab.c b/mm/slab.c
index 052e7d64537e..918f04f7fef1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -406,7 +406,7 @@ struct kmem_cache {
 	unsigned int dflags;		/* dynamic flags */
 
 	/* constructor func */
-	void (*ctor)(struct kmem_cache *, void *);
+	void (*ctor)(void *obj);
 
 /* 5) cache creation/removal */
 	const char *name;
@@ -2137,8 +2137,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
  */
 struct kmem_cache *
 kmem_cache_create (const char *name, size_t size, size_t align,
-	unsigned long flags,
-	void (*ctor)(struct kmem_cache *, void *))
+	unsigned long flags, void (*ctor)(void *))
 {
 	size_t left_over, slab_size, ralign;
 	struct kmem_cache *cachep = NULL, *pc;
@@ -2653,7 +2652,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
 		 * They must also be threaded.
 		 */
 		if (cachep->ctor && !(cachep->flags & SLAB_POISON))
-			cachep->ctor(cachep, objp + obj_offset(cachep));
+			cachep->ctor(objp + obj_offset(cachep));
 
 		if (cachep->flags & SLAB_RED_ZONE) {
 			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
@@ -2669,7 +2668,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
 					 cachep->buffer_size / PAGE_SIZE, 0);
 #else
 		if (cachep->ctor)
-			cachep->ctor(cachep, objp);
+			cachep->ctor(objp);
 #endif
 		slab_bufctl(slabp)[i] = i + 1;
 	}
@@ -3093,7 +3092,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 #endif
 	objp += obj_offset(cachep);
 	if (cachep->ctor && cachep->flags & SLAB_POISON)
-		cachep->ctor(cachep, objp);
+		cachep->ctor(objp);
 #if ARCH_SLAB_MINALIGN
 	if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
 		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
diff --git a/mm/slob.c b/mm/slob.c
index de268eb7ac70..d8fbd4d1bfa7 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -525,12 +525,11 @@ struct kmem_cache {
 	unsigned int size, align;
 	unsigned long flags;
 	const char *name;
-	void (*ctor)(struct kmem_cache *, void *);
+	void (*ctor)(void *);
 };
 
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
-	size_t align, unsigned long flags,
-	void (*ctor)(struct kmem_cache *, void *))
+	size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *c;
 
@@ -575,7 +574,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 		b = slob_new_page(flags, get_order(c->size), node);
 
 	if (c->ctor)
-		c->ctor(c, b);
+		c->ctor(b);
 
 	return b;
 }
diff --git a/mm/slub.c b/mm/slub.c
index 77c21cf53ff9..b7e2cd5d82db 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1012,7 +1012,7 @@ __setup("slub_debug", setup_slub_debug);
 
 static unsigned long kmem_cache_flags(unsigned long objsize,
 	unsigned long flags, const char *name,
-	void (*ctor)(struct kmem_cache *, void *))
+	void (*ctor)(void *))
 {
 	/*
 	 * Enable debugging if selected on the kernel commandline.
@@ -1040,7 +1040,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
 static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
 static inline unsigned long kmem_cache_flags(unsigned long objsize,
 	unsigned long flags, const char *name,
-	void (*ctor)(struct kmem_cache *, void *))
+	void (*ctor)(void *))
 {
 	return flags;
 }
@@ -1103,7 +1103,7 @@ static void setup_object(struct kmem_cache *s, struct page *page,
 {
 	setup_object_debug(s, page, object);
 	if (unlikely(s->ctor))
-		s->ctor(s, object);
+		s->ctor(object);
 }
 
 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -2286,7 +2286,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 		const char *name, size_t size,
 		size_t align, unsigned long flags,
-		void (*ctor)(struct kmem_cache *, void *))
+		void (*ctor)(void *))
 {
 	memset(s, 0, kmem_size);
 	s->name = name;
@@ -3042,7 +3042,7 @@ static int slab_unmergeable(struct kmem_cache *s)
 
 static struct kmem_cache *find_mergeable(size_t size,
 		size_t align, unsigned long flags, const char *name,
-		void (*ctor)(struct kmem_cache *, void *))
+		void (*ctor)(void *))
 {
 	struct kmem_cache *s;
 
@@ -3082,8 +3082,7 @@ static struct kmem_cache *find_mergeable(size_t size,
 }
 
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
-		size_t align, unsigned long flags,
-		void (*ctor)(struct kmem_cache *, void *))
+		size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *s;
 
diff --git a/mm/sparse.c b/mm/sparse.c
index 8ffc08990008..5d9dbbb9d39e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -377,7 +377,7 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
-struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
+static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 {
 	struct page *map;
 	struct mem_section *ms = __nr_to_section(pnum);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d8aadaf2a0ba..b8035b055129 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,7 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = {
 
 struct address_space swapper_space = {
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-	.tree_lock	= __RW_LOCK_UNLOCKED(swapper_space.tree_lock),
+	.tree_lock	= __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
 	.a_ops		= &swap_aops,
 	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
 	.backing_dev_info = &swap_backing_dev_info,
@@ -56,7 +56,8 @@ static struct {
 
 void show_swap_cache_info(void)
 {
-	printk("Swap cache: add %lu, delete %lu, find %lu/%lu\n",
+	printk("%lu pages in swap cache\n", total_swapcache_pages);
+	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total);
 	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
@@ -64,7 +65,7 @@ void show_swap_cache_info(void)
 }
 
 /*
- * add_to_swap_cache resembles add_to_page_cache on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
 int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
@@ -76,19 +77,26 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	BUG_ON(PagePrivate(page));
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
-		write_lock_irq(&swapper_space.tree_lock);
+		page_cache_get(page);
+		SetPageSwapCache(page);
+		set_page_private(page, entry.val);
+
+		spin_lock_irq(&swapper_space.tree_lock);
 		error = radix_tree_insert(&swapper_space.page_tree,
 						entry.val, page);
-		if (!error) {
-			page_cache_get(page);
-			SetPageSwapCache(page);
-			set_page_private(page, entry.val);
+		if (likely(!error)) {
 			total_swapcache_pages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
 			INC_CACHE_INFO(add_total);
 		}
-		write_unlock_irq(&swapper_space.tree_lock);
+		spin_unlock_irq(&swapper_space.tree_lock);
 		radix_tree_preload_end();
+
+		if (unlikely(error)) {
+			set_page_private(page, 0UL);
+			ClearPageSwapCache(page);
+			page_cache_release(page);
+		}
 	}
 	return error;
 }
@@ -175,9 +183,9 @@ void delete_from_swap_cache(struct page *page)
 
 	entry.val = page_private(page);
 
-	write_lock_irq(&swapper_space.tree_lock);
+	spin_lock_irq(&swapper_space.tree_lock);
 	__delete_from_swap_cache(page);
-	write_unlock_irq(&swapper_space.tree_lock);
+	spin_unlock_irq(&swapper_space.tree_lock);
 
 	swap_free(entry);
 	page_cache_release(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2f33edb8bee9..6beb6251e99d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -33,8 +33,8 @@
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
 
-DEFINE_SPINLOCK(swap_lock);
-unsigned int nr_swapfiles;
+static DEFINE_SPINLOCK(swap_lock);
+static unsigned int nr_swapfiles;
 long total_swap_pages;
 static int swap_overflow;
 static int least_priority;
@@ -44,7 +44,7 @@ static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-struct swap_list_t swap_list = {-1, -1};
+static struct swap_list_t swap_list = {-1, -1};
 
 static struct swap_info_struct swap_info[MAX_SWAPFILES];
 
@@ -369,13 +369,13 @@ int remove_exclusive_swap_page(struct page *page)
 	retval = 0;
 	if (p->swap_map[swp_offset(entry)] == 1) {
 		/* Recheck the page count with the swapcache lock held.. */
-		write_lock_irq(&swapper_space.tree_lock);
+		spin_lock_irq(&swapper_space.tree_lock);
 		if ((page_count(page) == 2) && !PageWriteback(page)) {
 			__delete_from_swap_cache(page);
 			SetPageDirty(page);
 			retval = 1;
 		}
-		write_unlock_irq(&swapper_space.tree_lock);
+		spin_unlock_irq(&swapper_space.tree_lock);
 	}
 	spin_unlock(&swap_lock);
 
diff --git a/mm/truncate.c b/mm/truncate.c
index b8961cb63414..e68443d74567 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -349,18 +349,18 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
 		return 0;
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	if (PageDirty(page))
 		goto failed;
 
 	BUG_ON(PagePrivate(page));
 	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 	ClearPageUptodate(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
 failed:
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 	return 0;
 }
 
diff --git a/mm/util.c b/mm/util.c
index 8f18683825bc..9341ca77bd88 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1,7 +1,9 @@
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/module.h>
 #include <linux/err.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 
 /**
@@ -68,25 +70,22 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
 EXPORT_SYMBOL(kmemdup);
 
 /**
- * krealloc - reallocate memory. The contents will remain unchanged.
+ * __krealloc - like krealloc() but don't free @p.
  * @p: object to reallocate memory for.
  * @new_size: how many bytes of memory are required.
  * @flags: the type of memory to allocate.
  *
- * The contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes. If @p is %NULL, krealloc()
- * behaves exactly like kmalloc(). If @size is 0 and @p is not a
- * %NULL pointer, the object pointed to is freed.
+ * This function is like krealloc() except it never frees the originally
+ * allocated buffer. Use this if you don't want to free the buffer immediately
+ * like, for example, with RCU.
  */
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
+void *__krealloc(const void *p, size_t new_size, gfp_t flags)
 {
 	void *ret;
 	size_t ks = 0;
 
-	if (unlikely(!new_size)) {
-		kfree(p);
+	if (unlikely(!new_size))
 		return ZERO_SIZE_PTR;
-	}
 
 	if (p)
 		ks = ksize(p);
@@ -95,10 +94,37 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
 		return (void *)p;
 
 	ret = kmalloc_track_caller(new_size, flags);
-	if (ret && p) {
+	if (ret && p)
 		memcpy(ret, p, ks);
+
+	return ret;
+}
+EXPORT_SYMBOL(__krealloc);
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes. If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc(). If @size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+
+	if (unlikely(!new_size)) {
 		kfree(p);
+		return ZERO_SIZE_PTR;
 	}
+
+	ret = __krealloc(p, new_size, flags);
+	if (ret && p != ret)
+		kfree(p);
+
 	return ret;
 }
 EXPORT_SYMBOL(krealloc);
@@ -136,3 +162,12 @@ char *strndup_user(const char __user *s, long n)
 	return p;
 }
 EXPORT_SYMBOL(strndup_user);
+
+#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	mm->mmap_base = TASK_UNMAPPED_BASE;
+	mm->get_unmapped_area = arch_get_unmapped_area;
+	mm->unmap_area = arch_unmap_area;
+}
+#endif
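The mm/util.c hunks split the old krealloc() into two layers: __krealloc() allocates and copies but never frees the original buffer (useful when the old buffer must stay live, e.g. until an RCU grace period), and krealloc() is rebuilt on top of it, freeing the old buffer only when a different one actually came back. The following is a small userspace sketch of that layering; grow_nofree() and grow() are hypothetical names using plain malloc/free rather than the kernel allocator.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Like __krealloc(): return a buffer of new_size, but never free the old one. */
static void *grow_nofree(const void *p, size_t old_size, size_t new_size)
{
	void *ret;

	if (new_size == 0)
		return NULL;
	ret = malloc(new_size);
	if (ret && p)
		memcpy(ret, p, old_size < new_size ? old_size : new_size);
	return ret;
}

/*
 * Like krealloc(): layered on the no-free variant, freeing the old buffer
 * only when a different buffer was actually handed back.
 */
static void *grow(void *p, size_t old_size, size_t new_size)
{
	void *ret;

	if (new_size == 0) {
		free(p);
		return NULL;
	}
	ret = grow_nofree(p, old_size, new_size);
	if (ret && p != ret)
		free(p);
	return ret;
}

int main(void)
{
	char *s = grow(NULL, 0, 6);

	snprintf(s, 6, "hello");
	s = grow(s, 6, 32);	/* old buffer freed for us */
	printf("%s\n", s);
	free(s);
	return 0;
}

Callers that need the old buffer to outlive the resize call the no-free variant directly and free the original themselves once it is safe.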
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35f293816294..85b9a0d2c877 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -381,16 +381,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
 		return;
 
 	if ((PAGE_SIZE-1) & (unsigned long)addr) {
-		printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
-		WARN_ON(1);
+		WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
 		return;
 	}
 
 	area = remove_vm_area(addr);
 	if (unlikely(!area)) {
-		printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
-				addr);
-		WARN_ON(1);
+		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
+			addr);
 		return;
 	}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26672c6cd3ce..8f71761bc4b7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -391,17 +391,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 }
 
 /*
- * Attempt to detach a locked page from its ->mapping. If it is dirty or if
- * someone else has a ref on the page, abort and return 0.  If it was
- * successfully detached, return 1.  Assumes the caller has a single ref on
- * this page.
+ * Same as remove_mapping, but if the page is removed from the mapping, it
+ * gets returned with a refcount of 0.
  */
-int remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page)
 {
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	/*
 	 * The non racy check for a busy page.
 	 *
@@ -427,28 +425,48 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
 	 * and thus under tree_lock, then this ordering is not required.
 	 */
-	if (unlikely(page_count(page) != 2))
+	if (!page_freeze_refs(page, 2))
 		goto cannot_free;
-	smp_rmb();
-	if (unlikely(PageDirty(page)))
+	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+	if (unlikely(PageDirty(page))) {
+		page_unfreeze_refs(page, 2);
 		goto cannot_free;
+	}
 
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		swap_free(swap);
-		__put_page(page);	/* The pagecache ref */
-		return 1;
+	} else {
+		__remove_from_page_cache(page);
+		spin_unlock_irq(&mapping->tree_lock);
 	}
 
-	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
-	__put_page(page);
 	return 1;
 
 cannot_free:
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
+/*
+ * Attempt to detach a locked page from its ->mapping. If it is dirty or if
+ * someone else has a ref on the page, abort and return 0.  If it was
+ * successfully detached, return 1.  Assumes the caller has a single ref on
+ * this page.
+ */
+int remove_mapping(struct address_space *mapping, struct page *page)
+{
+	if (__remove_mapping(mapping, page)) {
+		/*
+		 * Unfreezing the refcount with 1 rather than 2 effectively
+		 * drops the pagecache ref for us without requiring another
+		 * atomic operation.
+		 */
+		page_unfreeze_refs(page, 1);
+		return 1;
+	}
 	return 0;
 }
 
@@ -598,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PagePrivate(page)) {
 			if (!try_to_release_page(page, sc->gfp_mask))
 				goto activate_locked;
-			if (!mapping && page_count(page) == 1)
-				goto free_it;
+			if (!mapping && page_count(page) == 1) {
+				unlock_page(page);
+				if (put_page_testzero(page))
+					goto free_it;
+				else {
+					/*
+					 * rare race with speculative reference.
+					 * the speculative reference will free
+					 * this page shortly, so we may
+					 * increment nr_reclaimed here (and
+					 * leave it off the LRU).
+					 */
+					nr_reclaimed++;
+					continue;
+				}
+			}
 		}
 
-		if (!mapping || !remove_mapping(mapping, page))
+		if (!mapping || !__remove_mapping(mapping, page))
 			goto keep_locked;
 
-free_it:
 		unlock_page(page);
+free_it:
 		nr_reclaimed++;
-		if (!pagevec_add(&freed_pvec, page))
-			__pagevec_release_nonlru(&freed_pvec);
+		if (!pagevec_add(&freed_pvec, page)) {
+			__pagevec_free(&freed_pvec);
+			pagevec_reinit(&freed_pvec);
+		}
 		continue;
 
 activate_locked:
@@ -623,7 +657,7 @@ keep:
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
-		__pagevec_free(&freed_pvec);
+		__pagevec_free(&freed_pvec);
 	count_vm_events(PGACTIVATE, pgactivate);
 	return nr_reclaimed;
 }
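Two reference-count tricks carry the vmscan changes above: remove_mapping() thaws the frozen count to 1 instead of 2, which hands the pagecache reference back to the caller without a second atomic operation, and shrink_page_list() drops its own reference with a put-and-test so that losing the race to a speculative reference is harmless. A minimal userspace C11 sketch of the put-and-test side (put_testzero and struct obj are illustrative names, not the kernel's put_page_testzero or struct page):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	_Atomic int refcount;
	int data;
};

/* Analogue of put_page_testzero(): drop one ref, report whether it hit 0. */
static bool put_testzero(struct obj *o)
{
	return atomic_fetch_sub(&o->refcount, 1) == 1;
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	atomic_init(&o->refcount, 2);	/* cache reference + our reference */
	o->data = 7;

	/*
	 * The cheap path used by remove_mapping() above: after a successful
	 * freeze at 2, thawing the count to 1 instead of 2 hands the cache's
	 * reference to us without another atomic operation.
	 */
	atomic_store(&o->refcount, 1);

	/* The shrink_page_list() path: drop our ref and free only if last. */
	if (put_testzero(o)) {
		printf("last reference dropped, freeing (data=%d)\n", o->data);
		free(o);
	} else {
		puts("raced with a speculative reference; someone else frees");
	}
	return 0;
}

Whichever side sees the count reach zero owns the final free, which is why the reclaim path can simply count the page as reclaimed and move on when it loses the race.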