author		Felix Blyakher <felixb@sgi.com>	2009-04-01 17:58:39 -0400
committer	Felix Blyakher <felixb@sgi.com>	2009-04-01 17:58:39 -0400
commit		f36345ff9a4a77f2cc576a2777b6256d5c8798fa (patch)
tree		7ae4c607f6baae74060c2e385f744e171fbbf92b /mm
parent		1aacc064e029f0017384e463121b98f06d3a2cc3 (diff)
parent		8b53ef33d9d8fa5f771ae11cc6a6e7bc0182beec (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            |    9
-rw-r--r--  mm/Kconfig.debug      |   17
-rw-r--r--  mm/Makefile           |    1
-rw-r--r--  mm/debug-pagealloc.c  |  129
-rw-r--r--  mm/highmem.c          |   45
-rw-r--r--  mm/hugetlb.c          |    6
-rw-r--r--  mm/internal.h         |    8
-rw-r--r--  mm/memory.c           |   33
-rw-r--r--  mm/oom_kill.c         |   12
-rw-r--r--  mm/page-writeback.c   |   42
-rw-r--r--  mm/page_alloc.c       |   29
-rw-r--r--  mm/shmem.c            |    3
-rw-r--r--  mm/sparse.c           |    4
-rw-r--r--  mm/swap.c             |   23
-rw-r--r--  mm/util.c             |   30
-rw-r--r--  mm/vmalloc.c          |   19
-rw-r--r--  mm/vmscan.c           |  101
-rw-r--r--  mm/vmstat.c           |   11
18 files changed, 363 insertions(+), 159 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index a5b77811fdf..b53427ad30a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -206,7 +206,6 @@ config VIRT_TO_BUS
 config UNEVICTABLE_LRU
 	bool "Add LRU list to track non-evictable pages"
 	default y
-	depends on MMU
 	help
 	  Keeps unevictable pages off of the active and inactive pageout
 	  lists, so kswapd will not waste CPU time or have its balancing
@@ -214,5 +213,13 @@ config UNEVICTABLE_LRU
 	  will use one page flag and increase the code size a little,
 	  say Y unless you know what you are doing.
 
+config HAVE_MLOCK
+	bool
+	default y if MMU=y
+
+config HAVE_MLOCKED_PAGE_BIT
+	bool
+	default y if HAVE_MLOCK=y && UNEVICTABLE_LRU=y
+
 config MMU_NOTIFIER
 	bool
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
new file mode 100644
index 00000000000..c8d62d49a44
--- /dev/null
+++ b/mm/Kconfig.debug
@@ -0,0 +1,17 @@
+config WANT_PAGE_DEBUG_FLAGS
+	bool
+
+config PAGE_POISONING
+	bool "Debug page memory allocations"
+	depends on DEBUG_KERNEL && !ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	depends on !HIBERNATION
+	select DEBUG_PAGEALLOC
+	select WANT_PAGE_DEBUG_FLAGS
+	help
+	  Fill the pages with poison patterns after free_pages() and verify
+	  the patterns before alloc_pages(). This results in a large slowdown,
+	  but helps to find certain types of memory corruptions.
+
+	  This option cannot be enabled with hibernation. Otherwise, it will get
+	  wrong messages for memory corruption because the free pages are not
+	  saved to the suspend image.
diff --git a/mm/Makefile b/mm/Makefile
index 818569b68f4..ec73c68b601 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
 obj-$(CONFIG_FAILSLAB) += failslab.o
diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c
new file mode 100644
index 00000000000..a1e3324de2b
--- /dev/null
+++ b/mm/debug-pagealloc.c
@@ -0,0 +1,129 @@
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/page-debug-flags.h>
+#include <linux/poison.h>
+
+static inline void set_page_poison(struct page *page)
+{
+	__set_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags);
+}
+
+static inline void clear_page_poison(struct page *page)
+{
+	__clear_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags);
+}
+
+static inline bool page_poison(struct page *page)
+{
+	return test_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags);
+}
+
+static void poison_highpage(struct page *page)
+{
+	/*
+	 * Page poisoning for highmem pages is not implemented.
+	 *
+	 * This can be called from interrupt contexts.
+	 * So we need to create a new kmap_atomic slot for this
+	 * application and it will need interrupt protection.
+	 */
+}
+
+static void poison_page(struct page *page)
+{
+	void *addr;
+
+	if (PageHighMem(page)) {
+		poison_highpage(page);
+		return;
+	}
+	set_page_poison(page);
+	addr = page_address(page);
+	memset(addr, PAGE_POISON, PAGE_SIZE);
+}
+
+static void poison_pages(struct page *page, int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		poison_page(page + i);
+}
+
+static bool single_bit_flip(unsigned char a, unsigned char b)
+{
+	unsigned char error = a ^ b;
+
+	return error && !(error & (error - 1));
+}
+
+static void check_poison_mem(unsigned char *mem, size_t bytes)
+{
+	unsigned char *start;
+	unsigned char *end;
+
+	for (start = mem; start < mem + bytes; start++) {
+		if (*start != PAGE_POISON)
+			break;
+	}
+	if (start == mem + bytes)
+		return;
+
+	for (end = mem + bytes - 1; end > start; end--) {
+		if (*end != PAGE_POISON)
+			break;
+	}
+
+	if (!printk_ratelimit())
+		return;
+	else if (start == end && single_bit_flip(*start, PAGE_POISON))
+		printk(KERN_ERR "pagealloc: single bit error\n");
+	else
+		printk(KERN_ERR "pagealloc: memory corruption\n");
+
+	print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start,
+			end - start + 1, 1);
+	dump_stack();
+}
+
+static void unpoison_highpage(struct page *page)
+{
+	/*
+	 * See comment in poison_highpage().
+	 * Highmem pages should not be poisoned for now
+	 */
+	BUG_ON(page_poison(page));
+}
+
+static void unpoison_page(struct page *page)
+{
+	if (PageHighMem(page)) {
+		unpoison_highpage(page);
+		return;
+	}
+	if (page_poison(page)) {
+		void *addr = page_address(page);
+
+		check_poison_mem(addr, PAGE_SIZE);
+		clear_page_poison(page);
+	}
+}
+
+static void unpoison_pages(struct page *page, int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		unpoison_page(page + i);
+}
+
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (!debug_pagealloc_enabled)
+		return;
+
+	if (enable)
+		unpoison_pages(page, numpages);
+	else
+		poison_pages(page, numpages);
+}
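
The single_bit_flip() helper above distinguishes a one-bit error from broader corruption with the usual power-of-two test on the XOR of the two bytes. A small stand-alone sketch of the same check, hypothetical user-space code shown only for illustration, not part of the patch:

#include <stdbool.h>
#include <stdio.h>

/* Same test as single_bit_flip() above: true iff exactly one bit differs. */
static bool one_bit_differs(unsigned char a, unsigned char b)
{
	unsigned char error = a ^ b;

	/* a non-zero power of two shares no bits with itself minus one */
	return error && !(error & (error - 1));
}

int main(void)
{
	printf("%d\n", one_bit_differs(0xaa, 0xab));	/* 1: only bit 0 differs */
	printf("%d\n", one_bit_differs(0xaa, 0xa9));	/* 0: bits 0 and 1 differ */
	return 0;
}
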
diff --git a/mm/highmem.c b/mm/highmem.c
index 910198037bf..68eb1d9b63f 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -422,3 +422,48 @@ void __init page_address_init(void)
 }
 
 #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */
+
+#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)
+
+void debug_kmap_atomic(enum km_type type)
+{
+	static unsigned warn_count = 10;
+
+	if (unlikely(warn_count == 0))
+		return;
+
+	if (unlikely(in_interrupt())) {
+		if (in_irq()) {
+			if (type != KM_IRQ0 && type != KM_IRQ1 &&
+			    type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
+			    type != KM_BOUNCE_READ) {
+				WARN_ON(1);
+				warn_count--;
+			}
+		} else if (!irqs_disabled()) {	/* softirq */
+			if (type != KM_IRQ0 && type != KM_IRQ1 &&
+			    type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
+			    type != KM_SKB_SUNRPC_DATA &&
+			    type != KM_SKB_DATA_SOFTIRQ &&
+			    type != KM_BOUNCE_READ) {
+				WARN_ON(1);
+				warn_count--;
+			}
+		}
+	}
+
+	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
+	    type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
+		if (!irqs_disabled()) {
+			WARN_ON(1);
+			warn_count--;
+		}
+	} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
+		if (irq_count() == 0 && !irqs_disabled()) {
+			WARN_ON(1);
+			warn_count--;
+		}
+	}
+}
+
+#endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 107da3d809a..28c655ba935 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -918,7 +918,7 @@ static void return_unused_surplus_pages(struct hstate *h,
  * an instantiated the change should be committed via vma_commit_reservation.
  * No action is required on failure.
  */
-static int vma_needs_reservation(struct hstate *h,
+static long vma_needs_reservation(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
@@ -933,7 +933,7 @@ static int vma_needs_reservation(struct hstate *h,
 		return 1;
 
 	} else {
-		int err;
+		long err;
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		struct resv_map *reservations = vma_resv_map(vma);
 
@@ -969,7 +969,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	struct page *page;
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned int chg;
+	long chg;
 
 	/*
 	 * Processes that did not create the mapping will have no reserves and
diff --git a/mm/internal.h b/mm/internal.h
index 478223b73a2..987bb03fbdd 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -63,6 +63,7 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
+#ifdef CONFIG_HAVE_MLOCK
 extern long mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
 extern void munlock_vma_pages_range(struct vm_area_struct *vma,
@@ -71,6 +72,7 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
 {
 	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
 }
+#endif
 
 #ifdef CONFIG_UNEVICTABLE_LRU
 /*
@@ -90,7 +92,7 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old)
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
+#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
  * Called only in fault path via page_evictable() for a new page
  * to determine if it's being mapped into a LOCKED vma.
@@ -165,7 +167,7 @@ static inline void free_page_mlock(struct page *page)
 	}
 }
 
-#else /* CONFIG_UNEVICTABLE_LRU */
+#else /* CONFIG_HAVE_MLOCKED_PAGE_BIT */
 static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
 {
 	return 0;
@@ -175,7 +177,7 @@ static inline void mlock_vma_page(struct page *page) { }
 static inline void mlock_migrate_page(struct page *new, struct page *old) { }
 static inline void free_page_mlock(struct page *page) { }
 
-#endif /* CONFIG_UNEVICTABLE_LRU */
+#endif /* CONFIG_HAVE_MLOCKED_PAGE_BIT */
 
 /*
  * Return the mem_map entry representing the 'offset' subpage within
diff --git a/mm/memory.c b/mm/memory.c
index 2032ad2fc34..cf6873e91c6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1151,6 +1151,11 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		if ((flags & FOLL_WRITE) &&
 		    !pte_dirty(pte) && !PageDirty(page))
 			set_page_dirty(page);
+		/*
+		 * pte_mkyoung() would be more correct here, but atomic care
+		 * is needed to avoid losing the dirty bit: it is easier to use
+		 * mark_page_accessed().
+		 */
 		mark_page_accessed(page);
 	}
 unlock:
@@ -1940,6 +1945,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * get_user_pages(.write=1, .force=1).
 	 */
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+		struct vm_fault vmf;
+		int tmp;
+
+		vmf.virtual_address = (void __user *)(address &
+							PAGE_MASK);
+		vmf.pgoff = old_page->index;
+		vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+		vmf.page = old_page;
+
 		/*
 		 * Notify the address space that the page is about to
 		 * become writable so that it can prohibit this or wait
@@ -1951,8 +1965,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page_cache_get(old_page);
 		pte_unmap_unlock(page_table, ptl);
 
-		if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
+		tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
+		if (unlikely(tmp &
+			     (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
+			ret = tmp;
 			goto unwritable_page;
+		}
 
 		/*
 		 * Since we dropped the lock we need to revalidate
@@ -2101,7 +2119,7 @@ oom:
 
 unwritable_page:
 	page_cache_release(old_page);
-	return VM_FAULT_SIGBUS;
+	return ret;
 }
 
 /*
@@ -2435,8 +2453,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(PGMAJFAULT);
 	}
 
-	mark_page_accessed(page);
-
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
@@ -2645,9 +2661,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * to become writable
 	 */
 	if (vma->vm_ops->page_mkwrite) {
+		int tmp;
+
 		unlock_page(page);
-		if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
-			ret = VM_FAULT_SIGBUS;
+		vmf.flags |= FAULT_FLAG_MKWRITE;
+		tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
+		if (unlikely(tmp &
+			     (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
+			ret = tmp;
 			anon = 1; /* no anon but release vmf.page */
 			goto out_unlocked;
 		}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 40ba05061a4..d3b9bac085b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -55,7 +55,7 @@ static DEFINE_SPINLOCK(zone_scan_lock);
 
 unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
-	unsigned long points, cpu_time, run_time, s;
+	unsigned long points, cpu_time, run_time;
 	struct mm_struct *mm;
 	struct task_struct *child;
 
@@ -110,12 +110,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	else
 		run_time = 0;
 
-	s = int_sqrt(cpu_time);
-	if (s)
-		points /= s;
-	s = int_sqrt(int_sqrt(run_time));
-	if (s)
-		points /= s;
+	if (cpu_time)
+		points /= int_sqrt(cpu_time);
+	if (run_time)
+		points /= int_sqrt(int_sqrt(run_time));
 
 	/*
 	 * Niced processes are most likely less important, so double
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 40ca7cdb653..30351f0063a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -92,14 +92,14 @@ int vm_dirty_ratio = 20;
 unsigned long vm_dirty_bytes;
 
 /*
- * The interval between `kupdate'-style writebacks, in jiffies
+ * The interval between `kupdate'-style writebacks
  */
-int dirty_writeback_interval = 5 * HZ;
+unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
 
 /*
- * The longest number of jiffies for which data is allowed to remain dirty
+ * The longest time for which data is allowed to remain dirty
  */
-int dirty_expire_interval = 30 * HZ;
+unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
 
 /*
  * Flag that makes the machine dump writes/reads and block dirtyings.
@@ -770,9 +770,9 @@ static void wb_kupdate(unsigned long arg)
 
 	sync_supers();
 
-	oldest_jif = jiffies - dirty_expire_interval;
+	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval);
 	start_jif = jiffies;
-	next_jif = start_jif + dirty_writeback_interval;
+	next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
 	nr_to_write = global_page_state(NR_FILE_DIRTY) +
 			global_page_state(NR_UNSTABLE_NFS) +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
@@ -801,9 +801,10 @@ static void wb_kupdate(unsigned long arg)
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
 	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec_userhz_jiffies(table, write, file, buffer, length, ppos);
+	proc_dointvec(table, write, file, buffer, length, ppos);
 	if (dirty_writeback_interval)
-		mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
+		mod_timer(&wb_timer, jiffies +
+			msecs_to_jiffies(dirty_writeback_interval * 10));
 	else
 		del_timer(&wb_timer);
 	return 0;
@@ -905,7 +906,8 @@ void __init page_writeback_init(void)
 {
 	int shift;
 
-	mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
+	mod_timer(&wb_timer,
+		  jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 
@@ -1198,6 +1200,20 @@ int __set_page_dirty_no_writeback(struct page *page)
 }
 
 /*
+ * Helper function for set_page_dirty family.
+ * NOTE: This relies on being atomic wrt interrupts.
+ */
+void account_page_dirtied(struct page *page, struct address_space *mapping)
+{
+	if (mapping_cap_account_dirty(mapping)) {
+		__inc_zone_page_state(page, NR_FILE_DIRTY);
+		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+		task_dirty_inc(current);
+		task_io_account_write(PAGE_CACHE_SIZE);
+	}
+}
+
+/*
  * For address_spaces which do not use buffers. Just tag the page as dirty in
  * its radix tree.
  *
@@ -1226,13 +1242,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 		if (mapping2) { /* Race with truncate? */
 			BUG_ON(mapping2 != mapping);
 			WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
-			if (mapping_cap_account_dirty(mapping)) {
-				__inc_zone_page_state(page, NR_FILE_DIRTY);
-				__inc_bdi_stat(mapping->backing_dev_info,
-						BDI_RECLAIMABLE);
-				task_dirty_inc(current);
-				task_io_account_write(PAGE_CACHE_SIZE);
-			}
+			account_page_dirtied(page, mapping);
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
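
The two interval sysctls above now hold centiseconds instead of jiffies, so every consumer converts with msecs_to_jiffies(interval * 10). A quick worked check of that arithmetic for the default of 5 * 100 centiseconds, as a hypothetical user-space sketch with an assumed HZ of 1000 (illustration only, not part of the patch):

#include <stdio.h>

#define HZ 1000	/* assumed tick rate, for illustration only */

/* user-space stand-in for the kernel's msecs_to_jiffies() at HZ=1000 */
static unsigned long msecs_to_jiffies_example(unsigned int ms)
{
	return (unsigned long)ms * HZ / 1000;
}

int main(void)
{
	unsigned int dirty_writeback_interval = 5 * 100;	/* centiseconds */

	/* 500 cs -> 5000 ms -> the writeback timer still fires every 5 seconds */
	printf("%lu jiffies at HZ=%d\n",
	       msecs_to_jiffies_example(dirty_writeback_interval * 10), HZ);
	return 0;
}
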
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a3803ea8c27..0284e528748 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -922,13 +922,10 @@ static void drain_pages(unsigned int cpu)
 	unsigned long flags;
 	struct zone *zone;
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		if (!populated_zone(zone))
-			continue;
-
 		pset = zone_pcp(zone, cpu);
 
 		pcp = &pset->pcp;
@@ -1585,7 +1582,8 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist, order,
+						gfp_mask, nodemask);
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
@@ -1879,10 +1877,7 @@ void show_free_areas(void)
 	int cpu;
 	struct zone *zone;
 
-	for_each_zone(zone) {
-		if (!populated_zone(zone))
-			continue;
-
+	for_each_populated_zone(zone) {
 		show_node(zone);
 		printk("%s per-cpu:\n", zone->name);
 
@@ -1922,12 +1917,9 @@ void show_free_areas(void)
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE));
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		int i;
 
-		if (!populated_zone(zone))
-			continue;
-
 		show_node(zone);
 		printk("%s"
 			" free:%lukB"
@@ -1967,12 +1959,9 @@ void show_free_areas(void)
 		printk("\n");
 	}
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
-		if (!populated_zone(zone))
-			continue;
-
 		show_node(zone);
 		printk("%s: ", zone->name);
 
@@ -2784,11 +2773,7 @@ static int __cpuinit process_zones(int cpu)
 
 	node_set_state(node, N_CPU);	/* this node has a cpu */
 
-	for_each_zone(zone) {
-
-		if (!populated_zone(zone))
-			continue;
-
+	for_each_populated_zone(zone) {
 		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
 					 GFP_KERNEL, node);
 		if (!zone_pcp(zone, cpu))
diff --git a/mm/shmem.c b/mm/shmem.c
index 7ec78e24a30..d94d2e9146b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1068,8 +1068,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		swap_duplicate(swap);
 		BUG_ON(page_mapped(page));
 		page_cache_release(page);	/* pagecache ref */
-		set_page_dirty(page);
-		unlock_page(page);
+		swap_writepage(page, wbc);
 		if (inode) {
 			mutex_lock(&shmem_swaplist_mutex);
 			/* move instead of add in case we're racing */
diff --git a/mm/sparse.c b/mm/sparse.c
index 083f5b63e7a..da432d9f0ae 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -164,9 +164,7 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
 		WARN_ON_ONCE(1);
 		*start_pfn = max_sparsemem_pfn;
 		*end_pfn = max_sparsemem_pfn;
-	}
-
-	if (*end_pfn > max_sparsemem_pfn) {
+	} else if (*end_pfn > max_sparsemem_pfn) {
 		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
 			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
 			*start_pfn, *end_pfn, max_sparsemem_pfn);
diff --git a/mm/swap.c b/mm/swap.c
index 8adb9feb61e..6e83084c1f6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -457,29 +457,6 @@ void pagevec_strip(struct pagevec *pvec)
 }
 
 /**
- * pagevec_swap_free - try to free swap space from the pages in a pagevec
- * @pvec: pagevec with swapcache pages to free the swap space of
- *
- * The caller needs to hold an extra reference to each page and
- * not hold the page lock on the pages. This function uses a
- * trylock on the page lock so it may not always free the swap
- * space associated with a page.
- */
-void pagevec_swap_free(struct pagevec *pvec)
-{
-	int i;
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-
-		if (PageSwapCache(page) && trylock_page(page)) {
-			try_to_free_swap(page);
-			unlock_page(page);
-		}
-	}
-}
-
-/**
  * pagevec_lookup - gang pagecache lookup
  * @pvec: Where the resulting pages are placed
  * @mapping: The address_space to search
diff --git a/mm/util.c b/mm/util.c
index 37eaccdf305..7c122e49f76 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -70,6 +70,36 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
 EXPORT_SYMBOL(kmemdup);
 
 /**
+ * memdup_user - duplicate memory region from user space
+ *
+ * @src: source address in user space
+ * @len: number of bytes to copy
+ *
+ * Returns an ERR_PTR() on failure.
+ */
+void *memdup_user(const void __user *src, size_t len)
+{
+	void *p;
+
+	/*
+	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
+	 * cause pagefault, which makes it pointless to use GFP_NOFS
+	 * or GFP_ATOMIC.
+	 */
+	p = kmalloc_track_caller(len, GFP_KERNEL);
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(p, src, len)) {
+		kfree(p);
+		return ERR_PTR(-EFAULT);
+	}
+
+	return p;
+}
+EXPORT_SYMBOL(memdup_user);
+
+/**
  * __krealloc - like krealloc() but don't free @p.
  * @p: object to reallocate memory for.
  * @new_size: how many bytes of memory are required.
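
memdup_user() above wraps the common kmalloc() + copy_from_user() pattern and reports failure through ERR_PTR(). A minimal sketch of a caller follows; the handler name and parameter struct are hypothetical and not part of this patch, and the usual err.h/slab.h/uaccess.h includes are assumed:

/* hypothetical example: copy a variable-sized parameter block from user space */
static long example_set_params(void __user *arg, size_t len)
{
	struct example_params *p;	/* hypothetical structure, for illustration */

	p = memdup_user(arg, len);
	if (IS_ERR(p))
		return PTR_ERR(p);	/* -ENOMEM or -EFAULT, as set by memdup_user() */

	/* ... validate and apply the parameters ... */

	kfree(p);
	return 0;
}
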
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index af58324c361..fab19876b4d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -671,10 +671,7 @@ struct vmap_block {
 	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
 	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
 	union {
-		struct {
-			struct list_head free_list;
-			struct list_head dirty_list;
-		};
+		struct list_head free_list;
 		struct rcu_head rcu_head;
 	};
 };
@@ -741,7 +738,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
 	bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
 	bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
 	INIT_LIST_HEAD(&vb->free_list);
-	INIT_LIST_HEAD(&vb->dirty_list);
 
 	vb_idx = addr_to_vb_idx(va->va_start);
 	spin_lock(&vmap_block_tree_lock);
@@ -772,12 +768,7 @@ static void free_vmap_block(struct vmap_block *vb)
 	struct vmap_block *tmp;
 	unsigned long vb_idx;
 
-	spin_lock(&vb->vbq->lock);
-	if (!list_empty(&vb->free_list))
-		list_del(&vb->free_list);
-	if (!list_empty(&vb->dirty_list))
-		list_del(&vb->dirty_list);
-	spin_unlock(&vb->vbq->lock);
+	BUG_ON(!list_empty(&vb->free_list));
 
 	vb_idx = addr_to_vb_idx(vb->va->va_start);
 	spin_lock(&vmap_block_tree_lock);
@@ -862,11 +853,7 @@ static void vb_free(const void *addr, unsigned long size)
 
 	spin_lock(&vb->lock);
 	bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
-	if (!vb->dirty) {
-		spin_lock(&vb->vbq->lock);
-		list_add(&vb->dirty_list, &vb->vbq->dirty);
-		spin_unlock(&vb->vbq->lock);
-	}
+
 	vb->dirty += 1UL << order;
 	if (vb->dirty == VMAP_BBMAP_BITS) {
 		BUG_ON(vb->free || !list_empty(&vb->free_list));
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 479e4671939..06e72693b45 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -60,8 +60,8 @@ struct scan_control {
 
 	int may_writepage;
 
-	/* Can pages be swapped as part of reclaim? */
-	int may_swap;
+	/* Can mapped pages be reclaimed? */
+	int may_unmap;
 
 	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
 	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
@@ -78,6 +78,12 @@ struct scan_control {
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
 
+	/*
+	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
+	 * are scanned.
+	 */
+	nodemask_t *nodemask;
+
 	/* Pluggable isolate pages callback */
 	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
 			unsigned long *scanned, int order, int mode,
@@ -214,8 +220,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 		do_div(delta, lru_pages + 1);
 		shrinker->nr += delta;
 		if (shrinker->nr < 0) {
-			printk(KERN_ERR "%s: nr=%ld\n",
-					__func__, shrinker->nr);
+			printk(KERN_ERR "shrink_slab: %pF negative objects to "
+			       "delete nr=%ld\n",
+			       shrinker->shrink, shrinker->nr);
 			shrinker->nr = max_pass;
 		}
 
@@ -606,7 +613,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (unlikely(!page_evictable(page, NULL)))
 			goto cull_mlocked;
 
-		if (!sc->may_swap && page_mapped(page))
+		if (!sc->may_unmap && page_mapped(page))
 			goto keep_locked;
 
 		/* Double the slab pressure for mapped and swapcache pages */
@@ -1298,17 +1305,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
 	pgdeactivate += pgmoved;
-	if (buffer_heads_over_limit) {
-		spin_unlock_irq(&zone->lru_lock);
-		pagevec_strip(&pvec);
-		spin_lock_irq(&zone->lru_lock);
-	}
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
-	if (vm_swap_full())
-		pagevec_swap_free(&pvec);
-
+	if (buffer_heads_over_limit)
+		pagevec_strip(&pvec);
 	pagevec_release(&pvec);
 }
 
@@ -1543,7 +1544,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
 	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+					sc->nodemask) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1688,17 +1690,18 @@ out:
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-				gfp_t gfp_mask)
+				gfp_t gfp_mask, nodemask_t *nodemask)
 {
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
 		.may_writepage = !laptop_mode,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
-		.may_swap = 1,
+		.may_unmap = 1,
 		.swappiness = vm_swappiness,
 		.order = order,
 		.mem_cgroup = NULL,
 		.isolate_pages = isolate_pages_global,
+		.nodemask = nodemask,
 	};
 
 	return do_try_to_free_pages(zonelist, &sc);
@@ -1713,17 +1716,18 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 {
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
-		.may_swap = 1,
+		.may_unmap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = swappiness,
 		.order = 0,
 		.mem_cgroup = mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
+		.nodemask = NULL, /* we don't care the placement */
 	};
 	struct zonelist *zonelist;
 
 	if (noswap)
-		sc.may_swap = 0;
+		sc.may_unmap = 0;
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -1762,7 +1766,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
-		.may_swap = 1,
+		.may_unmap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
 		.order = order,
@@ -2050,22 +2054,19 @@ unsigned long global_lru_pages(void)
 #ifdef CONFIG_PM
 /*
  * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
- * from LRU lists system-wide, for given pass and priority, and returns the
- * number of reclaimed pages
+ * from LRU lists system-wide, for given pass and priority.
  *
 * For pass > 3 we also try to shrink the LRU lists that contain a few pages
 */
-static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
+static void shrink_all_zones(unsigned long nr_pages, int prio,
 				      int pass, struct scan_control *sc)
 {
 	struct zone *zone;
-	unsigned long ret = 0;
+	unsigned long nr_reclaimed = 0;
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		enum lru_list l;
 
-		if (!populated_zone(zone))
-			continue;
 		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
 			continue;
 
@@ -2084,14 +2085,16 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 
 				zone->lru[l].nr_scan = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
-				ret += shrink_list(l, nr_to_scan, zone,
+				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
-				if (ret >= nr_pages)
-					return ret;
+				if (nr_reclaimed >= nr_pages) {
+					sc->nr_reclaimed = nr_reclaimed;
+					return;
+				}
 			}
 		}
 	}
-	return ret;
+	sc->nr_reclaimed = nr_reclaimed;
 }
 
@@ -2105,13 +2108,11 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
 	unsigned long lru_pages, nr_slab;
-	unsigned long ret = 0;
 	int pass;
 	struct reclaim_state reclaim_state;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
-		.may_swap = 0,
-		.swap_cluster_max = nr_pages,
+		.may_unmap = 0,
 		.may_writepage = 1,
 		.isolate_pages = isolate_pages_global,
 	};
@@ -2127,8 +2128,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 		if (!reclaim_state.reclaimed_slab)
 			break;
 
-		ret += reclaim_state.reclaimed_slab;
-		if (ret >= nr_pages)
+		sc.nr_reclaimed += reclaim_state.reclaimed_slab;
+		if (sc.nr_reclaimed >= nr_pages)
 			goto out;
 
 		nr_slab -= reclaim_state.reclaimed_slab;
@@ -2147,21 +2148,22 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 		/* Force reclaiming mapped pages in the passes #3 and #4 */
 		if (pass > 2)
-			sc.may_swap = 1;
+			sc.may_unmap = 1;
 
 		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
-			unsigned long nr_to_scan = nr_pages - ret;
+			unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed;
 
 			sc.nr_scanned = 0;
-			ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
-			if (ret >= nr_pages)
+			sc.swap_cluster_max = nr_to_scan;
+			shrink_all_zones(nr_to_scan, prio, pass, &sc);
+			if (sc.nr_reclaimed >= nr_pages)
 				goto out;
 
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, sc.gfp_mask,
 				global_lru_pages());
-			ret += reclaim_state.reclaimed_slab;
-			if (ret >= nr_pages)
+			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
+			if (sc.nr_reclaimed >= nr_pages)
 				goto out;
 
 			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
@@ -2170,21 +2172,23 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	}
 
 	/*
-	 * If ret = 0, we could not shrink LRUs, but there may be something
-	 * in slab caches
+	 * If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may be
+	 * something in slab caches
 	 */
-	if (!ret) {
+	if (!sc.nr_reclaimed) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
-			ret += reclaim_state.reclaimed_slab;
-		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
+		} while (sc.nr_reclaimed < nr_pages &&
+				reclaim_state.reclaimed_slab > 0);
 	}
 
+
 out:
 	current->reclaim_state = NULL;
 
-	return ret;
+	return sc.nr_reclaimed;
 }
 #endif
 
@@ -2290,11 +2294,12 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	int priority;
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
-		.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
 		.swap_cluster_max = max_t(unsigned long, nr_pages,
 					SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,
 		.swappiness = vm_swappiness,
+		.order = order,
 		.isolate_pages = isolate_pages_global,
 	};
 	unsigned long slab_reclaimable;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8cd81ea1ddc..9826766f127 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -135,11 +135,7 @@ static void refresh_zone_stat_thresholds(void)
 	int cpu;
 	int threshold;
 
-	for_each_zone(zone) {
-
-		if (!zone->present_pages)
-			continue;
-
+	for_each_populated_zone(zone) {
 		threshold = calculate_threshold(zone);
 
 		for_each_online_cpu(cpu)
@@ -301,12 +297,9 @@ void refresh_cpu_vm_stats(int cpu)
 	int i;
 	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *p;
 
-		if (!populated_zone(zone))
-			continue;
-
 		p = zone_pcp(zone, cpu);
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)