aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile2
-rw-r--r--mm/filemap.c8
-rw-r--r--mm/highmem.c6
-rw-r--r--mm/memory.c8
-rw-r--r--mm/mempolicy.c6
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/page-writeback.c93
-rw-r--r--mm/page_alloc.c472
-rw-r--r--mm/page_io.c4
-rw-r--r--mm/rmap.c7
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/slab.c124
-rw-r--r--mm/swap.c4
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/vmscan.c68
-rw-r--r--mm/vmstat.c614
17 files changed, 773 insertions, 655 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 0b8f73f2ed16..9dd824c11eeb 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -10,7 +10,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 page_alloc.o page-writeback.o pdflush.o \ 11 page_alloc.o page-writeback.o pdflush.o \
12 readahead.o swap.o truncate.o vmscan.o \ 12 readahead.o swap.o truncate.o vmscan.o \
13 prio_tree.o util.o mmzone.o $(mmu-y) 13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
14 14
15obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 15obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
16obj-$(CONFIG_HUGETLBFS) += hugetlb.o 16obj-$(CONFIG_HUGETLBFS) += hugetlb.o
diff --git a/mm/filemap.c b/mm/filemap.c
index f02ca30372cc..d087fc3d3281 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -119,7 +119,7 @@ void __remove_from_page_cache(struct page *page)
119 radix_tree_delete(&mapping->page_tree, page->index); 119 radix_tree_delete(&mapping->page_tree, page->index);
120 page->mapping = NULL; 120 page->mapping = NULL;
121 mapping->nrpages--; 121 mapping->nrpages--;
122 pagecache_acct(-1); 122 __dec_zone_page_state(page, NR_FILE_PAGES);
123} 123}
124 124
125void remove_from_page_cache(struct page *page) 125void remove_from_page_cache(struct page *page)
@@ -448,7 +448,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
448 page->mapping = mapping; 448 page->mapping = mapping;
449 page->index = offset; 449 page->index = offset;
450 mapping->nrpages++; 450 mapping->nrpages++;
451 pagecache_acct(1); 451 __inc_zone_page_state(page, NR_FILE_PAGES);
452 } 452 }
453 write_unlock_irq(&mapping->tree_lock); 453 write_unlock_irq(&mapping->tree_lock);
454 radix_tree_preload_end(); 454 radix_tree_preload_end();
@@ -1415,7 +1415,7 @@ retry_find:
1415 */ 1415 */
1416 if (!did_readaround) { 1416 if (!did_readaround) {
1417 majmin = VM_FAULT_MAJOR; 1417 majmin = VM_FAULT_MAJOR;
1418 inc_page_state(pgmajfault); 1418 count_vm_event(PGMAJFAULT);
1419 } 1419 }
1420 did_readaround = 1; 1420 did_readaround = 1;
1421 ra_pages = max_sane_readahead(file->f_ra.ra_pages); 1421 ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1486,7 +1486,7 @@ no_cached_page:
1486page_not_uptodate: 1486page_not_uptodate:
1487 if (!did_readaround) { 1487 if (!did_readaround) {
1488 majmin = VM_FAULT_MAJOR; 1488 majmin = VM_FAULT_MAJOR;
1489 inc_page_state(pgmajfault); 1489 count_vm_event(PGMAJFAULT);
1490 } 1490 }
1491 lock_page(page); 1491 lock_page(page);
1492 1492
diff --git a/mm/highmem.c b/mm/highmem.c
index 9b274fdf9d08..9b2a5403c447 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -315,8 +315,8 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
315 if (bvec->bv_page == org_vec->bv_page) 315 if (bvec->bv_page == org_vec->bv_page)
316 continue; 316 continue;
317 317
318 mempool_free(bvec->bv_page, pool); 318 dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
319 dec_page_state(nr_bounce); 319 mempool_free(bvec->bv_page, pool);
320 } 320 }
321 321
322 bio_endio(bio_orig, bio_orig->bi_size, err); 322 bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -397,7 +397,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
397 to->bv_page = mempool_alloc(pool, q->bounce_gfp); 397 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
398 to->bv_len = from->bv_len; 398 to->bv_len = from->bv_len;
399 to->bv_offset = from->bv_offset; 399 to->bv_offset = from->bv_offset;
400 inc_page_state(nr_bounce); 400 inc_zone_page_state(to->bv_page, NR_BOUNCE);
401 401
402 if (rw == WRITE) { 402 if (rw == WRITE) {
403 char *vto, *vfrom; 403 char *vto, *vfrom;
diff --git a/mm/memory.c b/mm/memory.c
index 247b5c312b9b..7e2a4b1580e3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -126,7 +126,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
126 pmd_clear(pmd); 126 pmd_clear(pmd);
127 pte_lock_deinit(page); 127 pte_lock_deinit(page);
128 pte_free_tlb(tlb, page); 128 pte_free_tlb(tlb, page);
129 dec_page_state(nr_page_table_pages); 129 dec_zone_page_state(page, NR_PAGETABLE);
130 tlb->mm->nr_ptes--; 130 tlb->mm->nr_ptes--;
131} 131}
132 132
@@ -311,7 +311,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
311 pte_free(new); 311 pte_free(new);
312 } else { 312 } else {
313 mm->nr_ptes++; 313 mm->nr_ptes++;
314 inc_page_state(nr_page_table_pages); 314 inc_zone_page_state(new, NR_PAGETABLE);
315 pmd_populate(mm, pmd, new); 315 pmd_populate(mm, pmd, new);
316 } 316 }
317 spin_unlock(&mm->page_table_lock); 317 spin_unlock(&mm->page_table_lock);
@@ -1951,7 +1951,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
1951 1951
1952 /* Had to read the page from swap area: Major fault */ 1952 /* Had to read the page from swap area: Major fault */
1953 ret = VM_FAULT_MAJOR; 1953 ret = VM_FAULT_MAJOR;
1954 inc_page_state(pgmajfault); 1954 count_vm_event(PGMAJFAULT);
1955 grab_swap_token(); 1955 grab_swap_token();
1956 } 1956 }
1957 1957
@@ -2324,7 +2324,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2324 2324
2325 __set_current_state(TASK_RUNNING); 2325 __set_current_state(TASK_RUNNING);
2326 2326
2327 inc_page_state(pgfault); 2327 count_vm_event(PGFAULT);
2328 2328
2329 if (unlikely(is_vm_hugetlb_page(vma))) 2329 if (unlikely(is_vm_hugetlb_page(vma)))
2330 return hugetlb_fault(mm, vma, address, write_access); 2330 return hugetlb_fault(mm, vma, address, write_access);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 6b9740bbf4c0..e07e27e846a2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1209,10 +1209,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
1209 1209
1210 zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp); 1210 zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
1211 page = __alloc_pages(gfp, order, zl); 1211 page = __alloc_pages(gfp, order, zl);
1212 if (page && page_zone(page) == zl->zones[0]) { 1212 if (page && page_zone(page) == zl->zones[0])
1213 zone_pcp(zl->zones[0],get_cpu())->interleave_hit++; 1213 inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
1214 put_cpu();
1215 }
1216 return page; 1214 return page;
1217} 1215}
1218 1216
diff --git a/mm/mmap.c b/mm/mmap.c
index 6446c6134b04..c1868ecdbc5f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -96,7 +96,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
96 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { 96 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
97 unsigned long n; 97 unsigned long n;
98 98
99 free = get_page_cache_size(); 99 free = global_page_state(NR_FILE_PAGES);
100 free += nr_swap_pages; 100 free += nr_swap_pages;
101 101
102 /* 102 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index 029fadac0fb5..5151c44a8257 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1122,7 +1122,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
1122 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { 1122 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1123 unsigned long n; 1123 unsigned long n;
1124 1124
1125 free = get_page_cache_size(); 1125 free = global_page_state(NR_FILE_PAGES);
1126 free += nr_swap_pages; 1126 free += nr_swap_pages;
1127 1127
1128 /* 1128 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4ec7026c7bab..e630188ccc40 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -99,22 +99,6 @@ EXPORT_SYMBOL(laptop_mode);
99 99
100static void background_writeout(unsigned long _min_pages); 100static void background_writeout(unsigned long _min_pages);
101 101
102struct writeback_state
103{
104 unsigned long nr_dirty;
105 unsigned long nr_unstable;
106 unsigned long nr_mapped;
107 unsigned long nr_writeback;
108};
109
110static void get_writeback_state(struct writeback_state *wbs)
111{
112 wbs->nr_dirty = read_page_state(nr_dirty);
113 wbs->nr_unstable = read_page_state(nr_unstable);
114 wbs->nr_mapped = read_page_state(nr_mapped);
115 wbs->nr_writeback = read_page_state(nr_writeback);
116}
117
118/* 102/*
119 * Work out the current dirty-memory clamping and background writeout 103 * Work out the current dirty-memory clamping and background writeout
120 * thresholds. 104 * thresholds.
@@ -133,8 +117,8 @@ static void get_writeback_state(struct writeback_state *wbs)
133 * clamping level. 117 * clamping level.
134 */ 118 */
135static void 119static void
136get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty, 120get_dirty_limits(long *pbackground, long *pdirty,
137 struct address_space *mapping) 121 struct address_space *mapping)
138{ 122{
139 int background_ratio; /* Percentages */ 123 int background_ratio; /* Percentages */
140 int dirty_ratio; 124 int dirty_ratio;
@@ -144,8 +128,6 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
144 unsigned long available_memory = total_pages; 128 unsigned long available_memory = total_pages;
145 struct task_struct *tsk; 129 struct task_struct *tsk;
146 130
147 get_writeback_state(wbs);
148
149#ifdef CONFIG_HIGHMEM 131#ifdef CONFIG_HIGHMEM
150 /* 132 /*
151 * If this mapping can only allocate from low memory, 133 * If this mapping can only allocate from low memory,
@@ -156,7 +138,9 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
156#endif 138#endif
157 139
158 140
159 unmapped_ratio = 100 - (wbs->nr_mapped * 100) / total_pages; 141 unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
142 global_page_state(NR_ANON_PAGES)) * 100) /
143 total_pages;
160 144
161 dirty_ratio = vm_dirty_ratio; 145 dirty_ratio = vm_dirty_ratio;
162 if (dirty_ratio > unmapped_ratio / 2) 146 if (dirty_ratio > unmapped_ratio / 2)
@@ -189,7 +173,6 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
189 */ 173 */
190static void balance_dirty_pages(struct address_space *mapping) 174static void balance_dirty_pages(struct address_space *mapping)
191{ 175{
192 struct writeback_state wbs;
193 long nr_reclaimable; 176 long nr_reclaimable;
194 long background_thresh; 177 long background_thresh;
195 long dirty_thresh; 178 long dirty_thresh;
@@ -207,11 +190,12 @@ static void balance_dirty_pages(struct address_space *mapping)
207 .range_cyclic = 1, 190 .range_cyclic = 1,
208 }; 191 };
209 192
210 get_dirty_limits(&wbs, &background_thresh, 193 get_dirty_limits(&background_thresh, &dirty_thresh, mapping);
211 &dirty_thresh, mapping); 194 nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
212 nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable; 195 global_page_state(NR_UNSTABLE_NFS);
213 if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) 196 if (nr_reclaimable + global_page_state(NR_WRITEBACK) <=
214 break; 197 dirty_thresh)
198 break;
215 199
216 if (!dirty_exceeded) 200 if (!dirty_exceeded)
217 dirty_exceeded = 1; 201 dirty_exceeded = 1;
@@ -224,11 +208,14 @@ static void balance_dirty_pages(struct address_space *mapping)
224 */ 208 */
225 if (nr_reclaimable) { 209 if (nr_reclaimable) {
226 writeback_inodes(&wbc); 210 writeback_inodes(&wbc);
227 get_dirty_limits(&wbs, &background_thresh, 211 get_dirty_limits(&background_thresh,
228 &dirty_thresh, mapping); 212 &dirty_thresh, mapping);
229 nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable; 213 nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
230 if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) 214 global_page_state(NR_UNSTABLE_NFS);
231 break; 215 if (nr_reclaimable +
216 global_page_state(NR_WRITEBACK)
217 <= dirty_thresh)
218 break;
232 pages_written += write_chunk - wbc.nr_to_write; 219 pages_written += write_chunk - wbc.nr_to_write;
233 if (pages_written >= write_chunk) 220 if (pages_written >= write_chunk)
234 break; /* We've done our duty */ 221 break; /* We've done our duty */
@@ -236,8 +223,9 @@ static void balance_dirty_pages(struct address_space *mapping)
236 blk_congestion_wait(WRITE, HZ/10); 223 blk_congestion_wait(WRITE, HZ/10);
237 } 224 }
238 225
239 if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh && dirty_exceeded) 226 if (nr_reclaimable + global_page_state(NR_WRITEBACK)
240 dirty_exceeded = 0; 227 <= dirty_thresh && dirty_exceeded)
228 dirty_exceeded = 0;
241 229
242 if (writeback_in_progress(bdi)) 230 if (writeback_in_progress(bdi))
243 return; /* pdflush is already working this queue */ 231 return; /* pdflush is already working this queue */
@@ -299,12 +287,11 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
299 287
300void throttle_vm_writeout(void) 288void throttle_vm_writeout(void)
301{ 289{
302 struct writeback_state wbs;
303 long background_thresh; 290 long background_thresh;
304 long dirty_thresh; 291 long dirty_thresh;
305 292
306 for ( ; ; ) { 293 for ( ; ; ) {
307 get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL); 294 get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
308 295
309 /* 296 /*
310 * Boost the allowable dirty threshold a bit for page 297 * Boost the allowable dirty threshold a bit for page
@@ -312,8 +299,9 @@ void throttle_vm_writeout(void)
312 */ 299 */
313 dirty_thresh += dirty_thresh / 10; /* wheeee... */ 300 dirty_thresh += dirty_thresh / 10; /* wheeee... */
314 301
315 if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh) 302 if (global_page_state(NR_UNSTABLE_NFS) +
316 break; 303 global_page_state(NR_WRITEBACK) <= dirty_thresh)
304 break;
317 blk_congestion_wait(WRITE, HZ/10); 305 blk_congestion_wait(WRITE, HZ/10);
318 } 306 }
319} 307}
@@ -336,12 +324,12 @@ static void background_writeout(unsigned long _min_pages)
336 }; 324 };
337 325
338 for ( ; ; ) { 326 for ( ; ; ) {
339 struct writeback_state wbs;
340 long background_thresh; 327 long background_thresh;
341 long dirty_thresh; 328 long dirty_thresh;
342 329
343 get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL); 330 get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
344 if (wbs.nr_dirty + wbs.nr_unstable < background_thresh 331 if (global_page_state(NR_FILE_DIRTY) +
332 global_page_state(NR_UNSTABLE_NFS) < background_thresh
345 && min_pages <= 0) 333 && min_pages <= 0)
346 break; 334 break;
347 wbc.encountered_congestion = 0; 335 wbc.encountered_congestion = 0;
@@ -365,12 +353,9 @@ static void background_writeout(unsigned long _min_pages)
365 */ 353 */
366int wakeup_pdflush(long nr_pages) 354int wakeup_pdflush(long nr_pages)
367{ 355{
368 if (nr_pages == 0) { 356 if (nr_pages == 0)
369 struct writeback_state wbs; 357 nr_pages = global_page_state(NR_FILE_DIRTY) +
370 358 global_page_state(NR_UNSTABLE_NFS);
371 get_writeback_state(&wbs);
372 nr_pages = wbs.nr_dirty + wbs.nr_unstable;
373 }
374 return pdflush_operation(background_writeout, nr_pages); 359 return pdflush_operation(background_writeout, nr_pages);
375} 360}
376 361
@@ -401,7 +386,6 @@ static void wb_kupdate(unsigned long arg)
401 unsigned long start_jif; 386 unsigned long start_jif;
402 unsigned long next_jif; 387 unsigned long next_jif;
403 long nr_to_write; 388 long nr_to_write;
404 struct writeback_state wbs;
405 struct writeback_control wbc = { 389 struct writeback_control wbc = {
406 .bdi = NULL, 390 .bdi = NULL,
407 .sync_mode = WB_SYNC_NONE, 391 .sync_mode = WB_SYNC_NONE,
@@ -414,11 +398,11 @@ static void wb_kupdate(unsigned long arg)
414 398
415 sync_supers(); 399 sync_supers();
416 400
417 get_writeback_state(&wbs);
418 oldest_jif = jiffies - dirty_expire_interval; 401 oldest_jif = jiffies - dirty_expire_interval;
419 start_jif = jiffies; 402 start_jif = jiffies;
420 next_jif = start_jif + dirty_writeback_interval; 403 next_jif = start_jif + dirty_writeback_interval;
421 nr_to_write = wbs.nr_dirty + wbs.nr_unstable + 404 nr_to_write = global_page_state(NR_FILE_DIRTY) +
405 global_page_state(NR_UNSTABLE_NFS) +
422 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 406 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
423 while (nr_to_write > 0) { 407 while (nr_to_write > 0) {
424 wbc.encountered_congestion = 0; 408 wbc.encountered_congestion = 0;
@@ -640,7 +624,8 @@ int __set_page_dirty_nobuffers(struct page *page)
640 if (mapping2) { /* Race with truncate? */ 624 if (mapping2) { /* Race with truncate? */
641 BUG_ON(mapping2 != mapping); 625 BUG_ON(mapping2 != mapping);
642 if (mapping_cap_account_dirty(mapping)) 626 if (mapping_cap_account_dirty(mapping))
643 inc_page_state(nr_dirty); 627 __inc_zone_page_state(page,
628 NR_FILE_DIRTY);
644 radix_tree_tag_set(&mapping->page_tree, 629 radix_tree_tag_set(&mapping->page_tree,
645 page_index(page), PAGECACHE_TAG_DIRTY); 630 page_index(page), PAGECACHE_TAG_DIRTY);
646 } 631 }
@@ -727,9 +712,9 @@ int test_clear_page_dirty(struct page *page)
727 radix_tree_tag_clear(&mapping->page_tree, 712 radix_tree_tag_clear(&mapping->page_tree,
728 page_index(page), 713 page_index(page),
729 PAGECACHE_TAG_DIRTY); 714 PAGECACHE_TAG_DIRTY);
730 write_unlock_irqrestore(&mapping->tree_lock, flags);
731 if (mapping_cap_account_dirty(mapping)) 715 if (mapping_cap_account_dirty(mapping))
732 dec_page_state(nr_dirty); 716 __dec_zone_page_state(page, NR_FILE_DIRTY);
717 write_unlock_irqrestore(&mapping->tree_lock, flags);
733 return 1; 718 return 1;
734 } 719 }
735 write_unlock_irqrestore(&mapping->tree_lock, flags); 720 write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -760,7 +745,7 @@ int clear_page_dirty_for_io(struct page *page)
760 if (mapping) { 745 if (mapping) {
761 if (TestClearPageDirty(page)) { 746 if (TestClearPageDirty(page)) {
762 if (mapping_cap_account_dirty(mapping)) 747 if (mapping_cap_account_dirty(mapping))
763 dec_page_state(nr_dirty); 748 dec_zone_page_state(page, NR_FILE_DIRTY);
764 return 1; 749 return 1;
765 } 750 }
766 return 0; 751 return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60f2feddbe5d..3e792a583f3b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -455,7 +455,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
455 455
456 kernel_map_pages(page, 1 << order, 0); 456 kernel_map_pages(page, 1 << order, 0);
457 local_irq_save(flags); 457 local_irq_save(flags);
458 __mod_page_state(pgfree, 1 << order); 458 __count_vm_events(PGFREE, 1 << order);
459 free_one_page(page_zone(page), page, order); 459 free_one_page(page_zone(page), page, order);
460 local_irq_restore(flags); 460 local_irq_restore(flags);
461} 461}
@@ -708,27 +708,6 @@ void drain_local_pages(void)
708} 708}
709#endif /* CONFIG_PM */ 709#endif /* CONFIG_PM */
710 710
711static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
712{
713#ifdef CONFIG_NUMA
714 pg_data_t *pg = z->zone_pgdat;
715 pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
716 struct per_cpu_pageset *p;
717
718 p = zone_pcp(z, cpu);
719 if (pg == orig) {
720 p->numa_hit++;
721 } else {
722 p->numa_miss++;
723 zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
724 }
725 if (pg == NODE_DATA(numa_node_id()))
726 p->local_node++;
727 else
728 p->other_node++;
729#endif
730}
731
732/* 711/*
733 * Free a 0-order page 712 * Free a 0-order page
734 */ 713 */
@@ -749,7 +728,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
749 728
750 pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; 729 pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
751 local_irq_save(flags); 730 local_irq_save(flags);
752 __inc_page_state(pgfree); 731 __count_vm_event(PGFREE);
753 list_add(&page->lru, &pcp->list); 732 list_add(&page->lru, &pcp->list);
754 pcp->count++; 733 pcp->count++;
755 if (pcp->count >= pcp->high) { 734 if (pcp->count >= pcp->high) {
@@ -825,8 +804,8 @@ again:
825 goto failed; 804 goto failed;
826 } 805 }
827 806
828 __mod_page_state_zone(zone, pgalloc, 1 << order); 807 __count_zone_vm_events(PGALLOC, zone, 1 << order);
829 zone_statistics(zonelist, zone, cpu); 808 zone_statistics(zonelist, zone);
830 local_irq_restore(flags); 809 local_irq_restore(flags);
831 put_cpu(); 810 put_cpu();
832 811
@@ -1230,141 +1209,6 @@ static void show_node(struct zone *zone)
1230#define show_node(zone) do { } while (0) 1209#define show_node(zone) do { } while (0)
1231#endif 1210#endif
1232 1211
1233/*
1234 * Accumulate the page_state information across all CPUs.
1235 * The result is unavoidably approximate - it can change
1236 * during and after execution of this function.
1237 */
1238static DEFINE_PER_CPU(struct page_state, page_states) = {0};
1239
1240atomic_t nr_pagecache = ATOMIC_INIT(0);
1241EXPORT_SYMBOL(nr_pagecache);
1242#ifdef CONFIG_SMP
1243DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
1244#endif
1245
1246static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
1247{
1248 unsigned cpu;
1249
1250 memset(ret, 0, nr * sizeof(unsigned long));
1251 cpus_and(*cpumask, *cpumask, cpu_online_map);
1252
1253 for_each_cpu_mask(cpu, *cpumask) {
1254 unsigned long *in;
1255 unsigned long *out;
1256 unsigned off;
1257 unsigned next_cpu;
1258
1259 in = (unsigned long *)&per_cpu(page_states, cpu);
1260
1261 next_cpu = next_cpu(cpu, *cpumask);
1262 if (likely(next_cpu < NR_CPUS))
1263 prefetch(&per_cpu(page_states, next_cpu));
1264
1265 out = (unsigned long *)ret;
1266 for (off = 0; off < nr; off++)
1267 *out++ += *in++;
1268 }
1269}
1270
1271void get_page_state_node(struct page_state *ret, int node)
1272{
1273 int nr;
1274 cpumask_t mask = node_to_cpumask(node);
1275
1276 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
1277 nr /= sizeof(unsigned long);
1278
1279 __get_page_state(ret, nr+1, &mask);
1280}
1281
1282void get_page_state(struct page_state *ret)
1283{
1284 int nr;
1285 cpumask_t mask = CPU_MASK_ALL;
1286
1287 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
1288 nr /= sizeof(unsigned long);
1289
1290 __get_page_state(ret, nr + 1, &mask);
1291}
1292
1293void get_full_page_state(struct page_state *ret)
1294{
1295 cpumask_t mask = CPU_MASK_ALL;
1296
1297 __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
1298}
1299
1300unsigned long read_page_state_offset(unsigned long offset)
1301{
1302 unsigned long ret = 0;
1303 int cpu;
1304
1305 for_each_online_cpu(cpu) {
1306 unsigned long in;
1307
1308 in = (unsigned long)&per_cpu(page_states, cpu) + offset;
1309 ret += *((unsigned long *)in);
1310 }
1311 return ret;
1312}
1313
1314void __mod_page_state_offset(unsigned long offset, unsigned long delta)
1315{
1316 void *ptr;
1317
1318 ptr = &__get_cpu_var(page_states);
1319 *(unsigned long *)(ptr + offset) += delta;
1320}
1321EXPORT_SYMBOL(__mod_page_state_offset);
1322
1323void mod_page_state_offset(unsigned long offset, unsigned long delta)
1324{
1325 unsigned long flags;
1326 void *ptr;
1327
1328 local_irq_save(flags);
1329 ptr = &__get_cpu_var(page_states);
1330 *(unsigned long *)(ptr + offset) += delta;
1331 local_irq_restore(flags);
1332}
1333EXPORT_SYMBOL(mod_page_state_offset);
1334
1335void __get_zone_counts(unsigned long *active, unsigned long *inactive,
1336 unsigned long *free, struct pglist_data *pgdat)
1337{
1338 struct zone *zones = pgdat->node_zones;
1339 int i;
1340
1341 *active = 0;
1342 *inactive = 0;
1343 *free = 0;
1344 for (i = 0; i < MAX_NR_ZONES; i++) {
1345 *active += zones[i].nr_active;
1346 *inactive += zones[i].nr_inactive;
1347 *free += zones[i].free_pages;
1348 }
1349}
1350
1351void get_zone_counts(unsigned long *active,
1352 unsigned long *inactive, unsigned long *free)
1353{
1354 struct pglist_data *pgdat;
1355
1356 *active = 0;
1357 *inactive = 0;
1358 *free = 0;
1359 for_each_online_pgdat(pgdat) {
1360 unsigned long l, m, n;
1361 __get_zone_counts(&l, &m, &n, pgdat);
1362 *active += l;
1363 *inactive += m;
1364 *free += n;
1365 }
1366}
1367
1368void si_meminfo(struct sysinfo *val) 1212void si_meminfo(struct sysinfo *val)
1369{ 1213{
1370 val->totalram = totalram_pages; 1214 val->totalram = totalram_pages;
@@ -1405,7 +1249,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1405 */ 1249 */
1406void show_free_areas(void) 1250void show_free_areas(void)
1407{ 1251{
1408 struct page_state ps;
1409 int cpu, temperature; 1252 int cpu, temperature;
1410 unsigned long active; 1253 unsigned long active;
1411 unsigned long inactive; 1254 unsigned long inactive;
@@ -1437,7 +1280,6 @@ void show_free_areas(void)
1437 } 1280 }
1438 } 1281 }
1439 1282
1440 get_page_state(&ps);
1441 get_zone_counts(&active, &inactive, &free); 1283 get_zone_counts(&active, &inactive, &free);
1442 1284
1443 printk("Free pages: %11ukB (%ukB HighMem)\n", 1285 printk("Free pages: %11ukB (%ukB HighMem)\n",
@@ -1448,13 +1290,13 @@ void show_free_areas(void)
1448 "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", 1290 "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
1449 active, 1291 active,
1450 inactive, 1292 inactive,
1451 ps.nr_dirty, 1293 global_page_state(NR_FILE_DIRTY),
1452 ps.nr_writeback, 1294 global_page_state(NR_WRITEBACK),
1453 ps.nr_unstable, 1295 global_page_state(NR_UNSTABLE_NFS),
1454 nr_free_pages(), 1296 nr_free_pages(),
1455 ps.nr_slab, 1297 global_page_state(NR_SLAB),
1456 ps.nr_mapped, 1298 global_page_state(NR_FILE_MAPPED),
1457 ps.nr_page_table_pages); 1299 global_page_state(NR_PAGETABLE));
1458 1300
1459 for_each_zone(zone) { 1301 for_each_zone(zone) {
1460 int i; 1302 int i;
@@ -2179,6 +2021,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2179 zone->nr_scan_inactive = 0; 2021 zone->nr_scan_inactive = 0;
2180 zone->nr_active = 0; 2022 zone->nr_active = 0;
2181 zone->nr_inactive = 0; 2023 zone->nr_inactive = 0;
2024 zap_zone_vm_stats(zone);
2182 atomic_set(&zone->reclaim_in_progress, 0); 2025 atomic_set(&zone->reclaim_in_progress, 0);
2183 if (!size) 2026 if (!size)
2184 continue; 2027 continue;
@@ -2252,307 +2095,18 @@ void __init free_area_init(unsigned long *zones_size)
2252 __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); 2095 __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
2253} 2096}
2254 2097
2255#ifdef CONFIG_PROC_FS
2256
2257#include <linux/seq_file.h>
2258
2259static void *frag_start(struct seq_file *m, loff_t *pos)
2260{
2261 pg_data_t *pgdat;
2262 loff_t node = *pos;
2263 for (pgdat = first_online_pgdat();
2264 pgdat && node;
2265 pgdat = next_online_pgdat(pgdat))
2266 --node;
2267
2268 return pgdat;
2269}
2270
2271static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
2272{
2273 pg_data_t *pgdat = (pg_data_t *)arg;
2274
2275 (*pos)++;
2276 return next_online_pgdat(pgdat);
2277}
2278
2279static void frag_stop(struct seq_file *m, void *arg)
2280{
2281}
2282
2283/*
2284 * This walks the free areas for each zone.
2285 */
2286static int frag_show(struct seq_file *m, void *arg)
2287{
2288 pg_data_t *pgdat = (pg_data_t *)arg;
2289 struct zone *zone;
2290 struct zone *node_zones = pgdat->node_zones;
2291 unsigned long flags;
2292 int order;
2293
2294 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
2295 if (!populated_zone(zone))
2296 continue;
2297
2298 spin_lock_irqsave(&zone->lock, flags);
2299 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
2300 for (order = 0; order < MAX_ORDER; ++order)
2301 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
2302 spin_unlock_irqrestore(&zone->lock, flags);
2303 seq_putc(m, '\n');
2304 }
2305 return 0;
2306}
2307
2308struct seq_operations fragmentation_op = {
2309 .start = frag_start,
2310 .next = frag_next,
2311 .stop = frag_stop,
2312 .show = frag_show,
2313};
2314
2315/*
2316 * Output information about zones in @pgdat.
2317 */
2318static int zoneinfo_show(struct seq_file *m, void *arg)
2319{
2320 pg_data_t *pgdat = arg;
2321 struct zone *zone;
2322 struct zone *node_zones = pgdat->node_zones;
2323 unsigned long flags;
2324
2325 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
2326 int i;
2327
2328 if (!populated_zone(zone))
2329 continue;
2330
2331 spin_lock_irqsave(&zone->lock, flags);
2332 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
2333 seq_printf(m,
2334 "\n pages free %lu"
2335 "\n min %lu"
2336 "\n low %lu"
2337 "\n high %lu"
2338 "\n active %lu"
2339 "\n inactive %lu"
2340 "\n scanned %lu (a: %lu i: %lu)"
2341 "\n spanned %lu"
2342 "\n present %lu",
2343 zone->free_pages,
2344 zone->pages_min,
2345 zone->pages_low,
2346 zone->pages_high,
2347 zone->nr_active,
2348 zone->nr_inactive,
2349 zone->pages_scanned,
2350 zone->nr_scan_active, zone->nr_scan_inactive,
2351 zone->spanned_pages,
2352 zone->present_pages);
2353 seq_printf(m,
2354 "\n protection: (%lu",
2355 zone->lowmem_reserve[0]);
2356 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
2357 seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
2358 seq_printf(m,
2359 ")"
2360 "\n pagesets");
2361 for_each_online_cpu(i) {
2362 struct per_cpu_pageset *pageset;
2363 int j;
2364
2365 pageset = zone_pcp(zone, i);
2366 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
2367 if (pageset->pcp[j].count)
2368 break;
2369 }
2370 if (j == ARRAY_SIZE(pageset->pcp))
2371 continue;
2372 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
2373 seq_printf(m,
2374 "\n cpu: %i pcp: %i"
2375 "\n count: %i"
2376 "\n high: %i"
2377 "\n batch: %i",
2378 i, j,
2379 pageset->pcp[j].count,
2380 pageset->pcp[j].high,
2381 pageset->pcp[j].batch);
2382 }
2383#ifdef CONFIG_NUMA
2384 seq_printf(m,
2385 "\n numa_hit: %lu"
2386 "\n numa_miss: %lu"
2387 "\n numa_foreign: %lu"
2388 "\n interleave_hit: %lu"
2389 "\n local_node: %lu"
2390 "\n other_node: %lu",
2391 pageset->numa_hit,
2392 pageset->numa_miss,
2393 pageset->numa_foreign,
2394 pageset->interleave_hit,
2395 pageset->local_node,
2396 pageset->other_node);
2397#endif
2398 }
2399 seq_printf(m,
2400 "\n all_unreclaimable: %u"
2401 "\n prev_priority: %i"
2402 "\n temp_priority: %i"
2403 "\n start_pfn: %lu",
2404 zone->all_unreclaimable,
2405 zone->prev_priority,
2406 zone->temp_priority,
2407 zone->zone_start_pfn);
2408 spin_unlock_irqrestore(&zone->lock, flags);
2409 seq_putc(m, '\n');
2410 }
2411 return 0;
2412}
2413
2414struct seq_operations zoneinfo_op = {
2415 .start = frag_start, /* iterate over all zones. The same as in
2416 * fragmentation. */
2417 .next = frag_next,
2418 .stop = frag_stop,
2419 .show = zoneinfo_show,
2420};
2421
2422static char *vmstat_text[] = {
2423 "nr_dirty",
2424 "nr_writeback",
2425 "nr_unstable",
2426 "nr_page_table_pages",
2427 "nr_mapped",
2428 "nr_slab",
2429
2430 "pgpgin",
2431 "pgpgout",
2432 "pswpin",
2433 "pswpout",
2434
2435 "pgalloc_high",
2436 "pgalloc_normal",
2437 "pgalloc_dma32",
2438 "pgalloc_dma",
2439
2440 "pgfree",
2441 "pgactivate",
2442 "pgdeactivate",
2443
2444 "pgfault",
2445 "pgmajfault",
2446
2447 "pgrefill_high",
2448 "pgrefill_normal",
2449 "pgrefill_dma32",
2450 "pgrefill_dma",
2451
2452 "pgsteal_high",
2453 "pgsteal_normal",
2454 "pgsteal_dma32",
2455 "pgsteal_dma",
2456
2457 "pgscan_kswapd_high",
2458 "pgscan_kswapd_normal",
2459 "pgscan_kswapd_dma32",
2460 "pgscan_kswapd_dma",
2461
2462 "pgscan_direct_high",
2463 "pgscan_direct_normal",
2464 "pgscan_direct_dma32",
2465 "pgscan_direct_dma",
2466
2467 "pginodesteal",
2468 "slabs_scanned",
2469 "kswapd_steal",
2470 "kswapd_inodesteal",
2471 "pageoutrun",
2472 "allocstall",
2473
2474 "pgrotated",
2475 "nr_bounce",
2476};
2477
2478static void *vmstat_start(struct seq_file *m, loff_t *pos)
2479{
2480 struct page_state *ps;
2481
2482 if (*pos >= ARRAY_SIZE(vmstat_text))
2483 return NULL;
2484
2485 ps = kmalloc(sizeof(*ps), GFP_KERNEL);
2486 m->private = ps;
2487 if (!ps)
2488 return ERR_PTR(-ENOMEM);
2489 get_full_page_state(ps);
2490 ps->pgpgin /= 2; /* sectors -> kbytes */
2491 ps->pgpgout /= 2;
2492 return (unsigned long *)ps + *pos;
2493}
2494
2495static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
2496{
2497 (*pos)++;
2498 if (*pos >= ARRAY_SIZE(vmstat_text))
2499 return NULL;
2500 return (unsigned long *)m->private + *pos;
2501}
2502
2503static int vmstat_show(struct seq_file *m, void *arg)
2504{
2505 unsigned long *l = arg;
2506 unsigned long off = l - (unsigned long *)m->private;
2507
2508 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
2509 return 0;
2510}
2511
2512static void vmstat_stop(struct seq_file *m, void *arg)
2513{
2514 kfree(m->private);
2515 m->private = NULL;
2516}
2517
2518struct seq_operations vmstat_op = {
2519 .start = vmstat_start,
2520 .next = vmstat_next,
2521 .stop = vmstat_stop,
2522 .show = vmstat_show,
2523};
2524
2525#endif /* CONFIG_PROC_FS */
2526
2527#ifdef CONFIG_HOTPLUG_CPU 2098#ifdef CONFIG_HOTPLUG_CPU
2528static int page_alloc_cpu_notify(struct notifier_block *self, 2099static int page_alloc_cpu_notify(struct notifier_block *self,
2529 unsigned long action, void *hcpu) 2100 unsigned long action, void *hcpu)
2530{ 2101{
2531 int cpu = (unsigned long)hcpu; 2102 int cpu = (unsigned long)hcpu;
2532 long *count;
2533 unsigned long *src, *dest;
2534 2103
2535 if (action == CPU_DEAD) { 2104 if (action == CPU_DEAD) {
2536 int i;
2537
2538 /* Drain local pagecache count. */
2539 count = &per_cpu(nr_pagecache_local, cpu);
2540 atomic_add(*count, &nr_pagecache);
2541 *count = 0;
2542 local_irq_disable(); 2105 local_irq_disable();
2543 __drain_pages(cpu); 2106 __drain_pages(cpu);
2544 2107 vm_events_fold_cpu(cpu);
2545 /* Add dead cpu's page_states to our own. */
2546 dest = (unsigned long *)&__get_cpu_var(page_states);
2547 src = (unsigned long *)&per_cpu(page_states, cpu);
2548
2549 for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
2550 i++) {
2551 dest[i] += src[i];
2552 src[i] = 0;
2553 }
2554
2555 local_irq_enable(); 2108 local_irq_enable();
2109 refresh_cpu_vm_stats(cpu);
2556 } 2110 }
2557 return NOTIFY_OK; 2111 return NOTIFY_OK;
2558} 2112}
diff --git a/mm/page_io.c b/mm/page_io.c
index bb2b0d53889c..88029948d00a 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -101,7 +101,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
101 } 101 }
102 if (wbc->sync_mode == WB_SYNC_ALL) 102 if (wbc->sync_mode == WB_SYNC_ALL)
103 rw |= (1 << BIO_RW_SYNC); 103 rw |= (1 << BIO_RW_SYNC);
104 inc_page_state(pswpout); 104 count_vm_event(PSWPOUT);
105 set_page_writeback(page); 105 set_page_writeback(page);
106 unlock_page(page); 106 unlock_page(page);
107 submit_bio(rw, bio); 107 submit_bio(rw, bio);
@@ -123,7 +123,7 @@ int swap_readpage(struct file *file, struct page *page)
123 ret = -ENOMEM; 123 ret = -ENOMEM;
124 goto out; 124 goto out;
125 } 125 }
126 inc_page_state(pswpin); 126 count_vm_event(PSWPIN);
127 submit_bio(READ, bio); 127 submit_bio(READ, bio);
128out: 128out:
129 return ret; 129 return ret;
diff --git a/mm/rmap.c b/mm/rmap.c
index e76909e880ca..40158b59729e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -455,7 +455,7 @@ static void __page_set_anon_rmap(struct page *page,
455 * nr_mapped state can be updated without turning off 455 * nr_mapped state can be updated without turning off
456 * interrupts because it is not modified via interrupt. 456 * interrupts because it is not modified via interrupt.
457 */ 457 */
458 __inc_page_state(nr_mapped); 458 __inc_zone_page_state(page, NR_ANON_PAGES);
459} 459}
460 460
461/** 461/**
@@ -499,7 +499,7 @@ void page_add_new_anon_rmap(struct page *page,
499void page_add_file_rmap(struct page *page) 499void page_add_file_rmap(struct page *page)
500{ 500{
501 if (atomic_inc_and_test(&page->_mapcount)) 501 if (atomic_inc_and_test(&page->_mapcount))
502 __inc_page_state(nr_mapped); 502 __inc_zone_page_state(page, NR_FILE_MAPPED);
503} 503}
504 504
505/** 505/**
@@ -531,7 +531,8 @@ void page_remove_rmap(struct page *page)
531 */ 531 */
532 if (page_test_and_clear_dirty(page)) 532 if (page_test_and_clear_dirty(page))
533 set_page_dirty(page); 533 set_page_dirty(page);
534 __dec_page_state(nr_mapped); 534 __dec_zone_page_state(page,
535 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
535 } 536 }
536} 537}
537 538
diff --git a/mm/shmem.c b/mm/shmem.c
index 83c9fea1e0e3..db21c51531ca 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1044,12 +1044,12 @@ repeat:
1044 swappage = lookup_swap_cache(swap); 1044 swappage = lookup_swap_cache(swap);
1045 if (!swappage) { 1045 if (!swappage) {
1046 shmem_swp_unmap(entry); 1046 shmem_swp_unmap(entry);
1047 spin_unlock(&info->lock);
1048 /* here we actually do the io */ 1047 /* here we actually do the io */
1049 if (type && *type == VM_FAULT_MINOR) { 1048 if (type && *type == VM_FAULT_MINOR) {
1050 inc_page_state(pgmajfault); 1049 __count_vm_event(PGMAJFAULT);
1051 *type = VM_FAULT_MAJOR; 1050 *type = VM_FAULT_MAJOR;
1052 } 1051 }
1052 spin_unlock(&info->lock);
1053 swappage = shmem_swapin(info, swap, idx); 1053 swappage = shmem_swapin(info, swap, idx);
1054 if (!swappage) { 1054 if (!swappage) {
1055 spin_lock(&info->lock); 1055 spin_lock(&info->lock);
diff --git a/mm/slab.c b/mm/slab.c
index 233e39d14caf..3936af344542 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -309,6 +309,13 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
309#define SIZE_AC 1 309#define SIZE_AC 1
310#define SIZE_L3 (1 + MAX_NUMNODES) 310#define SIZE_L3 (1 + MAX_NUMNODES)
311 311
312static int drain_freelist(struct kmem_cache *cache,
313 struct kmem_list3 *l3, int tofree);
314static void free_block(struct kmem_cache *cachep, void **objpp, int len,
315 int node);
316static void enable_cpucache(struct kmem_cache *cachep);
317static void cache_reap(void *unused);
318
312/* 319/*
313 * This function must be completely optimized away if a constant is passed to 320 * This function must be completely optimized away if a constant is passed to
314 * it. Mostly the same as what is in linux/slab.h except it returns an index. 321 * it. Mostly the same as what is in linux/slab.h except it returns an index.
@@ -456,7 +463,7 @@ struct kmem_cache {
456#define STATS_DEC_ACTIVE(x) ((x)->num_active--) 463#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
457#define STATS_INC_ALLOCED(x) ((x)->num_allocations++) 464#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
458#define STATS_INC_GROWN(x) ((x)->grown++) 465#define STATS_INC_GROWN(x) ((x)->grown++)
459#define STATS_INC_REAPED(x) ((x)->reaped++) 466#define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
460#define STATS_SET_HIGH(x) \ 467#define STATS_SET_HIGH(x) \
461 do { \ 468 do { \
462 if ((x)->num_active > (x)->high_mark) \ 469 if ((x)->num_active > (x)->high_mark) \
@@ -480,7 +487,7 @@ struct kmem_cache {
480#define STATS_DEC_ACTIVE(x) do { } while (0) 487#define STATS_DEC_ACTIVE(x) do { } while (0)
481#define STATS_INC_ALLOCED(x) do { } while (0) 488#define STATS_INC_ALLOCED(x) do { } while (0)
482#define STATS_INC_GROWN(x) do { } while (0) 489#define STATS_INC_GROWN(x) do { } while (0)
483#define STATS_INC_REAPED(x) do { } while (0) 490#define STATS_ADD_REAPED(x,y) do { } while (0)
484#define STATS_SET_HIGH(x) do { } while (0) 491#define STATS_SET_HIGH(x) do { } while (0)
485#define STATS_INC_ERR(x) do { } while (0) 492#define STATS_INC_ERR(x) do { } while (0)
486#define STATS_INC_NODEALLOCS(x) do { } while (0) 493#define STATS_INC_NODEALLOCS(x) do { } while (0)
@@ -700,12 +707,6 @@ int slab_is_available(void)
700 707
701static DEFINE_PER_CPU(struct work_struct, reap_work); 708static DEFINE_PER_CPU(struct work_struct, reap_work);
702 709
703static void free_block(struct kmem_cache *cachep, void **objpp, int len,
704 int node);
705static void enable_cpucache(struct kmem_cache *cachep);
706static void cache_reap(void *unused);
707static int __node_shrink(struct kmem_cache *cachep, int node);
708
709static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 710static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
710{ 711{
711 return cachep->array[smp_processor_id()]; 712 return cachep->array[smp_processor_id()];
@@ -1241,10 +1242,7 @@ free_array_cache:
1241 l3 = cachep->nodelists[node]; 1242 l3 = cachep->nodelists[node];
1242 if (!l3) 1243 if (!l3)
1243 continue; 1244 continue;
1244 spin_lock_irq(&l3->list_lock); 1245 drain_freelist(cachep, l3, l3->free_objects);
1245 /* free slabs belonging to this node */
1246 __node_shrink(cachep, node);
1247 spin_unlock_irq(&l3->list_lock);
1248 } 1246 }
1249 mutex_unlock(&cache_chain_mutex); 1247 mutex_unlock(&cache_chain_mutex);
1250 break; 1248 break;
@@ -1507,7 +1505,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1507 nr_pages = (1 << cachep->gfporder); 1505 nr_pages = (1 << cachep->gfporder);
1508 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1506 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1509 atomic_add(nr_pages, &slab_reclaim_pages); 1507 atomic_add(nr_pages, &slab_reclaim_pages);
1510 add_page_state(nr_slab, nr_pages); 1508 add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
1511 for (i = 0; i < nr_pages; i++) 1509 for (i = 0; i < nr_pages; i++)
1512 __SetPageSlab(page + i); 1510 __SetPageSlab(page + i);
1513 return page_address(page); 1511 return page_address(page);
@@ -1522,12 +1520,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1522 struct page *page = virt_to_page(addr); 1520 struct page *page = virt_to_page(addr);
1523 const unsigned long nr_freed = i; 1521 const unsigned long nr_freed = i;
1524 1522
1523 sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
1525 while (i--) { 1524 while (i--) {
1526 BUG_ON(!PageSlab(page)); 1525 BUG_ON(!PageSlab(page));
1527 __ClearPageSlab(page); 1526 __ClearPageSlab(page);
1528 page++; 1527 page++;
1529 } 1528 }
1530 sub_page_state(nr_slab, nr_freed);
1531 if (current->reclaim_state) 1529 if (current->reclaim_state)
1532 current->reclaim_state->reclaimed_slab += nr_freed; 1530 current->reclaim_state->reclaimed_slab += nr_freed;
1533 free_pages((unsigned long)addr, cachep->gfporder); 1531 free_pages((unsigned long)addr, cachep->gfporder);
@@ -2248,32 +2246,45 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
2248 } 2246 }
2249} 2247}
2250 2248
2251static int __node_shrink(struct kmem_cache *cachep, int node) 2249/*
2250 * Remove slabs from the list of free slabs.
2251 * Specify the number of slabs to drain in tofree.
2252 *
2253 * Returns the actual number of slabs released.
2254 */
2255static int drain_freelist(struct kmem_cache *cache,
2256 struct kmem_list3 *l3, int tofree)
2252{ 2257{
2258 struct list_head *p;
2259 int nr_freed;
2253 struct slab *slabp; 2260 struct slab *slabp;
2254 struct kmem_list3 *l3 = cachep->nodelists[node];
2255 int ret;
2256 2261
2257 for (;;) { 2262 nr_freed = 0;
2258 struct list_head *p; 2263 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2259 2264
2265 spin_lock_irq(&l3->list_lock);
2260 p = l3->slabs_free.prev; 2266 p = l3->slabs_free.prev;
2261 if (p == &l3->slabs_free) 2267 if (p == &l3->slabs_free) {
2262 break; 2268 spin_unlock_irq(&l3->list_lock);
2269 goto out;
2270 }
2263 2271
2264 slabp = list_entry(l3->slabs_free.prev, struct slab, list); 2272 slabp = list_entry(p, struct slab, list);
2265#if DEBUG 2273#if DEBUG
2266 BUG_ON(slabp->inuse); 2274 BUG_ON(slabp->inuse);
2267#endif 2275#endif
2268 list_del(&slabp->list); 2276 list_del(&slabp->list);
2269 2277 /*
2270 l3->free_objects -= cachep->num; 2278 * Safe to drop the lock. The slab is no longer linked
2279 * to the cache.
2280 */
2281 l3->free_objects -= cache->num;
2271 spin_unlock_irq(&l3->list_lock); 2282 spin_unlock_irq(&l3->list_lock);
2272 slab_destroy(cachep, slabp); 2283 slab_destroy(cache, slabp);
2273 spin_lock_irq(&l3->list_lock); 2284 nr_freed++;
2274 } 2285 }
2275 ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial); 2286out:
2276 return ret; 2287 return nr_freed;
2277} 2288}
2278 2289
2279static int __cache_shrink(struct kmem_cache *cachep) 2290static int __cache_shrink(struct kmem_cache *cachep)
@@ -2286,11 +2297,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
2286 check_irq_on(); 2297 check_irq_on();
2287 for_each_online_node(i) { 2298 for_each_online_node(i) {
2288 l3 = cachep->nodelists[i]; 2299 l3 = cachep->nodelists[i];
2289 if (l3) { 2300 if (!l3)
2290 spin_lock_irq(&l3->list_lock); 2301 continue;
2291 ret += __node_shrink(cachep, i); 2302
2292 spin_unlock_irq(&l3->list_lock); 2303 drain_freelist(cachep, l3, l3->free_objects);
2293 } 2304
2305 ret += !list_empty(&l3->slabs_full) ||
2306 !list_empty(&l3->slabs_partial);
2294 } 2307 }
2295 return (ret ? 1 : 0); 2308 return (ret ? 1 : 0);
2296} 2309}
@@ -3694,10 +3707,6 @@ static void cache_reap(void *unused)
3694 } 3707 }
3695 3708
3696 list_for_each_entry(searchp, &cache_chain, next) { 3709 list_for_each_entry(searchp, &cache_chain, next) {
3697 struct list_head *p;
3698 int tofree;
3699 struct slab *slabp;
3700
3701 check_irq_on(); 3710 check_irq_on();
3702 3711
3703 /* 3712 /*
@@ -3722,47 +3731,22 @@ static void cache_reap(void *unused)
3722 3731
3723 drain_array(searchp, l3, l3->shared, 0, node); 3732 drain_array(searchp, l3, l3->shared, 0, node);
3724 3733
3725 if (l3->free_touched) { 3734 if (l3->free_touched)
3726 l3->free_touched = 0; 3735 l3->free_touched = 0;
3727 goto next; 3736 else {
3728 } 3737 int freed;
3729
3730 tofree = (l3->free_limit + 5 * searchp->num - 1) /
3731 (5 * searchp->num);
3732 do {
3733 /*
3734 * Do not lock if there are no free blocks.
3735 */
3736 if (list_empty(&l3->slabs_free))
3737 break;
3738
3739 spin_lock_irq(&l3->list_lock);
3740 p = l3->slabs_free.next;
3741 if (p == &(l3->slabs_free)) {
3742 spin_unlock_irq(&l3->list_lock);
3743 break;
3744 }
3745 3738
3746 slabp = list_entry(p, struct slab, list); 3739 freed = drain_freelist(searchp, l3, (l3->free_limit +
3747 BUG_ON(slabp->inuse); 3740 5 * searchp->num - 1) / (5 * searchp->num));
3748 list_del(&slabp->list); 3741 STATS_ADD_REAPED(searchp, freed);
3749 STATS_INC_REAPED(searchp); 3742 }
3750
3751 /*
3752 * Safe to drop the lock. The slab is no longer linked
3753 * to the cache. searchp cannot disappear, we hold
3754 * cache_chain_lock
3755 */
3756 l3->free_objects -= searchp->num;
3757 spin_unlock_irq(&l3->list_lock);
3758 slab_destroy(searchp, slabp);
3759 } while (--tofree > 0);
3760next: 3743next:
3761 cond_resched(); 3744 cond_resched();
3762 } 3745 }
3763 check_irq_on(); 3746 check_irq_on();
3764 mutex_unlock(&cache_chain_mutex); 3747 mutex_unlock(&cache_chain_mutex);
3765 next_reap_node(); 3748 next_reap_node();
3749 refresh_cpu_vm_stats(smp_processor_id());
3766 /* Set up the next iteration */ 3750 /* Set up the next iteration */
3767 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); 3751 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
3768} 3752}
diff --git a/mm/swap.c b/mm/swap.c
index 990868afc1c6..8fd095c4ae51 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -87,7 +87,7 @@ int rotate_reclaimable_page(struct page *page)
87 spin_lock_irqsave(&zone->lru_lock, flags); 87 spin_lock_irqsave(&zone->lru_lock, flags);
88 if (PageLRU(page) && !PageActive(page)) { 88 if (PageLRU(page) && !PageActive(page)) {
89 list_move_tail(&page->lru, &zone->inactive_list); 89 list_move_tail(&page->lru, &zone->inactive_list);
90 inc_page_state(pgrotated); 90 __count_vm_event(PGROTATED);
91 } 91 }
92 if (!test_clear_page_writeback(page)) 92 if (!test_clear_page_writeback(page))
93 BUG(); 93 BUG();
@@ -107,7 +107,7 @@ void fastcall activate_page(struct page *page)
107 del_page_from_inactive_list(zone, page); 107 del_page_from_inactive_list(zone, page);
108 SetPageActive(page); 108 SetPageActive(page);
109 add_page_to_active_list(zone, page); 109 add_page_to_active_list(zone, page);
110 inc_page_state(pgactivate); 110 __count_vm_event(PGACTIVATE);
111 } 111 }
112 spin_unlock_irq(&zone->lru_lock); 112 spin_unlock_irq(&zone->lru_lock);
113} 113}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7535211bb495..fccbd9bba77b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -87,7 +87,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
87 SetPageSwapCache(page); 87 SetPageSwapCache(page);
88 set_page_private(page, entry.val); 88 set_page_private(page, entry.val);
89 total_swapcache_pages++; 89 total_swapcache_pages++;
90 pagecache_acct(1); 90 __inc_zone_page_state(page, NR_FILE_PAGES);
91 } 91 }
92 write_unlock_irq(&swapper_space.tree_lock); 92 write_unlock_irq(&swapper_space.tree_lock);
93 radix_tree_preload_end(); 93 radix_tree_preload_end();
@@ -132,7 +132,7 @@ void __delete_from_swap_cache(struct page *page)
132 set_page_private(page, 0); 132 set_page_private(page, 0);
133 ClearPageSwapCache(page); 133 ClearPageSwapCache(page);
134 total_swapcache_pages--; 134 total_swapcache_pages--;
135 pagecache_acct(-1); 135 __dec_zone_page_state(page, NR_FILE_PAGES);
136 INC_CACHE_INFO(del_total); 136 INC_CACHE_INFO(del_total);
137} 137}
138 138
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eeacb0d695c3..ff2ebe9458a3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -47,8 +47,6 @@ struct scan_control {
47 /* Incremented by the number of inactive pages that were scanned */ 47 /* Incremented by the number of inactive pages that were scanned */
48 unsigned long nr_scanned; 48 unsigned long nr_scanned;
49 49
50 unsigned long nr_mapped; /* From page_state */
51
52 /* This context's GFP mask */ 50 /* This context's GFP mask */
53 gfp_t gfp_mask; 51 gfp_t gfp_mask;
54 52
@@ -217,7 +215,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
217 break; 215 break;
218 if (shrink_ret < nr_before) 216 if (shrink_ret < nr_before)
219 ret += nr_before - shrink_ret; 217 ret += nr_before - shrink_ret;
220 mod_page_state(slabs_scanned, this_scan); 218 count_vm_events(SLABS_SCANNED, this_scan);
221 total_scan -= this_scan; 219 total_scan -= this_scan;
222 220
223 cond_resched(); 221 cond_resched();
@@ -571,7 +569,7 @@ keep:
571 list_splice(&ret_pages, page_list); 569 list_splice(&ret_pages, page_list);
572 if (pagevec_count(&freed_pvec)) 570 if (pagevec_count(&freed_pvec))
573 __pagevec_release_nonlru(&freed_pvec); 571 __pagevec_release_nonlru(&freed_pvec);
574 mod_page_state(pgactivate, pgactivate); 572 count_vm_events(PGACTIVATE, pgactivate);
575 return nr_reclaimed; 573 return nr_reclaimed;
576} 574}
577 575
@@ -661,11 +659,11 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
661 nr_reclaimed += nr_freed; 659 nr_reclaimed += nr_freed;
662 local_irq_disable(); 660 local_irq_disable();
663 if (current_is_kswapd()) { 661 if (current_is_kswapd()) {
664 __mod_page_state_zone(zone, pgscan_kswapd, nr_scan); 662 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
665 __mod_page_state(kswapd_steal, nr_freed); 663 __count_vm_events(KSWAPD_STEAL, nr_freed);
666 } else 664 } else
667 __mod_page_state_zone(zone, pgscan_direct, nr_scan); 665 __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
668 __mod_page_state_zone(zone, pgsteal, nr_freed); 666 __count_vm_events(PGACTIVATE, nr_freed);
669 667
670 if (nr_taken == 0) 668 if (nr_taken == 0)
671 goto done; 669 goto done;
@@ -744,7 +742,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
744 * how much memory 742 * how much memory
745 * is mapped. 743 * is mapped.
746 */ 744 */
747 mapped_ratio = (sc->nr_mapped * 100) / vm_total_pages; 745 mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
746 global_page_state(NR_ANON_PAGES)) * 100) /
747 vm_total_pages;
748 748
749 /* 749 /*
750 * Now decide how much we really want to unmap some pages. The 750 * Now decide how much we really want to unmap some pages. The
@@ -841,11 +841,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
841 } 841 }
842 } 842 }
843 zone->nr_active += pgmoved; 843 zone->nr_active += pgmoved;
844 spin_unlock(&zone->lru_lock);
845 844
846 __mod_page_state_zone(zone, pgrefill, pgscanned); 845 __count_zone_vm_events(PGREFILL, zone, pgscanned);
847 __mod_page_state(pgdeactivate, pgdeactivate); 846 __count_vm_events(PGDEACTIVATE, pgdeactivate);
848 local_irq_enable(); 847 spin_unlock_irq(&zone->lru_lock);
849 848
850 pagevec_release(&pvec); 849 pagevec_release(&pvec);
851} 850}
@@ -977,7 +976,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
977 .swappiness = vm_swappiness, 976 .swappiness = vm_swappiness,
978 }; 977 };
979 978
980 inc_page_state(allocstall); 979 count_vm_event(ALLOCSTALL);
981 980
982 for (i = 0; zones[i] != NULL; i++) { 981 for (i = 0; zones[i] != NULL; i++) {
983 struct zone *zone = zones[i]; 982 struct zone *zone = zones[i];
@@ -990,7 +989,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
990 } 989 }
991 990
992 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 991 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
993 sc.nr_mapped = read_page_state(nr_mapped);
994 sc.nr_scanned = 0; 992 sc.nr_scanned = 0;
995 if (!priority) 993 if (!priority)
996 disable_swap_token(); 994 disable_swap_token();
@@ -1075,9 +1073,7 @@ loop_again:
1075 total_scanned = 0; 1073 total_scanned = 0;
1076 nr_reclaimed = 0; 1074 nr_reclaimed = 0;
1077 sc.may_writepage = !laptop_mode; 1075 sc.may_writepage = !laptop_mode;
1078 sc.nr_mapped = read_page_state(nr_mapped); 1076 count_vm_event(PAGEOUTRUN);
1079
1080 inc_page_state(pageoutrun);
1081 1077
1082 for (i = 0; i < pgdat->nr_zones; i++) { 1078 for (i = 0; i < pgdat->nr_zones; i++) {
1083 struct zone *zone = pgdat->node_zones + i; 1079 struct zone *zone = pgdat->node_zones + i;
@@ -1365,7 +1361,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1365 for_each_zone(zone) 1361 for_each_zone(zone)
1366 lru_pages += zone->nr_active + zone->nr_inactive; 1362 lru_pages += zone->nr_active + zone->nr_inactive;
1367 1363
1368 nr_slab = read_page_state(nr_slab); 1364 nr_slab = global_page_state(NR_SLAB);
1369 /* If slab caches are huge, it's better to hit them first */ 1365 /* If slab caches are huge, it's better to hit them first */
1370 while (nr_slab >= lru_pages) { 1366 while (nr_slab >= lru_pages) {
1371 reclaim_state.reclaimed_slab = 0; 1367 reclaim_state.reclaimed_slab = 0;
@@ -1407,9 +1403,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1407 for (prio = DEF_PRIORITY; prio >= 0; prio--) { 1403 for (prio = DEF_PRIORITY; prio >= 0; prio--) {
1408 unsigned long nr_to_scan = nr_pages - ret; 1404 unsigned long nr_to_scan = nr_pages - ret;
1409 1405
1410 sc.nr_mapped = read_page_state(nr_mapped);
1411 sc.nr_scanned = 0; 1406 sc.nr_scanned = 0;
1412
1413 ret += shrink_all_zones(nr_to_scan, prio, pass, &sc); 1407 ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
1414 if (ret >= nr_pages) 1408 if (ret >= nr_pages)
1415 goto out; 1409 goto out;
@@ -1523,11 +1517,6 @@ int zone_reclaim_mode __read_mostly;
1523#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */ 1517#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */
1524 1518
1525/* 1519/*
1526 * Mininum time between zone reclaim scans
1527 */
1528int zone_reclaim_interval __read_mostly = 30*HZ;
1529
1530/*
1531 * Priority for ZONE_RECLAIM. This determines the fraction of pages 1520 * Priority for ZONE_RECLAIM. This determines the fraction of pages
1532 * of a node considered for each zone_reclaim. 4 scans 1/16th of 1521 * of a node considered for each zone_reclaim. 4 scans 1/16th of
1533 * a zone. 1522 * a zone.
@@ -1548,7 +1537,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1548 struct scan_control sc = { 1537 struct scan_control sc = {
1549 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), 1538 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
1550 .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP), 1539 .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
1551 .nr_mapped = read_page_state(nr_mapped),
1552 .swap_cluster_max = max_t(unsigned long, nr_pages, 1540 .swap_cluster_max = max_t(unsigned long, nr_pages,
1553 SWAP_CLUSTER_MAX), 1541 SWAP_CLUSTER_MAX),
1554 .gfp_mask = gfp_mask, 1542 .gfp_mask = gfp_mask,
@@ -1593,16 +1581,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1593 1581
1594 p->reclaim_state = NULL; 1582 p->reclaim_state = NULL;
1595 current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); 1583 current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
1596
1597 if (nr_reclaimed == 0) {
1598 /*
1599 * We were unable to reclaim enough pages to stay on node. We
1600 * now allow off node accesses for a certain time period before
1601 * trying again to reclaim pages from the local zone.
1602 */
1603 zone->last_unsuccessful_zone_reclaim = jiffies;
1604 }
1605
1606 return nr_reclaimed >= nr_pages; 1584 return nr_reclaimed >= nr_pages;
1607} 1585}
1608 1586
@@ -1612,13 +1590,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1612 int node_id; 1590 int node_id;
1613 1591
1614 /* 1592 /*
1615 * Do not reclaim if there was a recent unsuccessful attempt at zone 1593 * Do not reclaim if there are not enough reclaimable pages in this
1616 * reclaim. In that case we let allocations go off node for the 1594 * zone that would satisfy this allocation.
1617 * zone_reclaim_interval. Otherwise we would scan for each off-node 1595 *
1618 * page allocation. 1596 * All unmapped pagecache pages are reclaimable.
1597 *
1598 * Both counters may be temporarily off a bit so we use
1599 * SWAP_CLUSTER_MAX as the boundary. It may also be good to
1600 * leave a few frequently used unmapped pagecache pages around.
1619 */ 1601 */
1620 if (time_before(jiffies, 1602 if (zone_page_state(zone, NR_FILE_PAGES) -
1621 zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval)) 1603 zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX)
1622 return 0; 1604 return 0;
1623 1605
1624 /* 1606 /*
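
The replacement heuristic above can be read as "only attempt zone reclaim when the zone holds at least one reclaim batch of unmapped pagecache". A minimal sketch, assuming the zone_page_state() accessor and the NR_FILE_PAGES/NR_FILE_MAPPED counters introduced in mm/vmstat.c below; the helper name is hypothetical and not part of the patch:

    /*
     * Hypothetical helper mirroring the check in zone_reclaim(): unmapped
     * pagecache pages (file pages minus mapped file pages) are what zone
     * reclaim can realistically free.  Both counters are per-cpu batched
     * and therefore slightly approximate.
     */
    static int zone_has_unmapped_pagecache(struct zone *zone)
    {
    	return zone_page_state(zone, NR_FILE_PAGES) -
    		zone_page_state(zone, NR_FILE_MAPPED) >= SWAP_CLUSTER_MAX;
    }
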
diff --git a/mm/vmstat.c b/mm/vmstat.c
new file mode 100644
index 000000000000..73b83d67bab6
--- /dev/null
+++ b/mm/vmstat.c
@@ -0,0 +1,614 @@
1/*
2 * linux/mm/vmstat.c
3 *
4 * Manages VM statistics
5 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
6 *
7 * zoned VM statistics
8 * Copyright (C) 2006 Silicon Graphics, Inc.,
9 * Christoph Lameter <christoph@lameter.com>
10 */
11
12#include <linux/config.h>
13#include <linux/mm.h>
14#include <linux/module.h>
15
16void __get_zone_counts(unsigned long *active, unsigned long *inactive,
17 unsigned long *free, struct pglist_data *pgdat)
18{
19 struct zone *zones = pgdat->node_zones;
20 int i;
21
22 *active = 0;
23 *inactive = 0;
24 *free = 0;
25 for (i = 0; i < MAX_NR_ZONES; i++) {
26 *active += zones[i].nr_active;
27 *inactive += zones[i].nr_inactive;
28 *free += zones[i].free_pages;
29 }
30}
31
32void get_zone_counts(unsigned long *active,
33 unsigned long *inactive, unsigned long *free)
34{
35 struct pglist_data *pgdat;
36
37 *active = 0;
38 *inactive = 0;
39 *free = 0;
40 for_each_online_pgdat(pgdat) {
41 unsigned long l, m, n;
42 __get_zone_counts(&l, &m, &n, pgdat);
43 *active += l;
44 *inactive += m;
45 *free += n;
46 }
47}
48
49#ifdef CONFIG_VM_EVENT_COUNTERS
50DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
51EXPORT_PER_CPU_SYMBOL(vm_event_states);
52
53static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
54{
55 int cpu = 0;
56 int i;
57
58 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
59
60 cpu = first_cpu(*cpumask);
61 while (cpu < NR_CPUS) {
62 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
63
64 cpu = next_cpu(cpu, *cpumask);
65
66 if (cpu < NR_CPUS)
67 prefetch(&per_cpu(vm_event_states, cpu));
68
69
70 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
71 ret[i] += this->event[i];
72 }
73}
74
75/*
76 * Accumulate the vm event counters across all CPUs.
77 * The result is unavoidably approximate - it can change
78 * during and after execution of this function.
 79 */
80void all_vm_events(unsigned long *ret)
81{
82 sum_vm_events(ret, &cpu_online_map);
83}
84
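
A hypothetical caller sketch (not part of this file) showing how the approximate totals returned by all_vm_events() might be consumed; PGPGIN is one of the vm_event_item indices also used in vmstat_start() further down:

    static unsigned long snapshot_pgpgin(void)
    {
    	unsigned long events[NR_VM_EVENT_ITEMS];

    	all_vm_events(events);	/* approximate: CPUs keep counting meanwhile */
    	return events[PGPGIN];
    }
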
85#ifdef CONFIG_HOTPLUG
86/*
87 * Fold the foreign cpu events into our own.
88 *
89 * This is adding to the events on one processor
90 * but keeps the global counts constant.
91 */
92void vm_events_fold_cpu(int cpu)
93{
94 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
95 int i;
96
97 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
98 count_vm_events(i, fold_state->event[i]);
99 fold_state->event[i] = 0;
100 }
101}
102#endif /* CONFIG_HOTPLUG */
103
104#endif /* CONFIG_VM_EVENT_COUNTERS */
105
106/*
107 * Manage combined zone based / global counters
108 *
109 * vm_stat contains the global counters
110 */
111atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
112EXPORT_SYMBOL(vm_stat);
113
114#ifdef CONFIG_SMP
115
116#define STAT_THRESHOLD 32
117
118/*
119 * Determine pointer to currently valid differential byte given a zone and
120 * the item number.
121 *
122 * Preemption must be off
123 */
124static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
125{
126 return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
127}
128
129/*
130 * For use when we know that interrupts are disabled.
131 */
132void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
133 int delta)
134{
135 s8 *p;
136 long x;
137
138 p = diff_pointer(zone, item);
139 x = delta + *p;
140
141 if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
142 zone_page_state_add(x, zone, item);
143 x = 0;
144 }
145
146 *p = x;
147}
148EXPORT_SYMBOL(__mod_zone_page_state);
149
150/*
151 * For an unknown interrupt state
152 */
153void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
154 int delta)
155{
156 unsigned long flags;
157
158 local_irq_save(flags);
159 __mod_zone_page_state(zone, item, delta);
160 local_irq_restore(flags);
161}
162EXPORT_SYMBOL(mod_zone_page_state);
163
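
A hedged usage sketch for the two entry points above: a caller that already runs with interrupts disabled (for instance under a lock taken with an *_irq variant) can use __mod_zone_page_state() directly, while any other context goes through the irq-saving wrapper. The helper below is hypothetical:

    /* Hypothetical bulk-accounting helper: remove nr pages from a zone's pagecache count. */
    static void uncharge_pagecache(struct zone *zone, int nr, int irqs_off)
    {
    	if (irqs_off)
    		__mod_zone_page_state(zone, NR_FILE_PAGES, -nr);
    	else
    		mod_zone_page_state(zone, NR_FILE_PAGES, -nr);
    }
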
164/*
165 * Optimized increment and decrement functions.
166 *
167 * These are only for a single page and therefore can take a struct page *
168 * argument instead of struct zone *. This allows the inclusion of the code
169 * generated for page_zone(page) into the optimized functions.
170 *
171 * No overflow check is necessary and therefore the differential can be
172 * incremented or decremented in place which may allow the compilers to
173 * generate better code.
174 *
175 * The increment or decrement is known and therefore one boundary check can
176 * be omitted.
177 *
178 * Some processors have inc/dec instructions that are atomic vs an interrupt.
179 * However, the code must first determine the differential location in a zone
180 * based on the processor number and then inc/dec the counter. There is no
181 * guarantee without disabling preemption that the processor will not change
182 * in between and therefore the atomicity vs. interrupt cannot be exploited
183 * in a useful way here.
184 */
185static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
186{
187 s8 *p = diff_pointer(zone, item);
188
189 (*p)++;
190
191 if (unlikely(*p > STAT_THRESHOLD)) {
192 zone_page_state_add(*p, zone, item);
193 *p = 0;
194 }
195}
196
197void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
198{
199 __inc_zone_state(page_zone(page), item);
200}
201EXPORT_SYMBOL(__inc_zone_page_state);
202
203void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
204{
205 struct zone *zone = page_zone(page);
206 s8 *p = diff_pointer(zone, item);
207
208 (*p)--;
209
210 if (unlikely(*p < -STAT_THRESHOLD)) {
211 zone_page_state_add(*p, zone, item);
212 *p = 0;
213 }
214}
215EXPORT_SYMBOL(__dec_zone_page_state);
216
217void inc_zone_state(struct zone *zone, enum zone_stat_item item)
218{
219 unsigned long flags;
220
221 local_irq_save(flags);
222 __inc_zone_state(zone, item);
223 local_irq_restore(flags);
224}
225
226void inc_zone_page_state(struct page *page, enum zone_stat_item item)
227{
228 unsigned long flags;
229 struct zone *zone;
230
231 zone = page_zone(page);
232 local_irq_save(flags);
233 __inc_zone_state(zone, item);
234 local_irq_restore(flags);
235}
236EXPORT_SYMBOL(inc_zone_page_state);
237
238void dec_zone_page_state(struct page *page, enum zone_stat_item item)
239{
240 unsigned long flags;
241 struct zone *zone;
242 s8 *p;
243
244 zone = page_zone(page);
245 local_irq_save(flags);
246 p = diff_pointer(zone, item);
247
248 (*p)--;
249
250 if (unlikely(*p < -STAT_THRESHOLD)) {
251 zone_page_state_add(*p, zone, item);
252 *p = 0;
253 }
254 local_irq_restore(flags);
255}
256EXPORT_SYMBOL(dec_zone_page_state);
257
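
A hypothetical sketch of the single-page helpers in use, assuming the NR_WRITEBACK item from the zone_stat_item enum: taking a struct page * lets page_zone() be folded into the accounting fast path, and the non-__ variants save and restore interrupts so they may be called from any context:

    static void writeback_account_start(struct page *page)
    {
    	inc_zone_page_state(page, NR_WRITEBACK);	/* irq-safe variant */
    }

    static void writeback_account_end(struct page *page)
    {
    	dec_zone_page_state(page, NR_WRITEBACK);
    }
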
258/*
259 * Update the zone counters for one cpu.
260 */
261void refresh_cpu_vm_stats(int cpu)
262{
263 struct zone *zone;
264 int i;
265 unsigned long flags;
266
267 for_each_zone(zone) {
268 struct per_cpu_pageset *pcp;
269
270 pcp = zone_pcp(zone, cpu);
271
272 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
273 if (pcp->vm_stat_diff[i]) {
274 local_irq_save(flags);
275 zone_page_state_add(pcp->vm_stat_diff[i],
276 zone, i);
277 pcp->vm_stat_diff[i] = 0;
278 local_irq_restore(flags);
279 }
280 }
281}
282
283static void __refresh_cpu_vm_stats(void *dummy)
284{
285 refresh_cpu_vm_stats(smp_processor_id());
286}
287
288/*
289 * Consolidate all counters.
290 *
 291 * Note that the result is less inaccurate after consolidation, but still
 292 * not exact if concurrent processes are allowed to run.
293 */
294void refresh_vm_stats(void)
295{
296 on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
297}
298EXPORT_SYMBOL(refresh_vm_stats);
299
300#endif
301
302#ifdef CONFIG_NUMA
303/*
304 * zonelist = the list of zones passed to the allocator
305 * z = the zone from which the allocation occurred.
306 *
307 * Must be called with interrupts disabled.
308 */
309void zone_statistics(struct zonelist *zonelist, struct zone *z)
310{
311 if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
312 __inc_zone_state(z, NUMA_HIT);
313 } else {
314 __inc_zone_state(z, NUMA_MISS);
315 __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
316 }
317 if (z->zone_pgdat == NODE_DATA(numa_node_id()))
318 __inc_zone_state(z, NUMA_LOCAL);
319 else
320 __inc_zone_state(z, NUMA_OTHER);
321}
322#endif
323
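
A hypothetical call-site sketch; the real caller lives in the page allocator, but the only contract shown here is that interrupts are off while zone_statistics() runs:

    /* Hypothetical: record NUMA placement for a page just taken from zone z. */
    static void account_numa_alloc(struct zonelist *zonelist, struct zone *z)
    {
    	unsigned long flags;

    	local_irq_save(flags);
    	zone_statistics(zonelist, z);
    	local_irq_restore(flags);
    }
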
324#ifdef CONFIG_PROC_FS
325
326#include <linux/seq_file.h>
327
328static void *frag_start(struct seq_file *m, loff_t *pos)
329{
330 pg_data_t *pgdat;
331 loff_t node = *pos;
332 for (pgdat = first_online_pgdat();
333 pgdat && node;
334 pgdat = next_online_pgdat(pgdat))
335 --node;
336
337 return pgdat;
338}
339
340static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
341{
342 pg_data_t *pgdat = (pg_data_t *)arg;
343
344 (*pos)++;
345 return next_online_pgdat(pgdat);
346}
347
348static void frag_stop(struct seq_file *m, void *arg)
349{
350}
351
352/*
353 * This walks the free areas for each zone.
354 */
355static int frag_show(struct seq_file *m, void *arg)
356{
357 pg_data_t *pgdat = (pg_data_t *)arg;
358 struct zone *zone;
359 struct zone *node_zones = pgdat->node_zones;
360 unsigned long flags;
361 int order;
362
363 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
364 if (!populated_zone(zone))
365 continue;
366
367 spin_lock_irqsave(&zone->lock, flags);
368 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
369 for (order = 0; order < MAX_ORDER; ++order)
370 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
371 spin_unlock_irqrestore(&zone->lock, flags);
372 seq_putc(m, '\n');
373 }
374 return 0;
375}
376
377struct seq_operations fragmentation_op = {
378 .start = frag_start,
379 .next = frag_next,
380 .stop = frag_stop,
381 .show = frag_show,
382};
383
384static char *vmstat_text[] = {
385 /* Zoned VM counters */
386 "nr_anon_pages",
387 "nr_mapped",
388 "nr_file_pages",
389 "nr_slab",
390 "nr_page_table_pages",
391 "nr_dirty",
392 "nr_writeback",
393 "nr_unstable",
394 "nr_bounce",
395
396#ifdef CONFIG_NUMA
397 "numa_hit",
398 "numa_miss",
399 "numa_foreign",
400 "numa_interleave",
401 "numa_local",
402 "numa_other",
403#endif
404
405#ifdef CONFIG_VM_EVENT_COUNTERS
406 "pgpgin",
407 "pgpgout",
408 "pswpin",
409 "pswpout",
410
411 "pgalloc_dma",
412 "pgalloc_dma32",
413 "pgalloc_normal",
414 "pgalloc_high",
415
416 "pgfree",
417 "pgactivate",
418 "pgdeactivate",
419
420 "pgfault",
421 "pgmajfault",
422
423 "pgrefill_dma",
424 "pgrefill_dma32",
425 "pgrefill_normal",
426 "pgrefill_high",
427
428 "pgsteal_dma",
429 "pgsteal_dma32",
430 "pgsteal_normal",
431 "pgsteal_high",
432
433 "pgscan_kswapd_dma",
434 "pgscan_kswapd_dma32",
435 "pgscan_kswapd_normal",
436 "pgscan_kswapd_high",
437
438 "pgscan_direct_dma",
439 "pgscan_direct_dma32",
440 "pgscan_direct_normal",
441 "pgscan_direct_high",
442
443 "pginodesteal",
444 "slabs_scanned",
445 "kswapd_steal",
446 "kswapd_inodesteal",
447 "pageoutrun",
448 "allocstall",
449
450 "pgrotated",
451#endif
452};
453
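
The ordering of vmstat_text[] is significant: zoneinfo_show() below uses the first NR_VM_ZONE_STAT_ITEMS entries for the per-zone counters, and vmstat_start()/vmstat_show() treat the whole array as one flat namespace. A schematic comment (illustrative, not part of the file):

    /*
     * Buffer built by vmstat_start():
     *
     *   v[0 .. NR_VM_ZONE_STAT_ITEMS-1]   zoned counters, via global_page_state()
     *   v[NR_VM_ZONE_STAT_ITEMS .. ]      event counters, via all_vm_events()
     *
     * vmstat_text[] therefore lists the zone_stat_item names first, in enum
     * order, followed by the vm_event_item names in enum order.
     */
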
454/*
455 * Output information about zones in @pgdat.
456 */
457static int zoneinfo_show(struct seq_file *m, void *arg)
458{
459 pg_data_t *pgdat = arg;
460 struct zone *zone;
461 struct zone *node_zones = pgdat->node_zones;
462 unsigned long flags;
463
464 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
465 int i;
466
467 if (!populated_zone(zone))
468 continue;
469
470 spin_lock_irqsave(&zone->lock, flags);
471 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
472 seq_printf(m,
473 "\n pages free %lu"
474 "\n min %lu"
475 "\n low %lu"
476 "\n high %lu"
477 "\n active %lu"
478 "\n inactive %lu"
479 "\n scanned %lu (a: %lu i: %lu)"
480 "\n spanned %lu"
481 "\n present %lu",
482 zone->free_pages,
483 zone->pages_min,
484 zone->pages_low,
485 zone->pages_high,
486 zone->nr_active,
487 zone->nr_inactive,
488 zone->pages_scanned,
489 zone->nr_scan_active, zone->nr_scan_inactive,
490 zone->spanned_pages,
491 zone->present_pages);
492
493 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
494 seq_printf(m, "\n %-12s %lu", vmstat_text[i],
495 zone_page_state(zone, i));
496
497 seq_printf(m,
498 "\n protection: (%lu",
499 zone->lowmem_reserve[0]);
500 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
501 seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
502 seq_printf(m,
503 ")"
504 "\n pagesets");
505 for_each_online_cpu(i) {
506 struct per_cpu_pageset *pageset;
507 int j;
508
509 pageset = zone_pcp(zone, i);
510 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
511 if (pageset->pcp[j].count)
512 break;
513 }
514 if (j == ARRAY_SIZE(pageset->pcp))
515 continue;
516 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
517 seq_printf(m,
518 "\n cpu: %i pcp: %i"
519 "\n count: %i"
520 "\n high: %i"
521 "\n batch: %i",
522 i, j,
523 pageset->pcp[j].count,
524 pageset->pcp[j].high,
525 pageset->pcp[j].batch);
526 }
527 }
528 seq_printf(m,
529 "\n all_unreclaimable: %u"
530 "\n prev_priority: %i"
531 "\n temp_priority: %i"
532 "\n start_pfn: %lu",
533 zone->all_unreclaimable,
534 zone->prev_priority,
535 zone->temp_priority,
536 zone->zone_start_pfn);
537 spin_unlock_irqrestore(&zone->lock, flags);
538 seq_putc(m, '\n');
539 }
540 return 0;
541}
542
543struct seq_operations zoneinfo_op = {
544 .start = frag_start, /* iterate over all zones. The same as in
545 * fragmentation. */
546 .next = frag_next,
547 .stop = frag_stop,
548 .show = zoneinfo_show,
549};
550
551static void *vmstat_start(struct seq_file *m, loff_t *pos)
552{
553 unsigned long *v;
554#ifdef CONFIG_VM_EVENT_COUNTERS
555 unsigned long *e;
556#endif
557 int i;
558
559 if (*pos >= ARRAY_SIZE(vmstat_text))
560 return NULL;
561
562#ifdef CONFIG_VM_EVENT_COUNTERS
563 v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
564 + sizeof(struct vm_event_state), GFP_KERNEL);
565#else
566 v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
567 GFP_KERNEL);
568#endif
569 m->private = v;
570 if (!v)
571 return ERR_PTR(-ENOMEM);
572 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
573 v[i] = global_page_state(i);
574#ifdef CONFIG_VM_EVENT_COUNTERS
575 e = v + NR_VM_ZONE_STAT_ITEMS;
576 all_vm_events(e);
577 e[PGPGIN] /= 2; /* sectors -> kbytes */
578 e[PGPGOUT] /= 2;
579#endif
580 return v + *pos;
581}
582
583static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
584{
585 (*pos)++;
586 if (*pos >= ARRAY_SIZE(vmstat_text))
587 return NULL;
588 return (unsigned long *)m->private + *pos;
589}
590
591static int vmstat_show(struct seq_file *m, void *arg)
592{
593 unsigned long *l = arg;
594 unsigned long off = l - (unsigned long *)m->private;
595
596 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
597 return 0;
598}
599
600static void vmstat_stop(struct seq_file *m, void *arg)
601{
602 kfree(m->private);
603 m->private = NULL;
604}
605
606struct seq_operations vmstat_op = {
607 .start = vmstat_start,
608 .next = vmstat_next,
609 .stop = vmstat_stop,
610 .show = vmstat_show,
611};
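
The seq_operations defined in this file are hooked up to /proc elsewhere (under fs/proc). A typical wiring sketch for /proc/vmstat, using the standard seq_file helpers, is shown here as an assumption rather than a quote of that code:

    static int vmstat_open(struct inode *inode, struct file *file)
    {
    	return seq_open(file, &vmstat_op);
    }

    static struct file_operations proc_vmstat_file_operations = {
    	.open		= vmstat_open,
    	.read		= seq_read,
    	.llseek		= seq_lseek,
    	.release	= seq_release,
    };
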
612
613#endif /* CONFIG_PROC_FS */
614