Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            6
-rw-r--r--  mm/filemap.c          4
-rw-r--r--  mm/highmem.c          3
-rw-r--r--  mm/memory.c           9
-rw-r--r--  mm/mempolicy.c        2
-rw-r--r--  mm/mempool.c          6
-rw-r--r--  mm/page-writeback.c  17
-rw-r--r--  mm/page_alloc.c      97
-rw-r--r--  mm/readahead.c        8
-rw-r--r--  mm/shmem.c            2
-rw-r--r--  mm/slab.c           246
-rw-r--r--  mm/truncate.c        11
-rw-r--r--  mm/vmalloc.c          2
-rw-r--r--  mm/vmscan.c          51
-rw-r--r--  mm/vmstat.c          70
15 files changed, 252 insertions, 282 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index db7c55de92cd..7942b333e46c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -157,3 +157,9 @@ config RESOURCES_64BIT
157 default 64BIT 157 default 64BIT
158 help 158 help
159 This option allows memory and IO resources to be 64 bit. 159 This option allows memory and IO resources to be 64 bit.
160
161config ZONE_DMA_FLAG
162 int
163 default "0" if !ZONE_DMA
164 default "1"
165
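
The new ZONE_DMA_FLAG symbol gives C code a compile-time 0/1 constant mirroring CONFIG_ZONE_DMA, so DMA-zone handling can be written as an ordinary conditional that the compiler discards when ZONE_DMA is not configured (the mm/slab.c hunks further down use it exactly this way). A minimal sketch of the pattern follows; the helper name wants_dma_pages() is made up for illustration and is not part of the patch.

/* Sketch only: relies on CONFIG_ZONE_DMA_FLAG being 0 or 1, as defined
 * by the Kconfig hunk above.  Not part of the patch. */
#include <linux/gfp.h>

static inline int wants_dma_pages(gfp_t flags)
{
	/* Whole branch is dead code when ZONE_DMA is not configured. */
	return CONFIG_ZONE_DMA_FLAG && (flags & GFP_DMA);
}
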
diff --git a/mm/filemap.c b/mm/filemap.c
index f30ef28405d3..00414849a867 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -327,7 +327,7 @@ EXPORT_SYMBOL(sync_page_range);
327 * @pos: beginning offset in pages to write 327 * @pos: beginning offset in pages to write
328 * @count: number of bytes to write 328 * @count: number of bytes to write
329 * 329 *
330 * Note: Holding i_mutex across sync_page_range_nolock is not a good idea 330 * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea
331 * as it forces O_SYNC writers to different parts of the same file 331 * as it forces O_SYNC writers to different parts of the same file
332 * to be serialised right until io completion. 332 * to be serialised right until io completion.
333 */ 333 */
@@ -784,7 +784,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
784 * @mapping: target address_space 784 * @mapping: target address_space
785 * @index: the page index 785 * @index: the page index
786 * 786 *
787 * Same as grab_cache_page, but do not wait if the page is unavailable. 787 * Same as grab_cache_page(), but do not wait if the page is unavailable.
788 * This is intended for speculative data generators, where the data can 788 * This is intended for speculative data generators, where the data can
789 * be regenerated if the page couldn't be grabbed. This routine should 789 * be regenerated if the page couldn't be grabbed. This routine should
790 * be safe to call while holding the lock for another page. 790 * be safe to call while holding the lock for another page.
diff --git a/mm/highmem.c b/mm/highmem.c
index 0206e7e5018c..51e1c1995fec 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -47,7 +47,8 @@ unsigned int nr_free_highpages (void)
47 unsigned int pages = 0; 47 unsigned int pages = 0;
48 48
49 for_each_online_pgdat(pgdat) 49 for_each_online_pgdat(pgdat)
50 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; 50 pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
51 NR_FREE_PAGES);
51 52
52 return pages; 53 return pages;
53} 54}
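
The pattern above recurs throughout the rest of the series: instead of reading zone->free_pages directly, callers go through the zoned VM counter accessors. A minimal sketch of the three read scopes used in this diff (zone_page_state(), node_page_state(), global_page_state()); the wrapper names are illustrative, not from the patch.

/* Illustrative only: the same NR_FREE_PAGES counter read at three scopes. */
#include <linux/mmzone.h>
#include <linux/vmstat.h>

static unsigned long free_in_zone(struct zone *zone)
{
	return zone_page_state(zone, NR_FREE_PAGES);	/* one zone */
}

static unsigned long free_on_node(int nid)
{
	return node_page_state(nid, NR_FREE_PAGES);	/* one NUMA node */
}

static unsigned long free_in_system(void)
{
	return global_page_state(NR_FREE_PAGES);	/* whole machine */
}
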
diff --git a/mm/memory.c b/mm/memory.c
index ef09f0acb1d8..072c1135ad37 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -678,7 +678,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
678 if (pte_dirty(ptent)) 678 if (pte_dirty(ptent))
679 set_page_dirty(page); 679 set_page_dirty(page);
680 if (pte_young(ptent)) 680 if (pte_young(ptent))
681 mark_page_accessed(page); 681 SetPageReferenced(page);
682 file_rss--; 682 file_rss--;
683 } 683 }
684 page_remove_rmap(page, vma); 684 page_remove_rmap(page, vma);
@@ -1531,8 +1531,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1531 if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) 1531 if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
1532 goto unwritable_page; 1532 goto unwritable_page;
1533 1533
1534 page_cache_release(old_page);
1535
1536 /* 1534 /*
1537 * Since we dropped the lock we need to revalidate 1535 * Since we dropped the lock we need to revalidate
1538 * the PTE as someone else may have changed it. If 1536 * the PTE as someone else may have changed it. If
@@ -1541,6 +1539,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1541 */ 1539 */
1542 page_table = pte_offset_map_lock(mm, pmd, address, 1540 page_table = pte_offset_map_lock(mm, pmd, address,
1543 &ptl); 1541 &ptl);
1542 page_cache_release(old_page);
1544 if (!pte_same(*page_table, orig_pte)) 1543 if (!pte_same(*page_table, orig_pte))
1545 goto unlock; 1544 goto unlock;
1546 } 1545 }
@@ -1776,9 +1775,7 @@ restart:
1776} 1775}
1777 1776
1778/** 1777/**
1779 * unmap_mapping_range - unmap the portion of all mmaps 1778 * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
1780 * in the specified address_space corresponding to the specified
1781 * page range in the underlying file.
1782 * @mapping: the address space containing mmaps to be unmapped. 1779 * @mapping: the address space containing mmaps to be unmapped.
1783 * @holebegin: byte in first page to unmap, relative to the start of 1780 * @holebegin: byte in first page to unmap, relative to the start of
1784 * the underlying file. This will be rounded down to a PAGE_SIZE 1781 * the underlying file. This will be rounded down to a PAGE_SIZE
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c2aec0e1090d..259a706bd83e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
105 105
106/* Highest zone. An specific allocation for a zone below that is not 106/* Highest zone. An specific allocation for a zone below that is not
107 policied. */ 107 policied. */
108enum zone_type policy_zone = ZONE_DMA; 108enum zone_type policy_zone = 0;
109 109
110struct mempolicy default_policy = { 110struct mempolicy default_policy = {
111 .refcnt = ATOMIC_INIT(1), /* never free it */ 111 .refcnt = ATOMIC_INIT(1), /* never free it */
diff --git a/mm/mempool.c b/mm/mempool.c
index ccd8cb8cd41f..cc1ca86dfc24 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -46,9 +46,9 @@ static void free_pool(mempool_t *pool)
46 * @pool_data: optional private data available to the user-defined functions. 46 * @pool_data: optional private data available to the user-defined functions.
47 * 47 *
48 * this function creates and allocates a guaranteed size, preallocated 48 * this function creates and allocates a guaranteed size, preallocated
49 * memory pool. The pool can be used from the mempool_alloc and mempool_free 49 * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
50 * functions. This function might sleep. Both the alloc_fn() and the free_fn() 50 * functions. This function might sleep. Both the alloc_fn() and the free_fn()
51 * functions might sleep - as long as the mempool_alloc function is not called 51 * functions might sleep - as long as the mempool_alloc() function is not called
52 * from IRQ contexts. 52 * from IRQ contexts.
53 */ 53 */
54mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, 54mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(mempool_destroy);
195 * mempool_create(). 195 * mempool_create().
196 * @gfp_mask: the usual allocation bitmask. 196 * @gfp_mask: the usual allocation bitmask.
197 * 197 *
198 * this function only sleeps if the alloc_fn function sleeps or 198 * this function only sleeps if the alloc_fn() function sleeps or
199 * returns NULL. Note that due to preallocation, this function 199 * returns NULL. Note that due to preallocation, this function
200 * *never* fails when called from process contexts. (it might 200 * *never* fails when called from process contexts. (it might
201 * fail if called from an IRQ context.) 201 * fail if called from an IRQ context.)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index be0efbde4994..f7e088f5a309 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -515,7 +515,7 @@ static int __cpuinit
515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) 515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
516{ 516{
517 writeback_set_ratelimit(); 517 writeback_set_ratelimit();
518 return 0; 518 return NOTIFY_DONE;
519} 519}
520 520
521static struct notifier_block __cpuinitdata ratelimit_nb = { 521static struct notifier_block __cpuinitdata ratelimit_nb = {
@@ -549,9 +549,7 @@ void __init page_writeback_init(void)
549} 549}
550 550
551/** 551/**
552 * generic_writepages - walk the list of dirty pages of the given 552 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
553 * address space and writepage() all of them.
554 *
555 * @mapping: address space structure to write 553 * @mapping: address space structure to write
556 * @wbc: subtract the number of written pages from *@wbc->nr_to_write 554 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
557 * 555 *
@@ -698,7 +696,6 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
698 696
699/** 697/**
700 * write_one_page - write out a single page and optionally wait on I/O 698 * write_one_page - write out a single page and optionally wait on I/O
701 *
702 * @page: the page to write 699 * @page: the page to write
703 * @wait: if true, wait on writeout 700 * @wait: if true, wait on writeout
704 * 701 *
@@ -737,6 +734,16 @@ int write_one_page(struct page *page, int wait)
737EXPORT_SYMBOL(write_one_page); 734EXPORT_SYMBOL(write_one_page);
738 735
739/* 736/*
737 * For address_spaces which do not use buffers nor write back.
738 */
739int __set_page_dirty_no_writeback(struct page *page)
740{
741 if (!PageDirty(page))
742 SetPageDirty(page);
743 return 0;
744}
745
746/*
740 * For address_spaces which do not use buffers. Just tag the page as dirty in 747 * For address_spaces which do not use buffers. Just tag the page as dirty in
741 * its radix tree. 748 * its radix tree.
742 * 749 *
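
The new __set_page_dirty_no_writeback() only flips PG_dirty and never tags the radix tree or touches dirty accounting, which is all an address_space needs when its pages are never written back. Below is a sketch of how a RAM-backed filesystem would wire it up, mirroring the mm/shmem.c hunk later in this diff; the aops name is hypothetical, and the helper's declaration lives in a header outside this mm/ diffstat.

/* Sketch, not part of the patch: pages of this mapping are never written
 * back, so plain PG_dirty bookkeeping is sufficient. */
#include <linux/fs.h>
#include <linux/mm.h>

static const struct address_space_operations example_ram_aops = {
	.set_page_dirty	= __set_page_dirty_no_writeback,
};
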
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f12052dc23ff..d461b23a27a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -73,7 +73,9 @@ static void __free_pages_ok(struct page *page, unsigned int order);
73 * don't need any ZONE_NORMAL reservation 73 * don't need any ZONE_NORMAL reservation
74 */ 74 */
75int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 75int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
76#ifdef CONFIG_ZONE_DMA
76 256, 77 256,
78#endif
77#ifdef CONFIG_ZONE_DMA32 79#ifdef CONFIG_ZONE_DMA32
78 256, 80 256,
79#endif 81#endif
@@ -85,7 +87,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
85EXPORT_SYMBOL(totalram_pages); 87EXPORT_SYMBOL(totalram_pages);
86 88
87static char * const zone_names[MAX_NR_ZONES] = { 89static char * const zone_names[MAX_NR_ZONES] = {
90#ifdef CONFIG_ZONE_DMA
88 "DMA", 91 "DMA",
92#endif
89#ifdef CONFIG_ZONE_DMA32 93#ifdef CONFIG_ZONE_DMA32
90 "DMA32", 94 "DMA32",
91#endif 95#endif
@@ -395,7 +399,7 @@ static inline void __free_one_page(struct page *page,
395 VM_BUG_ON(page_idx & (order_size - 1)); 399 VM_BUG_ON(page_idx & (order_size - 1));
396 VM_BUG_ON(bad_range(zone, page)); 400 VM_BUG_ON(bad_range(zone, page));
397 401
398 zone->free_pages += order_size; 402 __mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
399 while (order < MAX_ORDER-1) { 403 while (order < MAX_ORDER-1) {
400 unsigned long combined_idx; 404 unsigned long combined_idx;
401 struct free_area *area; 405 struct free_area *area;
@@ -631,7 +635,7 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
631 list_del(&page->lru); 635 list_del(&page->lru);
632 rmv_page_order(page); 636 rmv_page_order(page);
633 area->nr_free--; 637 area->nr_free--;
634 zone->free_pages -= 1UL << order; 638 __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
635 expand(zone, page, order, current_order, area); 639 expand(zone, page, order, current_order, area);
636 return page; 640 return page;
637 } 641 }
@@ -989,7 +993,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
989 int classzone_idx, int alloc_flags) 993 int classzone_idx, int alloc_flags)
990{ 994{
991 /* free_pages my go negative - that's OK */ 995 /* free_pages my go negative - that's OK */
992 long min = mark, free_pages = z->free_pages - (1 << order) + 1; 996 long min = mark;
997 long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
993 int o; 998 int o;
994 999
995 if (alloc_flags & ALLOC_HIGH) 1000 if (alloc_flags & ALLOC_HIGH)
@@ -1439,35 +1444,6 @@ fastcall void free_pages(unsigned long addr, unsigned int order)
1439 1444
1440EXPORT_SYMBOL(free_pages); 1445EXPORT_SYMBOL(free_pages);
1441 1446
1442/*
1443 * Total amount of free (allocatable) RAM:
1444 */
1445unsigned int nr_free_pages(void)
1446{
1447 unsigned int sum = 0;
1448 struct zone *zone;
1449
1450 for_each_zone(zone)
1451 sum += zone->free_pages;
1452
1453 return sum;
1454}
1455
1456EXPORT_SYMBOL(nr_free_pages);
1457
1458#ifdef CONFIG_NUMA
1459unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
1460{
1461 unsigned int sum = 0;
1462 enum zone_type i;
1463
1464 for (i = 0; i < MAX_NR_ZONES; i++)
1465 sum += pgdat->node_zones[i].free_pages;
1466
1467 return sum;
1468}
1469#endif
1470
1471static unsigned int nr_free_zone_pages(int offset) 1447static unsigned int nr_free_zone_pages(int offset)
1472{ 1448{
1473 /* Just pick one node, since fallback list is circular */ 1449 /* Just pick one node, since fallback list is circular */
@@ -1514,7 +1490,7 @@ void si_meminfo(struct sysinfo *val)
1514{ 1490{
1515 val->totalram = totalram_pages; 1491 val->totalram = totalram_pages;
1516 val->sharedram = 0; 1492 val->sharedram = 0;
1517 val->freeram = nr_free_pages(); 1493 val->freeram = global_page_state(NR_FREE_PAGES);
1518 val->bufferram = nr_blockdev_pages(); 1494 val->bufferram = nr_blockdev_pages();
1519 val->totalhigh = totalhigh_pages; 1495 val->totalhigh = totalhigh_pages;
1520 val->freehigh = nr_free_highpages(); 1496 val->freehigh = nr_free_highpages();
@@ -1529,10 +1505,11 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1529 pg_data_t *pgdat = NODE_DATA(nid); 1505 pg_data_t *pgdat = NODE_DATA(nid);
1530 1506
1531 val->totalram = pgdat->node_present_pages; 1507 val->totalram = pgdat->node_present_pages;
1532 val->freeram = nr_free_pages_pgdat(pgdat); 1508 val->freeram = node_page_state(nid, NR_FREE_PAGES);
1533#ifdef CONFIG_HIGHMEM 1509#ifdef CONFIG_HIGHMEM
1534 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; 1510 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
1535 val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; 1511 val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
1512 NR_FREE_PAGES);
1536#else 1513#else
1537 val->totalhigh = 0; 1514 val->totalhigh = 0;
1538 val->freehigh = 0; 1515 val->freehigh = 0;
@@ -1551,9 +1528,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1551void show_free_areas(void) 1528void show_free_areas(void)
1552{ 1529{
1553 int cpu; 1530 int cpu;
1554 unsigned long active;
1555 unsigned long inactive;
1556 unsigned long free;
1557 struct zone *zone; 1531 struct zone *zone;
1558 1532
1559 for_each_zone(zone) { 1533 for_each_zone(zone) {
@@ -1577,16 +1551,14 @@ void show_free_areas(void)
1577 } 1551 }
1578 } 1552 }
1579 1553
1580 get_zone_counts(&active, &inactive, &free);
1581
1582 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n" 1554 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
1583 " free:%u slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", 1555 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
1584 active, 1556 global_page_state(NR_ACTIVE),
1585 inactive, 1557 global_page_state(NR_INACTIVE),
1586 global_page_state(NR_FILE_DIRTY), 1558 global_page_state(NR_FILE_DIRTY),
1587 global_page_state(NR_WRITEBACK), 1559 global_page_state(NR_WRITEBACK),
1588 global_page_state(NR_UNSTABLE_NFS), 1560 global_page_state(NR_UNSTABLE_NFS),
1589 nr_free_pages(), 1561 global_page_state(NR_FREE_PAGES),
1590 global_page_state(NR_SLAB_RECLAIMABLE) + 1562 global_page_state(NR_SLAB_RECLAIMABLE) +
1591 global_page_state(NR_SLAB_UNRECLAIMABLE), 1563 global_page_state(NR_SLAB_UNRECLAIMABLE),
1592 global_page_state(NR_FILE_MAPPED), 1564 global_page_state(NR_FILE_MAPPED),
@@ -1612,12 +1584,12 @@ void show_free_areas(void)
1612 " all_unreclaimable? %s" 1584 " all_unreclaimable? %s"
1613 "\n", 1585 "\n",
1614 zone->name, 1586 zone->name,
1615 K(zone->free_pages), 1587 K(zone_page_state(zone, NR_FREE_PAGES)),
1616 K(zone->pages_min), 1588 K(zone->pages_min),
1617 K(zone->pages_low), 1589 K(zone->pages_low),
1618 K(zone->pages_high), 1590 K(zone->pages_high),
1619 K(zone->nr_active), 1591 K(zone_page_state(zone, NR_ACTIVE)),
1620 K(zone->nr_inactive), 1592 K(zone_page_state(zone, NR_INACTIVE)),
1621 K(zone->present_pages), 1593 K(zone->present_pages),
1622 zone->pages_scanned, 1594 zone->pages_scanned,
1623 (zone->all_unreclaimable ? "yes" : "no") 1595 (zone->all_unreclaimable ? "yes" : "no")
@@ -2651,11 +2623,11 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2651 " %s zone: %lu pages exceeds realsize %lu\n", 2623 " %s zone: %lu pages exceeds realsize %lu\n",
2652 zone_names[j], memmap_pages, realsize); 2624 zone_names[j], memmap_pages, realsize);
2653 2625
2654 /* Account for reserved DMA pages */ 2626 /* Account for reserved pages */
2655 if (j == ZONE_DMA && realsize > dma_reserve) { 2627 if (j == 0 && realsize > dma_reserve) {
2656 realsize -= dma_reserve; 2628 realsize -= dma_reserve;
2657 printk(KERN_DEBUG " DMA zone: %lu pages reserved\n", 2629 printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
2658 dma_reserve); 2630 zone_names[0], dma_reserve);
2659 } 2631 }
2660 2632
2661 if (!is_highmem_idx(j)) 2633 if (!is_highmem_idx(j))
@@ -2675,7 +2647,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2675 spin_lock_init(&zone->lru_lock); 2647 spin_lock_init(&zone->lru_lock);
2676 zone_seqlock_init(zone); 2648 zone_seqlock_init(zone);
2677 zone->zone_pgdat = pgdat; 2649 zone->zone_pgdat = pgdat;
2678 zone->free_pages = 0;
2679 2650
2680 zone->prev_priority = DEF_PRIORITY; 2651 zone->prev_priority = DEF_PRIORITY;
2681 2652
@@ -2684,8 +2655,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2684 INIT_LIST_HEAD(&zone->inactive_list); 2655 INIT_LIST_HEAD(&zone->inactive_list);
2685 zone->nr_scan_active = 0; 2656 zone->nr_scan_active = 0;
2686 zone->nr_scan_inactive = 0; 2657 zone->nr_scan_inactive = 0;
2687 zone->nr_active = 0;
2688 zone->nr_inactive = 0;
2689 zap_zone_vm_stats(zone); 2658 zap_zone_vm_stats(zone);
2690 atomic_set(&zone->reclaim_in_progress, 0); 2659 atomic_set(&zone->reclaim_in_progress, 0);
2691 if (!size) 2660 if (!size)
@@ -2877,20 +2846,23 @@ static void __init sort_node_map(void)
2877 cmp_node_active_region, NULL); 2846 cmp_node_active_region, NULL);
2878} 2847}
2879 2848
2880/* Find the lowest pfn for a node. This depends on a sorted early_node_map */ 2849/* Find the lowest pfn for a node */
2881unsigned long __init find_min_pfn_for_node(unsigned long nid) 2850unsigned long __init find_min_pfn_for_node(unsigned long nid)
2882{ 2851{
2883 int i; 2852 int i;
2884 2853 unsigned long min_pfn = ULONG_MAX;
2885 /* Regions in the early_node_map can be in any order */
2886 sort_node_map();
2887 2854
2888 /* Assuming a sorted map, the first range found has the starting pfn */ 2855 /* Assuming a sorted map, the first range found has the starting pfn */
2889 for_each_active_range_index_in_nid(i, nid) 2856 for_each_active_range_index_in_nid(i, nid)
2890 return early_node_map[i].start_pfn; 2857 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
2891 2858
2892 printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); 2859 if (min_pfn == ULONG_MAX) {
2893 return 0; 2860 printk(KERN_WARNING
2861 "Could not find start_pfn for node %lu\n", nid);
2862 return 0;
2863 }
2864
2865 return min_pfn;
2894} 2866}
2895 2867
2896/** 2868/**
@@ -2939,6 +2911,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
2939 unsigned long nid; 2911 unsigned long nid;
2940 enum zone_type i; 2912 enum zone_type i;
2941 2913
2914 /* Sort early_node_map as initialisation assumes it is sorted */
2915 sort_node_map();
2916
2942 /* Record where the zone boundaries are */ 2917 /* Record where the zone boundaries are */
2943 memset(arch_zone_lowest_possible_pfn, 0, 2918 memset(arch_zone_lowest_possible_pfn, 0,
2944 sizeof(arch_zone_lowest_possible_pfn)); 2919 sizeof(arch_zone_lowest_possible_pfn));
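
On the update side, the buddy-allocator hunks above replace direct arithmetic on zone->free_pages with __mod_zone_page_state(), the variant that assumes interrupts are already disabled (both call sites run under zone->lock). A minimal sketch of that accounting step; the helper name account_buddy_block() is invented for illustration.

/* Sketch only: caller holds zone->lock with interrupts disabled, as in
 * __free_one_page() and __rmqueue() above. */
#include <linux/mmzone.h>
#include <linux/vmstat.h>

static void account_buddy_block(struct zone *zone, unsigned int order,
				int freeing)
{
	int pages = 1 << order;

	__mod_zone_page_state(zone, NR_FREE_PAGES, freeing ? pages : -pages);
}
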
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f539e8e827a..93d9ee692fd8 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -575,10 +575,6 @@ void handle_ra_miss(struct address_space *mapping,
575 */ 575 */
576unsigned long max_sane_readahead(unsigned long nr) 576unsigned long max_sane_readahead(unsigned long nr)
577{ 577{
578 unsigned long active; 578 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
579 unsigned long inactive; 579 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
580 unsigned long free;
581
582 __get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id()));
583 return min(nr, (inactive + free) / 2);
584} 580}
diff --git a/mm/shmem.c b/mm/shmem.c
index 70da7a0981bf..b38e17169271 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2316,7 +2316,7 @@ static void destroy_inodecache(void)
2316 2316
2317static const struct address_space_operations shmem_aops = { 2317static const struct address_space_operations shmem_aops = {
2318 .writepage = shmem_writepage, 2318 .writepage = shmem_writepage,
2319 .set_page_dirty = __set_page_dirty_nobuffers, 2319 .set_page_dirty = __set_page_dirty_no_writeback,
2320#ifdef CONFIG_TMPFS 2320#ifdef CONFIG_TMPFS
2321 .prepare_write = shmem_prepare_write, 2321 .prepare_write = shmem_prepare_write,
2322 .commit_write = simple_commit_write, 2322 .commit_write = simple_commit_write,
diff --git a/mm/slab.c b/mm/slab.c
index c6100628a6ef..70784b848b69 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -793,8 +793,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
793 * has cs_{dma,}cachep==NULL. Thus no special case 793 * has cs_{dma,}cachep==NULL. Thus no special case
794 * for large kmalloc calls required. 794 * for large kmalloc calls required.
795 */ 795 */
796#ifdef CONFIG_ZONE_DMA
796 if (unlikely(gfpflags & GFP_DMA)) 797 if (unlikely(gfpflags & GFP_DMA))
797 return csizep->cs_dmacachep; 798 return csizep->cs_dmacachep;
799#endif
798 return csizep->cs_cachep; 800 return csizep->cs_cachep;
799} 801}
800 802
@@ -1493,13 +1495,15 @@ void __init kmem_cache_init(void)
1493 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1495 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1494 NULL, NULL); 1496 NULL, NULL);
1495 } 1497 }
1496 1498#ifdef CONFIG_ZONE_DMA
1497 sizes->cs_dmacachep = kmem_cache_create(names->name_dma, 1499 sizes->cs_dmacachep = kmem_cache_create(
1500 names->name_dma,
1498 sizes->cs_size, 1501 sizes->cs_size,
1499 ARCH_KMALLOC_MINALIGN, 1502 ARCH_KMALLOC_MINALIGN,
1500 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| 1503 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1501 SLAB_PANIC, 1504 SLAB_PANIC,
1502 NULL, NULL); 1505 NULL, NULL);
1506#endif
1503 sizes++; 1507 sizes++;
1504 names++; 1508 names++;
1505 } 1509 }
@@ -2321,7 +2325,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2321 cachep->slab_size = slab_size; 2325 cachep->slab_size = slab_size;
2322 cachep->flags = flags; 2326 cachep->flags = flags;
2323 cachep->gfpflags = 0; 2327 cachep->gfpflags = 0;
2324 if (flags & SLAB_CACHE_DMA) 2328 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2325 cachep->gfpflags |= GFP_DMA; 2329 cachep->gfpflags |= GFP_DMA;
2326 cachep->buffer_size = size; 2330 cachep->buffer_size = size;
2327 cachep->reciprocal_buffer_size = reciprocal_value(size); 2331 cachep->reciprocal_buffer_size = reciprocal_value(size);
@@ -2516,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
2516 * kmem_cache_destroy - delete a cache 2520 * kmem_cache_destroy - delete a cache
2517 * @cachep: the cache to destroy 2521 * @cachep: the cache to destroy
2518 * 2522 *
2519 * Remove a struct kmem_cache object from the slab cache. 2523 * Remove a &struct kmem_cache object from the slab cache.
2520 * 2524 *
2521 * It is expected this function will be called by a module when it is 2525 * It is expected this function will be called by a module when it is
2522 * unloaded. This will remove the cache completely, and avoid a duplicate 2526 * unloaded. This will remove the cache completely, and avoid a duplicate
@@ -2643,10 +2647,12 @@ static void cache_init_objs(struct kmem_cache *cachep,
2643 2647
2644static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2648static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2645{ 2649{
2646 if (flags & GFP_DMA) 2650 if (CONFIG_ZONE_DMA_FLAG) {
2647 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2651 if (flags & GFP_DMA)
2648 else 2652 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2649 BUG_ON(cachep->gfpflags & GFP_DMA); 2653 else
2654 BUG_ON(cachep->gfpflags & GFP_DMA);
2655 }
2650} 2656}
2651 2657
2652static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, 2658static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
@@ -2814,19 +2820,11 @@ failed:
2814 */ 2820 */
2815static void kfree_debugcheck(const void *objp) 2821static void kfree_debugcheck(const void *objp)
2816{ 2822{
2817 struct page *page;
2818
2819 if (!virt_addr_valid(objp)) { 2823 if (!virt_addr_valid(objp)) {
2820 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", 2824 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2821 (unsigned long)objp); 2825 (unsigned long)objp);
2822 BUG(); 2826 BUG();
2823 } 2827 }
2824 page = virt_to_page(objp);
2825 if (!PageSlab(page)) {
2826 printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n",
2827 (unsigned long)objp);
2828 BUG();
2829 }
2830} 2828}
2831 2829
2832static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) 2830static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
@@ -3197,35 +3195,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3197 return objp; 3195 return objp;
3198} 3196}
3199 3197
3200static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
3201 gfp_t flags, void *caller)
3202{
3203 unsigned long save_flags;
3204 void *objp = NULL;
3205
3206 cache_alloc_debugcheck_before(cachep, flags);
3207
3208 local_irq_save(save_flags);
3209
3210 if (unlikely(NUMA_BUILD &&
3211 current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
3212 objp = alternate_node_alloc(cachep, flags);
3213
3214 if (!objp)
3215 objp = ____cache_alloc(cachep, flags);
3216 /*
3217 * We may just have run out of memory on the local node.
3218 * ____cache_alloc_node() knows how to locate memory on other nodes
3219 */
3220 if (NUMA_BUILD && !objp)
3221 objp = ____cache_alloc_node(cachep, flags, numa_node_id());
3222 local_irq_restore(save_flags);
3223 objp = cache_alloc_debugcheck_after(cachep, flags, objp,
3224 caller);
3225 prefetchw(objp);
3226 return objp;
3227}
3228
3229#ifdef CONFIG_NUMA 3198#ifdef CONFIG_NUMA
3230/* 3199/*
3231 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. 3200 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
@@ -3257,14 +3226,20 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3257 * allocator to do its reclaim / fallback magic. We then insert the 3226 * allocator to do its reclaim / fallback magic. We then insert the
3258 * slab into the proper nodelist and then allocate from it. 3227 * slab into the proper nodelist and then allocate from it.
3259 */ 3228 */
3260void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) 3229static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3261{ 3230{
3262 struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy)) 3231 struct zonelist *zonelist;
3263 ->node_zonelists[gfp_zone(flags)]; 3232 gfp_t local_flags;
3264 struct zone **z; 3233 struct zone **z;
3265 void *obj = NULL; 3234 void *obj = NULL;
3266 int nid; 3235 int nid;
3267 gfp_t local_flags = (flags & GFP_LEVEL_MASK); 3236
3237 if (flags & __GFP_THISNODE)
3238 return NULL;
3239
3240 zonelist = &NODE_DATA(slab_node(current->mempolicy))
3241 ->node_zonelists[gfp_zone(flags)];
3242 local_flags = (flags & GFP_LEVEL_MASK);
3268 3243
3269retry: 3244retry:
3270 /* 3245 /*
@@ -3374,16 +3349,110 @@ must_grow:
3374 if (x) 3349 if (x)
3375 goto retry; 3350 goto retry;
3376 3351
3377 if (!(flags & __GFP_THISNODE)) 3352 return fallback_alloc(cachep, flags);
3378 /* Unable to grow the cache. Fall back to other nodes. */
3379 return fallback_alloc(cachep, flags);
3380
3381 return NULL;
3382 3353
3383done: 3354done:
3384 return obj; 3355 return obj;
3385} 3356}
3386#endif 3357
3358/**
3359 * kmem_cache_alloc_node - Allocate an object on the specified node
3360 * @cachep: The cache to allocate from.
3361 * @flags: See kmalloc().
3362 * @nodeid: node number of the target node.
3363 * @caller: return address of caller, used for debug information
3364 *
3365 * Identical to kmem_cache_alloc but it will allocate memory on the given
3366 * node, which can improve the performance for cpu bound structures.
3367 *
3368 * Fallback to other node is possible if __GFP_THISNODE is not set.
3369 */
3370static __always_inline void *
3371__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3372 void *caller)
3373{
3374 unsigned long save_flags;
3375 void *ptr;
3376
3377 cache_alloc_debugcheck_before(cachep, flags);
3378 local_irq_save(save_flags);
3379
3380 if (unlikely(nodeid == -1))
3381 nodeid = numa_node_id();
3382
3383 if (unlikely(!cachep->nodelists[nodeid])) {
3384 /* Node not bootstrapped yet */
3385 ptr = fallback_alloc(cachep, flags);
3386 goto out;
3387 }
3388
3389 if (nodeid == numa_node_id()) {
3390 /*
3391 * Use the locally cached objects if possible.
3392 * However ____cache_alloc does not allow fallback
3393 * to other nodes. It may fail while we still have
3394 * objects on other nodes available.
3395 */
3396 ptr = ____cache_alloc(cachep, flags);
3397 if (ptr)
3398 goto out;
3399 }
3400 /* ___cache_alloc_node can fall back to other nodes */
3401 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3402 out:
3403 local_irq_restore(save_flags);
3404 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3405
3406 return ptr;
3407}
3408
3409static __always_inline void *
3410__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3411{
3412 void *objp;
3413
3414 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3415 objp = alternate_node_alloc(cache, flags);
3416 if (objp)
3417 goto out;
3418 }
3419 objp = ____cache_alloc(cache, flags);
3420
3421 /*
3422 * We may just have run out of memory on the local node.
3423 * ____cache_alloc_node() knows how to locate memory on other nodes
3424 */
3425 if (!objp)
3426 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3427
3428 out:
3429 return objp;
3430}
3431#else
3432
3433static __always_inline void *
3434__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3435{
3436 return ____cache_alloc(cachep, flags);
3437}
3438
3439#endif /* CONFIG_NUMA */
3440
3441static __always_inline void *
3442__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3443{
3444 unsigned long save_flags;
3445 void *objp;
3446
3447 cache_alloc_debugcheck_before(cachep, flags);
3448 local_irq_save(save_flags);
3449 objp = __do_cache_alloc(cachep, flags);
3450 local_irq_restore(save_flags);
3451 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3452 prefetchw(objp);
3453
3454 return objp;
3455}
3387 3456
3388/* 3457/*
3389 * Caller needs to acquire correct kmem_list's list_lock 3458 * Caller needs to acquire correct kmem_list's list_lock
@@ -3582,57 +3651,6 @@ out:
3582} 3651}
3583 3652
3584#ifdef CONFIG_NUMA 3653#ifdef CONFIG_NUMA
3585/**
3586 * kmem_cache_alloc_node - Allocate an object on the specified node
3587 * @cachep: The cache to allocate from.
3588 * @flags: See kmalloc().
3589 * @nodeid: node number of the target node.
3590 * @caller: return address of caller, used for debug information
3591 *
3592 * Identical to kmem_cache_alloc but it will allocate memory on the given
3593 * node, which can improve the performance for cpu bound structures.
3594 *
3595 * Fallback to other node is possible if __GFP_THISNODE is not set.
3596 */
3597static __always_inline void *
3598__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3599 int nodeid, void *caller)
3600{
3601 unsigned long save_flags;
3602 void *ptr = NULL;
3603
3604 cache_alloc_debugcheck_before(cachep, flags);
3605 local_irq_save(save_flags);
3606
3607 if (unlikely(nodeid == -1))
3608 nodeid = numa_node_id();
3609
3610 if (likely(cachep->nodelists[nodeid])) {
3611 if (nodeid == numa_node_id()) {
3612 /*
3613 * Use the locally cached objects if possible.
3614 * However ____cache_alloc does not allow fallback
3615 * to other nodes. It may fail while we still have
3616 * objects on other nodes available.
3617 */
3618 ptr = ____cache_alloc(cachep, flags);
3619 }
3620 if (!ptr) {
3621 /* ___cache_alloc_node can fall back to other nodes */
3622 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3623 }
3624 } else {
3625 /* Node not bootstrapped yet */
3626 if (!(flags & __GFP_THISNODE))
3627 ptr = fallback_alloc(cachep, flags);
3628 }
3629
3630 local_irq_restore(save_flags);
3631 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3632
3633 return ptr;
3634}
3635
3636void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3654void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3637{ 3655{
3638 return __cache_alloc_node(cachep, flags, nodeid, 3656 return __cache_alloc_node(cachep, flags, nodeid,
@@ -3733,6 +3751,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3733 BUG_ON(virt_to_cache(objp) != cachep); 3751 BUG_ON(virt_to_cache(objp) != cachep);
3734 3752
3735 local_irq_save(flags); 3753 local_irq_save(flags);
3754 debug_check_no_locks_freed(objp, obj_size(cachep));
3736 __cache_free(cachep, objp); 3755 __cache_free(cachep, objp);
3737 local_irq_restore(flags); 3756 local_irq_restore(flags);
3738} 3757}
@@ -4017,18 +4036,17 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4017 * If we cannot acquire the cache chain mutex then just give up - we'll try 4036 * If we cannot acquire the cache chain mutex then just give up - we'll try
4018 * again on the next iteration. 4037 * again on the next iteration.
4019 */ 4038 */
4020static void cache_reap(struct work_struct *unused) 4039static void cache_reap(struct work_struct *w)
4021{ 4040{
4022 struct kmem_cache *searchp; 4041 struct kmem_cache *searchp;
4023 struct kmem_list3 *l3; 4042 struct kmem_list3 *l3;
4024 int node = numa_node_id(); 4043 int node = numa_node_id();
4044 struct delayed_work *work =
4045 container_of(w, struct delayed_work, work);
4025 4046
4026 if (!mutex_trylock(&cache_chain_mutex)) { 4047 if (!mutex_trylock(&cache_chain_mutex))
4027 /* Give up. Setup the next iteration. */ 4048 /* Give up. Setup the next iteration. */
4028 schedule_delayed_work(&__get_cpu_var(reap_work), 4049 goto out;
4029 round_jiffies_relative(REAPTIMEOUT_CPUC));
4030 return;
4031 }
4032 4050
4033 list_for_each_entry(searchp, &cache_chain, next) { 4051 list_for_each_entry(searchp, &cache_chain, next) {
4034 check_irq_on(); 4052 check_irq_on();
@@ -4071,9 +4089,9 @@ next:
4071 mutex_unlock(&cache_chain_mutex); 4089 mutex_unlock(&cache_chain_mutex);
4072 next_reap_node(); 4090 next_reap_node();
4073 refresh_cpu_vm_stats(smp_processor_id()); 4091 refresh_cpu_vm_stats(smp_processor_id());
4092out:
4074 /* Set up the next iteration */ 4093 /* Set up the next iteration */
4075 schedule_delayed_work(&__get_cpu_var(reap_work), 4094 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4076 round_jiffies_relative(REAPTIMEOUT_CPUC));
4077} 4095}
4078 4096
4079#ifdef CONFIG_PROC_FS 4097#ifdef CONFIG_PROC_FS
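
The cache_reap() change above is the standard self-rearming pattern for the post-2.6.20 workqueue API: the handler recovers its struct delayed_work from the embedded work_struct with container_of() and requeues itself instead of referencing a global per-CPU work item. A minimal sketch under that assumption; every identifier prefixed with my_ is made up. The initial queuing is unchanged: an init path calls schedule_delayed_work(&my_periodic_work, ...) once, and the handler keeps itself alive from then on.

/* Sketch only, not from the patch: periodic work that reschedules itself. */
#include <linux/timer.h>
#include <linux/workqueue.h>

#define MY_PERIOD	(2 * HZ)

static void my_periodic_fn(struct work_struct *w)
{
	struct delayed_work *dwork =
		container_of(w, struct delayed_work, work);

	/* ... periodic maintenance would go here ... */

	schedule_delayed_work(dwork, round_jiffies_relative(MY_PERIOD));
}

static DECLARE_DELAYED_WORK(my_periodic_work, my_periodic_fn);
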
diff --git a/mm/truncate.c b/mm/truncate.c
index 5df947de7654..ebf3fcb4115b 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(cancel_dirty_page);
85 * 85 *
86 * We need to bale out if page->mapping is no longer equal to the original 86 * We need to bale out if page->mapping is no longer equal to the original
87 * mapping. This happens a) when the VM reclaimed the page while we waited on 87 * mapping. This happens a) when the VM reclaimed the page while we waited on
88 * its lock, b) when a concurrent invalidate_inode_pages got there first and 88 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
89 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 89 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
90 */ 90 */
91static void 91static void
@@ -106,7 +106,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
106} 106}
107 107
108/* 108/*
109 * This is for invalidate_inode_pages(). That function can be called at 109 * This is for invalidate_mapping_pages(). That function can be called at
110 * any time, and is not supposed to throw away dirty pages. But pages can 110 * any time, and is not supposed to throw away dirty pages. But pages can
111 * be marked dirty at any time too, so use remove_mapping which safely 111 * be marked dirty at any time too, so use remove_mapping which safely
112 * discards clean, unused pages. 112 * discards clean, unused pages.
@@ -310,12 +310,7 @@ unlock:
310 } 310 }
311 return ret; 311 return ret;
312} 312}
313 313EXPORT_SYMBOL(invalidate_mapping_pages);
314unsigned long invalidate_inode_pages(struct address_space *mapping)
315{
316 return invalidate_mapping_pages(mapping, 0, ~0UL);
317}
318EXPORT_SYMBOL(invalidate_inode_pages);
319 314
320/* 315/*
321 * This is like invalidate_complete_page(), except it ignores the page's 316 * This is like invalidate_complete_page(), except it ignores the page's
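
With the invalidate_inode_pages() wrapper gone, callers pass the full page range to invalidate_mapping_pages() themselves, exactly as the deleted wrapper did. A one-line sketch of the replacement call; the surrounding helper name is made up.

/* Sketch: direct replacement for the removed invalidate_inode_pages(). */
#include <linux/fs.h>

static unsigned long drop_clean_cached_pages(struct address_space *mapping)
{
	return invalidate_mapping_pages(mapping, 0, ~0UL);
}
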
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 86897ee792d6..9eef486da909 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -699,7 +699,7 @@ finished:
699 * that it is big enough to cover the vma. Will return failure if 699 * that it is big enough to cover the vma. Will return failure if
700 * that criteria isn't met. 700 * that criteria isn't met.
701 * 701 *
702 * Similar to remap_pfn_range (see mm/memory.c) 702 * Similar to remap_pfn_range() (see mm/memory.c)
703 */ 703 */
704int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 704int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
705 unsigned long pgoff) 705 unsigned long pgoff)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7430df68cb64..0655d5fe73e8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -679,7 +679,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
679 nr_taken = isolate_lru_pages(sc->swap_cluster_max, 679 nr_taken = isolate_lru_pages(sc->swap_cluster_max,
680 &zone->inactive_list, 680 &zone->inactive_list,
681 &page_list, &nr_scan); 681 &page_list, &nr_scan);
682 zone->nr_inactive -= nr_taken; 682 __mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
683 zone->pages_scanned += nr_scan; 683 zone->pages_scanned += nr_scan;
684 spin_unlock_irq(&zone->lru_lock); 684 spin_unlock_irq(&zone->lru_lock);
685 685
@@ -740,7 +740,8 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
740 740
741static inline int zone_is_near_oom(struct zone *zone) 741static inline int zone_is_near_oom(struct zone *zone)
742{ 742{
743 return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; 743 return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
744 + zone_page_state(zone, NR_INACTIVE))*3;
744} 745}
745 746
746/* 747/*
@@ -825,7 +826,7 @@ force_reclaim_mapped:
825 pgmoved = isolate_lru_pages(nr_pages, &zone->active_list, 826 pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
826 &l_hold, &pgscanned); 827 &l_hold, &pgscanned);
827 zone->pages_scanned += pgscanned; 828 zone->pages_scanned += pgscanned;
828 zone->nr_active -= pgmoved; 829 __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
829 spin_unlock_irq(&zone->lru_lock); 830 spin_unlock_irq(&zone->lru_lock);
830 831
831 while (!list_empty(&l_hold)) { 832 while (!list_empty(&l_hold)) {
@@ -857,7 +858,7 @@ force_reclaim_mapped:
857 list_move(&page->lru, &zone->inactive_list); 858 list_move(&page->lru, &zone->inactive_list);
858 pgmoved++; 859 pgmoved++;
859 if (!pagevec_add(&pvec, page)) { 860 if (!pagevec_add(&pvec, page)) {
860 zone->nr_inactive += pgmoved; 861 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
861 spin_unlock_irq(&zone->lru_lock); 862 spin_unlock_irq(&zone->lru_lock);
862 pgdeactivate += pgmoved; 863 pgdeactivate += pgmoved;
863 pgmoved = 0; 864 pgmoved = 0;
@@ -867,7 +868,7 @@ force_reclaim_mapped:
867 spin_lock_irq(&zone->lru_lock); 868 spin_lock_irq(&zone->lru_lock);
868 } 869 }
869 } 870 }
870 zone->nr_inactive += pgmoved; 871 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
871 pgdeactivate += pgmoved; 872 pgdeactivate += pgmoved;
872 if (buffer_heads_over_limit) { 873 if (buffer_heads_over_limit) {
873 spin_unlock_irq(&zone->lru_lock); 874 spin_unlock_irq(&zone->lru_lock);
@@ -885,14 +886,14 @@ force_reclaim_mapped:
885 list_move(&page->lru, &zone->active_list); 886 list_move(&page->lru, &zone->active_list);
886 pgmoved++; 887 pgmoved++;
887 if (!pagevec_add(&pvec, page)) { 888 if (!pagevec_add(&pvec, page)) {
888 zone->nr_active += pgmoved; 889 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
889 pgmoved = 0; 890 pgmoved = 0;
890 spin_unlock_irq(&zone->lru_lock); 891 spin_unlock_irq(&zone->lru_lock);
891 __pagevec_release(&pvec); 892 __pagevec_release(&pvec);
892 spin_lock_irq(&zone->lru_lock); 893 spin_lock_irq(&zone->lru_lock);
893 } 894 }
894 } 895 }
895 zone->nr_active += pgmoved; 896 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
896 897
897 __count_zone_vm_events(PGREFILL, zone, pgscanned); 898 __count_zone_vm_events(PGREFILL, zone, pgscanned);
898 __count_vm_events(PGDEACTIVATE, pgdeactivate); 899 __count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -918,14 +919,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
918 * Add one to `nr_to_scan' just to make sure that the kernel will 919 * Add one to `nr_to_scan' just to make sure that the kernel will
919 * slowly sift through the active list. 920 * slowly sift through the active list.
920 */ 921 */
921 zone->nr_scan_active += (zone->nr_active >> priority) + 1; 922 zone->nr_scan_active +=
923 (zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
922 nr_active = zone->nr_scan_active; 924 nr_active = zone->nr_scan_active;
923 if (nr_active >= sc->swap_cluster_max) 925 if (nr_active >= sc->swap_cluster_max)
924 zone->nr_scan_active = 0; 926 zone->nr_scan_active = 0;
925 else 927 else
926 nr_active = 0; 928 nr_active = 0;
927 929
928 zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1; 930 zone->nr_scan_inactive +=
931 (zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
929 nr_inactive = zone->nr_scan_inactive; 932 nr_inactive = zone->nr_scan_inactive;
930 if (nr_inactive >= sc->swap_cluster_max) 933 if (nr_inactive >= sc->swap_cluster_max)
931 zone->nr_scan_inactive = 0; 934 zone->nr_scan_inactive = 0;
@@ -1037,7 +1040,8 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1037 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1040 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1038 continue; 1041 continue;
1039 1042
1040 lru_pages += zone->nr_active + zone->nr_inactive; 1043 lru_pages += zone_page_state(zone, NR_ACTIVE)
1044 + zone_page_state(zone, NR_INACTIVE);
1041 } 1045 }
1042 1046
1043 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 1047 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
@@ -1182,7 +1186,8 @@ loop_again:
1182 for (i = 0; i <= end_zone; i++) { 1186 for (i = 0; i <= end_zone; i++) {
1183 struct zone *zone = pgdat->node_zones + i; 1187 struct zone *zone = pgdat->node_zones + i;
1184 1188
1185 lru_pages += zone->nr_active + zone->nr_inactive; 1189 lru_pages += zone_page_state(zone, NR_ACTIVE)
1190 + zone_page_state(zone, NR_INACTIVE);
1186 } 1191 }
1187 1192
1188 /* 1193 /*
@@ -1219,8 +1224,9 @@ loop_again:
1219 if (zone->all_unreclaimable) 1224 if (zone->all_unreclaimable)
1220 continue; 1225 continue;
1221 if (nr_slab == 0 && zone->pages_scanned >= 1226 if (nr_slab == 0 && zone->pages_scanned >=
1222 (zone->nr_active + zone->nr_inactive) * 6) 1227 (zone_page_state(zone, NR_ACTIVE)
1223 zone->all_unreclaimable = 1; 1228 + zone_page_state(zone, NR_INACTIVE)) * 6)
1229 zone->all_unreclaimable = 1;
1224 /* 1230 /*
1225 * If we've done a decent amount of scanning and 1231 * If we've done a decent amount of scanning and
1226 * the reclaim ratio is low, start doing writepage 1232 * the reclaim ratio is low, start doing writepage
@@ -1385,18 +1391,22 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1385 1391
1386 /* For pass = 0 we don't shrink the active list */ 1392 /* For pass = 0 we don't shrink the active list */
1387 if (pass > 0) { 1393 if (pass > 0) {
1388 zone->nr_scan_active += (zone->nr_active >> prio) + 1; 1394 zone->nr_scan_active +=
1395 (zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
1389 if (zone->nr_scan_active >= nr_pages || pass > 3) { 1396 if (zone->nr_scan_active >= nr_pages || pass > 3) {
1390 zone->nr_scan_active = 0; 1397 zone->nr_scan_active = 0;
1391 nr_to_scan = min(nr_pages, zone->nr_active); 1398 nr_to_scan = min(nr_pages,
1399 zone_page_state(zone, NR_ACTIVE));
1392 shrink_active_list(nr_to_scan, zone, sc, prio); 1400 shrink_active_list(nr_to_scan, zone, sc, prio);
1393 } 1401 }
1394 } 1402 }
1395 1403
1396 zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1; 1404 zone->nr_scan_inactive +=
1405 (zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
1397 if (zone->nr_scan_inactive >= nr_pages || pass > 3) { 1406 if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
1398 zone->nr_scan_inactive = 0; 1407 zone->nr_scan_inactive = 0;
1399 nr_to_scan = min(nr_pages, zone->nr_inactive); 1408 nr_to_scan = min(nr_pages,
1409 zone_page_state(zone, NR_INACTIVE));
1400 ret += shrink_inactive_list(nr_to_scan, zone, sc); 1410 ret += shrink_inactive_list(nr_to_scan, zone, sc);
1401 if (ret >= nr_pages) 1411 if (ret >= nr_pages)
1402 return ret; 1412 return ret;
@@ -1408,12 +1418,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1408 1418
1409static unsigned long count_lru_pages(void) 1419static unsigned long count_lru_pages(void)
1410{ 1420{
1411 struct zone *zone; 1421 return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
1412 unsigned long ret = 0;
1413
1414 for_each_zone(zone)
1415 ret += zone->nr_active + zone->nr_inactive;
1416 return ret;
1417} 1422}
1418 1423
1419/* 1424/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index dc005a0c96ae..6c488d6ac425 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,39 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15 15
16void __get_zone_counts(unsigned long *active, unsigned long *inactive,
17 unsigned long *free, struct pglist_data *pgdat)
18{
19 struct zone *zones = pgdat->node_zones;
20 int i;
21
22 *active = 0;
23 *inactive = 0;
24 *free = 0;
25 for (i = 0; i < MAX_NR_ZONES; i++) {
26 *active += zones[i].nr_active;
27 *inactive += zones[i].nr_inactive;
28 *free += zones[i].free_pages;
29 }
30}
31
32void get_zone_counts(unsigned long *active,
33 unsigned long *inactive, unsigned long *free)
34{
35 struct pglist_data *pgdat;
36
37 *active = 0;
38 *inactive = 0;
39 *free = 0;
40 for_each_online_pgdat(pgdat) {
41 unsigned long l, m, n;
42 __get_zone_counts(&l, &m, &n, pgdat);
43 *active += l;
44 *inactive += m;
45 *free += n;
46 }
47}
48
49#ifdef CONFIG_VM_EVENT_COUNTERS 16#ifdef CONFIG_VM_EVENT_COUNTERS
50DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; 17DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
51EXPORT_PER_CPU_SYMBOL(vm_event_states); 18EXPORT_PER_CPU_SYMBOL(vm_event_states);
@@ -239,7 +206,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
239 * in between and therefore the atomicity vs. interrupt cannot be exploited 206 * in between and therefore the atomicity vs. interrupt cannot be exploited
240 * in a useful way here. 207 * in a useful way here.
241 */ 208 */
242static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 209void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
243{ 210{
244 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 211 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
245 s8 *p = pcp->vm_stat_diff + item; 212 s8 *p = pcp->vm_stat_diff + item;
@@ -260,9 +227,8 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
260} 227}
261EXPORT_SYMBOL(__inc_zone_page_state); 228EXPORT_SYMBOL(__inc_zone_page_state);
262 229
263void __dec_zone_page_state(struct page *page, enum zone_stat_item item) 230void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
264{ 231{
265 struct zone *zone = page_zone(page);
266 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 232 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
267 s8 *p = pcp->vm_stat_diff + item; 233 s8 *p = pcp->vm_stat_diff + item;
268 234
@@ -275,6 +241,11 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
275 *p = overstep; 241 *p = overstep;
276 } 242 }
277} 243}
244
245void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
246{
247 __dec_zone_state(page_zone(page), item);
248}
278EXPORT_SYMBOL(__dec_zone_page_state); 249EXPORT_SYMBOL(__dec_zone_page_state);
279 250
280void inc_zone_state(struct zone *zone, enum zone_stat_item item) 251void inc_zone_state(struct zone *zone, enum zone_stat_item item)
@@ -437,6 +408,12 @@ const struct seq_operations fragmentation_op = {
437 .show = frag_show, 408 .show = frag_show,
438}; 409};
439 410
411#ifdef CONFIG_ZONE_DMA
412#define TEXT_FOR_DMA(xx) xx "_dma",
413#else
414#define TEXT_FOR_DMA(xx)
415#endif
416
440#ifdef CONFIG_ZONE_DMA32 417#ifdef CONFIG_ZONE_DMA32
441#define TEXT_FOR_DMA32(xx) xx "_dma32", 418#define TEXT_FOR_DMA32(xx) xx "_dma32",
442#else 419#else
@@ -449,19 +426,22 @@ const struct seq_operations fragmentation_op = {
449#define TEXT_FOR_HIGHMEM(xx) 426#define TEXT_FOR_HIGHMEM(xx)
450#endif 427#endif
451 428
452#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ 429#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
453 TEXT_FOR_HIGHMEM(xx) 430 TEXT_FOR_HIGHMEM(xx)
454 431
455static const char * const vmstat_text[] = { 432static const char * const vmstat_text[] = {
456 /* Zoned VM counters */ 433 /* Zoned VM counters */
434 "nr_free_pages",
435 "nr_active",
436 "nr_inactive",
457 "nr_anon_pages", 437 "nr_anon_pages",
458 "nr_mapped", 438 "nr_mapped",
459 "nr_file_pages", 439 "nr_file_pages",
440 "nr_dirty",
441 "nr_writeback",
460 "nr_slab_reclaimable", 442 "nr_slab_reclaimable",
461 "nr_slab_unreclaimable", 443 "nr_slab_unreclaimable",
462 "nr_page_table_pages", 444 "nr_page_table_pages",
463 "nr_dirty",
464 "nr_writeback",
465 "nr_unstable", 445 "nr_unstable",
466 "nr_bounce", 446 "nr_bounce",
467 "nr_vmscan_write", 447 "nr_vmscan_write",
@@ -529,17 +509,13 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
529 "\n min %lu" 509 "\n min %lu"
530 "\n low %lu" 510 "\n low %lu"
531 "\n high %lu" 511 "\n high %lu"
532 "\n active %lu"
533 "\n inactive %lu"
534 "\n scanned %lu (a: %lu i: %lu)" 512 "\n scanned %lu (a: %lu i: %lu)"
535 "\n spanned %lu" 513 "\n spanned %lu"
536 "\n present %lu", 514 "\n present %lu",
537 zone->free_pages, 515 zone_page_state(zone, NR_FREE_PAGES),
538 zone->pages_min, 516 zone->pages_min,
539 zone->pages_low, 517 zone->pages_low,
540 zone->pages_high, 518 zone->pages_high,
541 zone->nr_active,
542 zone->nr_inactive,
543 zone->pages_scanned, 519 zone->pages_scanned,
544 zone->nr_scan_active, zone->nr_scan_inactive, 520 zone->nr_scan_active, zone->nr_scan_inactive,
545 zone->spanned_pages, 521 zone->spanned_pages,
@@ -563,12 +539,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
563 539
564 pageset = zone_pcp(zone, i); 540 pageset = zone_pcp(zone, i);
565 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { 541 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
566 if (pageset->pcp[j].count)
567 break;
568 }
569 if (j == ARRAY_SIZE(pageset->pcp))
570 continue;
571 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
572 seq_printf(m, 542 seq_printf(m,
573 "\n cpu: %i pcp: %i" 543 "\n cpu: %i pcp: %i"
574 "\n count: %i" 544 "\n count: %i"
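
The deleted __get_zone_counts()/get_zone_counts() pair has no direct replacement; readers simply fetch the zoned VM counters, as the show_free_areas() and count_lru_pages() hunks do. A minimal sketch of an equivalent global read; the helper name is illustrative only.

/* Sketch only: the totals the removed get_zone_counts() used to return. */
#include <linux/mmzone.h>
#include <linux/vmstat.h>

static void global_lru_counts(unsigned long *active, unsigned long *inactive,
			      unsigned long *free)
{
	*active   = global_page_state(NR_ACTIVE);
	*inactive = global_page_state(NR_INACTIVE);
	*free     = global_page_state(NR_FREE_PAGES);
}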