author     Trond Myklebust <Trond.Myklebust@netapp.com>   2007-02-13 01:43:25 -0500
committer  Trond Myklebust <Trond.Myklebust@netapp.com>   2007-02-13 01:43:25 -0500
commit     d9bc125caf592b7d081021f32ce5b717efdf70c8 (patch)
tree       263b7066ba22ddce21db610c0300f6eaac6f2064 /mm
parent     43d78ef2ba5bec26d0315859e8324bfc0be23766 (diff)
parent     ec2f9d1331f658433411c58077871e1eef4ee1b4 (diff)

Merge branch 'master' of /home/trondmy/kernel/linux-2.6/

Conflicts:

	net/sunrpc/auth_gss/gss_krb5_crypto.c
	net/sunrpc/auth_gss/gss_spkm3_token.c
	net/sunrpc/clnt.c

Merge with mainline and fix conflicts.
Diffstat (limited to 'mm')
 -rw-r--r--  mm/Kconfig          |   6
 -rw-r--r--  mm/filemap.c        |  24
 -rw-r--r--  mm/highmem.c        |   3
 -rw-r--r--  mm/hugetlb.c        |   2
 -rw-r--r--  mm/memory.c         |  60
 -rw-r--r--  mm/mempolicy.c      |   2
 -rw-r--r--  mm/mempool.c        |   6
 -rw-r--r--  mm/mincore.c        | 102
 -rw-r--r--  mm/mmap.c           |  72
 -rw-r--r--  mm/page-writeback.c |  17
 -rw-r--r--  mm/page_alloc.c     | 102
 -rw-r--r--  mm/readahead.c      |   8
 -rw-r--r--  mm/shmem.c          |  22
 -rw-r--r--  mm/slab.c           | 246
 -rw-r--r--  mm/truncate.c       |  11
 -rw-r--r--  mm/vmalloc.c        |   2
 -rw-r--r--  mm/vmscan.c         |  51
 -rw-r--r--  mm/vmstat.c         |  70
 18 files changed, 464 insertions, 342 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index db7c55de92cd..7942b333e46c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -157,3 +157,9 @@ config RESOURCES_64BIT
157 default 64BIT 157 default 64BIT
158 help 158 help
159 This option allows memory and IO resources to be 64 bit. 159 This option allows memory and IO resources to be 64 bit.
160
161config ZONE_DMA_FLAG
162 int
163 default "0" if !ZONE_DMA
164 default "1"
165
diff --git a/mm/filemap.c b/mm/filemap.c
index 8332c77b1bd1..00414849a867 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -327,7 +327,7 @@ EXPORT_SYMBOL(sync_page_range);
327 * @pos: beginning offset in pages to write 327 * @pos: beginning offset in pages to write
328 * @count: number of bytes to write 328 * @count: number of bytes to write
329 * 329 *
330 * Note: Holding i_mutex across sync_page_range_nolock is not a good idea 330 * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea
331 * as it forces O_SYNC writers to different parts of the same file 331 * as it forces O_SYNC writers to different parts of the same file
332 * to be serialised right until io completion. 332 * to be serialised right until io completion.
333 */ 333 */
@@ -606,26 +606,6 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset)
606EXPORT_SYMBOL(find_get_page); 606EXPORT_SYMBOL(find_get_page);
607 607
608/** 608/**
609 * find_trylock_page - find and lock a page
610 * @mapping: the address_space to search
611 * @offset: the page index
612 *
613 * Same as find_get_page(), but trylock it instead of incrementing the count.
614 */
615struct page *find_trylock_page(struct address_space *mapping, unsigned long offset)
616{
617 struct page *page;
618
619 read_lock_irq(&mapping->tree_lock);
620 page = radix_tree_lookup(&mapping->page_tree, offset);
621 if (page && TestSetPageLocked(page))
622 page = NULL;
623 read_unlock_irq(&mapping->tree_lock);
624 return page;
625}
626EXPORT_SYMBOL(find_trylock_page);
627
628/**
629 * find_lock_page - locate, pin and lock a pagecache page 609 * find_lock_page - locate, pin and lock a pagecache page
630 * @mapping: the address_space to search 610 * @mapping: the address_space to search
631 * @offset: the page index 611 * @offset: the page index
@@ -804,7 +784,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
804 * @mapping: target address_space 784 * @mapping: target address_space
805 * @index: the page index 785 * @index: the page index
806 * 786 *
807 * Same as grab_cache_page, but do not wait if the page is unavailable. 787 * Same as grab_cache_page(), but do not wait if the page is unavailable.
808 * This is intended for speculative data generators, where the data can 788 * This is intended for speculative data generators, where the data can
809 * be regenerated if the page couldn't be grabbed. This routine should 789 * be regenerated if the page couldn't be grabbed. This routine should
810 * be safe to call while holding the lock for another page. 790 * be safe to call while holding the lock for another page.
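
Hedged illustration (not part of the commit): with find_trylock_page() gone, a caller wanting similar behaviour can open-code it from find_get_page() and TestSetPageLocked(), the primitives used in the deleted function above. The helper name below is hypothetical, and unlike the removed function it also takes a page reference, which the caller must drop.

#include <linux/mm.h>
#include <linux/pagemap.h>

static struct page *try_lock_cached_page(struct address_space *mapping,
					 pgoff_t index)
{
	struct page *page = find_get_page(mapping, index);	/* takes a reference */

	if (page && TestSetPageLocked(page)) {
		/* Already locked by someone else: drop our reference and give up. */
		page_cache_release(page);
		page = NULL;
	}
	return page;	/* NULL, or a locked page with an elevated refcount */
}
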
diff --git a/mm/highmem.c b/mm/highmem.c
index 0206e7e5018c..51e1c1995fec 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -47,7 +47,8 @@ unsigned int nr_free_highpages (void)
47 unsigned int pages = 0; 47 unsigned int pages = 0;
48 48
49 for_each_online_pgdat(pgdat) 49 for_each_online_pgdat(pgdat)
50 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; 50 pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
51 NR_FREE_PAGES);
51 52
52 return pages; 53 return pages;
53} 54}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cb362f761f17..36db012b38dd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -389,6 +389,8 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
389 continue; 389 continue;
390 390
391 page = pte_page(pte); 391 page = pte_page(pte);
392 if (pte_dirty(pte))
393 set_page_dirty(page);
392 list_add(&page->lru, &page_list); 394 list_add(&page->lru, &page_list);
393 } 395 }
394 spin_unlock(&mm->page_table_lock); 396 spin_unlock(&mm->page_table_lock);
diff --git a/mm/memory.c b/mm/memory.c
index ef09f0acb1d8..e7066e71dfa3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -678,7 +678,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
678 if (pte_dirty(ptent)) 678 if (pte_dirty(ptent))
679 set_page_dirty(page); 679 set_page_dirty(page);
680 if (pte_young(ptent)) 680 if (pte_young(ptent))
681 mark_page_accessed(page); 681 SetPageReferenced(page);
682 file_rss--; 682 file_rss--;
683 } 683 }
684 page_remove_rmap(page, vma); 684 page_remove_rmap(page, vma);
@@ -1277,6 +1277,51 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *
1277} 1277}
1278EXPORT_SYMBOL(vm_insert_page); 1278EXPORT_SYMBOL(vm_insert_page);
1279 1279
1280/**
1281 * vm_insert_pfn - insert single pfn into user vma
1282 * @vma: user vma to map to
1283 * @addr: target user address of this page
1284 * @pfn: source kernel pfn
1285 *
1286 * Similar to vm_insert_page, this allows drivers to insert individual pages
1287 * they've allocated into a user vma. Same comments apply.
1288 *
1289 * This function should only be called from a vm_ops->fault handler, and
1290 * in that case the handler should return NULL.
1291 */
1292int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1293 unsigned long pfn)
1294{
1295 struct mm_struct *mm = vma->vm_mm;
1296 int retval;
1297 pte_t *pte, entry;
1298 spinlock_t *ptl;
1299
1300 BUG_ON(!(vma->vm_flags & VM_PFNMAP));
1301 BUG_ON(is_cow_mapping(vma->vm_flags));
1302
1303 retval = -ENOMEM;
1304 pte = get_locked_pte(mm, addr, &ptl);
1305 if (!pte)
1306 goto out;
1307 retval = -EBUSY;
1308 if (!pte_none(*pte))
1309 goto out_unlock;
1310
1311 /* Ok, finally just insert the thing.. */
1312 entry = pfn_pte(pfn, vma->vm_page_prot);
1313 set_pte_at(mm, addr, pte, entry);
1314 update_mmu_cache(vma, addr, entry);
1315
1316 retval = 0;
1317out_unlock:
1318 pte_unmap_unlock(pte, ptl);
1319
1320out:
1321 return retval;
1322}
1323EXPORT_SYMBOL(vm_insert_pfn);
1324
1280/* 1325/*
1281 * maps a range of physical memory into the requested pages. the old 1326 * maps a range of physical memory into the requested pages. the old
1282 * mappings are removed. any references to nonexistent pages results 1327 * mappings are removed. any references to nonexistent pages results
@@ -1531,8 +1576,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1531 if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) 1576 if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
1532 goto unwritable_page; 1577 goto unwritable_page;
1533 1578
1534 page_cache_release(old_page);
1535
1536 /* 1579 /*
1537 * Since we dropped the lock we need to revalidate 1580 * Since we dropped the lock we need to revalidate
1538 * the PTE as someone else may have changed it. If 1581 * the PTE as someone else may have changed it. If
@@ -1541,6 +1584,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1541 */ 1584 */
1542 page_table = pte_offset_map_lock(mm, pmd, address, 1585 page_table = pte_offset_map_lock(mm, pmd, address,
1543 &ptl); 1586 &ptl);
1587 page_cache_release(old_page);
1544 if (!pte_same(*page_table, orig_pte)) 1588 if (!pte_same(*page_table, orig_pte))
1545 goto unlock; 1589 goto unlock;
1546 } 1590 }
@@ -1776,9 +1820,7 @@ restart:
1776} 1820}
1777 1821
1778/** 1822/**
1779 * unmap_mapping_range - unmap the portion of all mmaps 1823 * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
1780 * in the specified address_space corresponding to the specified
1781 * page range in the underlying file.
1782 * @mapping: the address space containing mmaps to be unmapped. 1824 * @mapping: the address space containing mmaps to be unmapped.
1783 * @holebegin: byte in first page to unmap, relative to the start of 1825 * @holebegin: byte in first page to unmap, relative to the start of
1784 * the underlying file. This will be rounded down to a PAGE_SIZE 1826 * the underlying file. This will be rounded down to a PAGE_SIZE
@@ -2313,10 +2355,12 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
2313 BUG_ON(is_cow_mapping(vma->vm_flags)); 2355 BUG_ON(is_cow_mapping(vma->vm_flags));
2314 2356
2315 pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK); 2357 pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
2316 if (pfn == NOPFN_OOM) 2358 if (unlikely(pfn == NOPFN_OOM))
2317 return VM_FAULT_OOM; 2359 return VM_FAULT_OOM;
2318 if (pfn == NOPFN_SIGBUS) 2360 else if (unlikely(pfn == NOPFN_SIGBUS))
2319 return VM_FAULT_SIGBUS; 2361 return VM_FAULT_SIGBUS;
2362 else if (unlikely(pfn == NOPFN_REFAULT))
2363 return VM_FAULT_MINOR;
2320 2364
2321 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2365 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2322 2366
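
Hedged illustration (not part of the commit): one plausible way a driver could combine the two additions above, calling vm_insert_pfn() from its ->nopfn handler and returning the new NOPFN_REFAULT code so that do_no_pfn() simply reports VM_FAULT_MINOR. The fake_dev structure and its base_pfn field are hypothetical.

#include <linux/mm.h>

struct fake_dev {			/* hypothetical driver state */
	unsigned long base_pfn;		/* first pfn of a device aperture */
};

static unsigned long fake_dev_nopfn(struct vm_area_struct *vma,
				    unsigned long address)
{
	struct fake_dev *dev = vma->vm_private_data;
	unsigned long pgoff = (address - vma->vm_start) >> PAGE_SHIFT;

	/* The vma must be VM_PFNMAP and not a COW mapping, per the
	 * BUG_ON()s in vm_insert_pfn() above. */
	if (vm_insert_pfn(vma, address, dev->base_pfn + pgoff))
		return NOPFN_SIGBUS;

	/* The pte is already installed, so ask the core to refault. */
	return NOPFN_REFAULT;
}
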
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c2aec0e1090d..259a706bd83e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
105 105
106/* Highest zone. An specific allocation for a zone below that is not 106/* Highest zone. An specific allocation for a zone below that is not
107 policied. */ 107 policied. */
108enum zone_type policy_zone = ZONE_DMA; 108enum zone_type policy_zone = 0;
109 109
110struct mempolicy default_policy = { 110struct mempolicy default_policy = {
111 .refcnt = ATOMIC_INIT(1), /* never free it */ 111 .refcnt = ATOMIC_INIT(1), /* never free it */
diff --git a/mm/mempool.c b/mm/mempool.c
index ccd8cb8cd41f..cc1ca86dfc24 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -46,9 +46,9 @@ static void free_pool(mempool_t *pool)
46 * @pool_data: optional private data available to the user-defined functions. 46 * @pool_data: optional private data available to the user-defined functions.
47 * 47 *
48 * this function creates and allocates a guaranteed size, preallocated 48 * this function creates and allocates a guaranteed size, preallocated
49 * memory pool. The pool can be used from the mempool_alloc and mempool_free 49 * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
50 * functions. This function might sleep. Both the alloc_fn() and the free_fn() 50 * functions. This function might sleep. Both the alloc_fn() and the free_fn()
51 * functions might sleep - as long as the mempool_alloc function is not called 51 * functions might sleep - as long as the mempool_alloc() function is not called
52 * from IRQ contexts. 52 * from IRQ contexts.
53 */ 53 */
54mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, 54mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(mempool_destroy);
195 * mempool_create(). 195 * mempool_create().
196 * @gfp_mask: the usual allocation bitmask. 196 * @gfp_mask: the usual allocation bitmask.
197 * 197 *
198 * this function only sleeps if the alloc_fn function sleeps or 198 * this function only sleeps if the alloc_fn() function sleeps or
199 * returns NULL. Note that due to preallocation, this function 199 * returns NULL. Note that due to preallocation, this function
200 * *never* fails when called from process contexts. (it might 200 * *never* fails when called from process contexts. (it might
201 * fail if called from an IRQ context.) 201 * fail if called from an IRQ context.)
diff --git a/mm/mincore.c b/mm/mincore.c
index 8aca6f7167bb..95c5f49f0a1a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -12,6 +12,8 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/mman.h> 13#include <linux/mman.h>
14#include <linux/syscalls.h> 14#include <linux/syscalls.h>
15#include <linux/swap.h>
16#include <linux/swapops.h>
15 17
16#include <asm/uaccess.h> 18#include <asm/uaccess.h>
17#include <asm/pgtable.h> 19#include <asm/pgtable.h>
@@ -22,14 +24,22 @@
22 * and is up to date; i.e. that no page-in operation would be required 24 * and is up to date; i.e. that no page-in operation would be required
23 * at this time if an application were to map and access this page. 25 * at this time if an application were to map and access this page.
24 */ 26 */
25static unsigned char mincore_page(struct vm_area_struct * vma, 27static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
26 unsigned long pgoff)
27{ 28{
28 unsigned char present = 0; 29 unsigned char present = 0;
29 struct address_space * as = vma->vm_file->f_mapping; 30 struct page *page;
30 struct page * page;
31 31
32 page = find_get_page(as, pgoff); 32 /*
33 * When tmpfs swaps out a page from a file, any process mapping that
34 * file will not get a swp_entry_t in its pte, but rather it is like
35 * any other file mapping (ie. marked !present and faulted in with
36 * tmpfs's .nopage). So swapped out tmpfs mappings are tested here.
37 *
38 * However when tmpfs moves the page from pagecache and into swapcache,
39 * it is still in core, but the find_get_page below won't find it.
40 * No big deal, but make a note of it.
41 */
42 page = find_get_page(mapping, pgoff);
33 if (page) { 43 if (page) {
34 present = PageUptodate(page); 44 present = PageUptodate(page);
35 page_cache_release(page); 45 page_cache_release(page);
@@ -45,7 +55,14 @@ static unsigned char mincore_page(struct vm_area_struct * vma,
45 */ 55 */
46static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages) 56static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
47{ 57{
48 unsigned long i, nr, pgoff; 58 pgd_t *pgd;
59 pud_t *pud;
60 pmd_t *pmd;
61 pte_t *ptep;
62 spinlock_t *ptl;
63 unsigned long nr;
64 int i;
65 pgoff_t pgoff;
49 struct vm_area_struct *vma = find_vma(current->mm, addr); 66 struct vm_area_struct *vma = find_vma(current->mm, addr);
50 67
51 /* 68 /*
@@ -56,31 +73,64 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag
56 return -ENOMEM; 73 return -ENOMEM;
57 74
58 /* 75 /*
59 * Ok, got it. But check whether it's a segment we support 76 * Calculate how many pages there are left in the last level of the
60 * mincore() on. Right now, we don't do any anonymous mappings. 77 * PTE array for our address.
61 *
62 * FIXME: This is just stupid. And returning ENOMEM is
63 * stupid too. We should just look at the page tables. But
64 * this is what we've traditionally done, so we'll just
65 * continue doing it.
66 */ 78 */
67 if (!vma->vm_file) 79 nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1));
68 return -ENOMEM;
69
70 /*
71 * Calculate how many pages there are left in the vma, and
72 * what the pgoff is for our address.
73 */
74 nr = (vma->vm_end - addr) >> PAGE_SHIFT;
75 if (nr > pages) 80 if (nr > pages)
76 nr = pages; 81 nr = pages;
77 82
78 pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; 83 pgd = pgd_offset(vma->vm_mm, addr);
79 pgoff += vma->vm_pgoff; 84 if (pgd_none_or_clear_bad(pgd))
85 goto none_mapped;
86 pud = pud_offset(pgd, addr);
87 if (pud_none_or_clear_bad(pud))
88 goto none_mapped;
89 pmd = pmd_offset(pud, addr);
90 if (pmd_none_or_clear_bad(pmd))
91 goto none_mapped;
92
93 ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
94 for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
95 unsigned char present;
96 pte_t pte = *ptep;
97
98 if (pte_present(pte)) {
99 present = 1;
100
101 } else if (pte_none(pte)) {
102 if (vma->vm_file) {
103 pgoff = linear_page_index(vma, addr);
104 present = mincore_page(vma->vm_file->f_mapping,
105 pgoff);
106 } else
107 present = 0;
108
109 } else if (pte_file(pte)) {
110 pgoff = pte_to_pgoff(pte);
111 present = mincore_page(vma->vm_file->f_mapping, pgoff);
112
113 } else { /* pte is a swap entry */
114 swp_entry_t entry = pte_to_swp_entry(pte);
115 if (is_migration_entry(entry)) {
116 /* migration entries are always uptodate */
117 present = 1;
118 } else {
119 pgoff = entry.val;
120 present = mincore_page(&swapper_space, pgoff);
121 }
122 }
123 }
124 pte_unmap_unlock(ptep-1, ptl);
125
126 return nr;
80 127
81 /* And then we just fill the sucker in.. */ 128none_mapped:
82 for (i = 0 ; i < nr; i++, pgoff++) 129 if (vma->vm_file) {
83 vec[i] = mincore_page(vma, pgoff); 130 pgoff = linear_page_index(vma, addr);
131 for (i = 0; i < nr; i++, pgoff++)
132 vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
133 }
84 134
85 return nr; 135 return nr;
86} 136}
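
Hedged illustration (not part of the commit): since do_mincore() now walks the page tables instead of insisting on vma->vm_file, anonymous mappings no longer return -ENOMEM. A small userspace check of that behaviour (assumes Linux with MAP_ANONYMOUS):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t page = sysconf(_SC_PAGESIZE);
	size_t npages = 16;
	unsigned char *vec = malloc(npages);
	void *addr = mmap(NULL, npages * page, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (!vec || addr == MAP_FAILED)
		return 1;
	memset(addr, 0, 4 * page);		/* fault in the first four pages */
	if (mincore(addr, npages * page, vec) == 0) {
		size_t i, resident = 0;
		for (i = 0; i < npages; i++)
			resident += vec[i] & 1;
		printf("%zu of %zu pages resident\n", resident, npages);
	}
	munmap(addr, npages * page);
	free(vec);
	return 0;
}
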
diff --git a/mm/mmap.c b/mm/mmap.c
index cc3a20819457..eb509ae76553 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2101,3 +2101,75 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2101 return 0; 2101 return 0;
2102 return 1; 2102 return 1;
2103} 2103}
2104
2105
2106static struct page *special_mapping_nopage(struct vm_area_struct *vma,
2107 unsigned long address, int *type)
2108{
2109 struct page **pages;
2110
2111 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
2112
2113 address -= vma->vm_start;
2114 for (pages = vma->vm_private_data; address > 0 && *pages; ++pages)
2115 address -= PAGE_SIZE;
2116
2117 if (*pages) {
2118 struct page *page = *pages;
2119 get_page(page);
2120 return page;
2121 }
2122
2123 return NOPAGE_SIGBUS;
2124}
2125
2126/*
2127 * Having a close hook prevents vma merging regardless of flags.
2128 */
2129static void special_mapping_close(struct vm_area_struct *vma)
2130{
2131}
2132
2133static struct vm_operations_struct special_mapping_vmops = {
2134 .close = special_mapping_close,
2135 .nopage = special_mapping_nopage,
2136};
2137
2138/*
2139 * Called with mm->mmap_sem held for writing.
2140 * Insert a new vma covering the given region, with the given flags.
2141 * Its pages are supplied by the given array of struct page *.
2142 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
2143 * The region past the last page supplied will always produce SIGBUS.
2144 * The array pointer and the pages it points to are assumed to stay alive
2145 * for as long as this mapping might exist.
2146 */
2147int install_special_mapping(struct mm_struct *mm,
2148 unsigned long addr, unsigned long len,
2149 unsigned long vm_flags, struct page **pages)
2150{
2151 struct vm_area_struct *vma;
2152
2153 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2154 if (unlikely(vma == NULL))
2155 return -ENOMEM;
2156
2157 vma->vm_mm = mm;
2158 vma->vm_start = addr;
2159 vma->vm_end = addr + len;
2160
2161 vma->vm_flags = vm_flags | mm->def_flags;
2162 vma->vm_page_prot = protection_map[vma->vm_flags & 7];
2163
2164 vma->vm_ops = &special_mapping_vmops;
2165 vma->vm_private_data = pages;
2166
2167 if (unlikely(insert_vm_struct(mm, vma))) {
2168 kmem_cache_free(vm_area_cachep, vma);
2169 return -ENOMEM;
2170 }
2171
2172 mm->total_vm += len >> PAGE_SHIFT;
2173
2174 return 0;
2175}
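
Hedged illustration (not part of the commit): a sketch of how an architecture might use the new install_special_mapping() to expose a single vDSO-style page in a process address space. The page array, address and flag choice are illustrative, and helper_pages[0] is assumed to have been allocated elsewhere.

#include <linux/mm.h>
#include <linux/sched.h>

static struct page *helper_pages[2];	/* one real page + NULL terminator */

static int map_helper_page(struct mm_struct *mm, unsigned long addr)
{
	int ret;

	/* install_special_mapping() is documented above as requiring
	 * mmap_sem held for writing. */
	down_write(&mm->mmap_sem);
	ret = install_special_mapping(mm, addr, PAGE_SIZE,
				      VM_READ | VM_EXEC |
				      VM_MAYREAD | VM_MAYEXEC,
				      helper_pages);
	up_write(&mm->mmap_sem);
	return ret;
}
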
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index be0efbde4994..f7e088f5a309 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -515,7 +515,7 @@ static int __cpuinit
515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) 515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
516{ 516{
517 writeback_set_ratelimit(); 517 writeback_set_ratelimit();
518 return 0; 518 return NOTIFY_DONE;
519} 519}
520 520
521static struct notifier_block __cpuinitdata ratelimit_nb = { 521static struct notifier_block __cpuinitdata ratelimit_nb = {
@@ -549,9 +549,7 @@ void __init page_writeback_init(void)
549} 549}
550 550
551/** 551/**
552 * generic_writepages - walk the list of dirty pages of the given 552 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
553 * address space and writepage() all of them.
554 *
555 * @mapping: address space structure to write 553 * @mapping: address space structure to write
556 * @wbc: subtract the number of written pages from *@wbc->nr_to_write 554 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
557 * 555 *
@@ -698,7 +696,6 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
698 696
699/** 697/**
700 * write_one_page - write out a single page and optionally wait on I/O 698 * write_one_page - write out a single page and optionally wait on I/O
701 *
702 * @page: the page to write 699 * @page: the page to write
703 * @wait: if true, wait on writeout 700 * @wait: if true, wait on writeout
704 * 701 *
@@ -737,6 +734,16 @@ int write_one_page(struct page *page, int wait)
737EXPORT_SYMBOL(write_one_page); 734EXPORT_SYMBOL(write_one_page);
738 735
739/* 736/*
737 * For address_spaces which do not use buffers nor write back.
738 */
739int __set_page_dirty_no_writeback(struct page *page)
740{
741 if (!PageDirty(page))
742 SetPageDirty(page);
743 return 0;
744}
745
746/*
740 * For address_spaces which do not use buffers. Just tag the page as dirty in 747 * For address_spaces which do not use buffers. Just tag the page as dirty in
741 * its radix tree. 748 * its radix tree.
742 * 749 *
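
Hedged illustration (not part of the commit): __set_page_dirty_no_writeback() is for mappings whose pages are never written back; the shmem hunk further down wires it into shmem_aops. An analogous address_space_operations table for a purely RAM-backed mapping might look like this (the simple_* helpers are the generic libfs ones; the prototype for the new function lives outside mm/ and is not visible in this diff):

#include <linux/fs.h>
#include <linux/mm.h>

static const struct address_space_operations ram_only_aops = {
	.readpage	= simple_readpage,
	.prepare_write	= simple_prepare_write,
	.commit_write	= simple_commit_write,
	/* pages live only in RAM: mark them dirty, never start writeback */
	.set_page_dirty	= __set_page_dirty_no_writeback,
};
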
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2c606cc922a5..d461b23a27a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -73,7 +73,9 @@ static void __free_pages_ok(struct page *page, unsigned int order);
73 * don't need any ZONE_NORMAL reservation 73 * don't need any ZONE_NORMAL reservation
74 */ 74 */
75int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 75int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
76#ifdef CONFIG_ZONE_DMA
76 256, 77 256,
78#endif
77#ifdef CONFIG_ZONE_DMA32 79#ifdef CONFIG_ZONE_DMA32
78 256, 80 256,
79#endif 81#endif
@@ -85,7 +87,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
85EXPORT_SYMBOL(totalram_pages); 87EXPORT_SYMBOL(totalram_pages);
86 88
87static char * const zone_names[MAX_NR_ZONES] = { 89static char * const zone_names[MAX_NR_ZONES] = {
90#ifdef CONFIG_ZONE_DMA
88 "DMA", 91 "DMA",
92#endif
89#ifdef CONFIG_ZONE_DMA32 93#ifdef CONFIG_ZONE_DMA32
90 "DMA32", 94 "DMA32",
91#endif 95#endif
@@ -395,7 +399,7 @@ static inline void __free_one_page(struct page *page,
395 VM_BUG_ON(page_idx & (order_size - 1)); 399 VM_BUG_ON(page_idx & (order_size - 1));
396 VM_BUG_ON(bad_range(zone, page)); 400 VM_BUG_ON(bad_range(zone, page));
397 401
398 zone->free_pages += order_size; 402 __mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
399 while (order < MAX_ORDER-1) { 403 while (order < MAX_ORDER-1) {
400 unsigned long combined_idx; 404 unsigned long combined_idx;
401 struct free_area *area; 405 struct free_area *area;
@@ -631,7 +635,7 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
631 list_del(&page->lru); 635 list_del(&page->lru);
632 rmv_page_order(page); 636 rmv_page_order(page);
633 area->nr_free--; 637 area->nr_free--;
634 zone->free_pages -= 1UL << order; 638 __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
635 expand(zone, page, order, current_order, area); 639 expand(zone, page, order, current_order, area);
636 return page; 640 return page;
637 } 641 }
@@ -989,7 +993,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
989 int classzone_idx, int alloc_flags) 993 int classzone_idx, int alloc_flags)
990{ 994{
991 /* free_pages may go negative - that's OK */ 995 /* free_pages may go negative - that's OK */
992 long min = mark, free_pages = z->free_pages - (1 << order) + 1; 996 long min = mark;
997 long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
993 int o; 998 int o;
994 999
995 if (alloc_flags & ALLOC_HIGH) 1000 if (alloc_flags & ALLOC_HIGH)
@@ -1439,35 +1444,6 @@ fastcall void free_pages(unsigned long addr, unsigned int order)
1439 1444
1440EXPORT_SYMBOL(free_pages); 1445EXPORT_SYMBOL(free_pages);
1441 1446
1442/*
1443 * Total amount of free (allocatable) RAM:
1444 */
1445unsigned int nr_free_pages(void)
1446{
1447 unsigned int sum = 0;
1448 struct zone *zone;
1449
1450 for_each_zone(zone)
1451 sum += zone->free_pages;
1452
1453 return sum;
1454}
1455
1456EXPORT_SYMBOL(nr_free_pages);
1457
1458#ifdef CONFIG_NUMA
1459unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
1460{
1461 unsigned int sum = 0;
1462 enum zone_type i;
1463
1464 for (i = 0; i < MAX_NR_ZONES; i++)
1465 sum += pgdat->node_zones[i].free_pages;
1466
1467 return sum;
1468}
1469#endif
1470
1471static unsigned int nr_free_zone_pages(int offset) 1447static unsigned int nr_free_zone_pages(int offset)
1472{ 1448{
1473 /* Just pick one node, since fallback list is circular */ 1449 /* Just pick one node, since fallback list is circular */
@@ -1514,7 +1490,7 @@ void si_meminfo(struct sysinfo *val)
1514{ 1490{
1515 val->totalram = totalram_pages; 1491 val->totalram = totalram_pages;
1516 val->sharedram = 0; 1492 val->sharedram = 0;
1517 val->freeram = nr_free_pages(); 1493 val->freeram = global_page_state(NR_FREE_PAGES);
1518 val->bufferram = nr_blockdev_pages(); 1494 val->bufferram = nr_blockdev_pages();
1519 val->totalhigh = totalhigh_pages; 1495 val->totalhigh = totalhigh_pages;
1520 val->freehigh = nr_free_highpages(); 1496 val->freehigh = nr_free_highpages();
@@ -1529,10 +1505,11 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1529 pg_data_t *pgdat = NODE_DATA(nid); 1505 pg_data_t *pgdat = NODE_DATA(nid);
1530 1506
1531 val->totalram = pgdat->node_present_pages; 1507 val->totalram = pgdat->node_present_pages;
1532 val->freeram = nr_free_pages_pgdat(pgdat); 1508 val->freeram = node_page_state(nid, NR_FREE_PAGES);
1533#ifdef CONFIG_HIGHMEM 1509#ifdef CONFIG_HIGHMEM
1534 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; 1510 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
1535 val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; 1511 val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
1512 NR_FREE_PAGES);
1536#else 1513#else
1537 val->totalhigh = 0; 1514 val->totalhigh = 0;
1538 val->freehigh = 0; 1515 val->freehigh = 0;
@@ -1551,9 +1528,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1551void show_free_areas(void) 1528void show_free_areas(void)
1552{ 1529{
1553 int cpu; 1530 int cpu;
1554 unsigned long active;
1555 unsigned long inactive;
1556 unsigned long free;
1557 struct zone *zone; 1531 struct zone *zone;
1558 1532
1559 for_each_zone(zone) { 1533 for_each_zone(zone) {
@@ -1577,20 +1551,19 @@ void show_free_areas(void)
1577 } 1551 }
1578 } 1552 }
1579 1553
1580 get_zone_counts(&active, &inactive, &free); 1554 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
1581 1555 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
1582 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " 1556 global_page_state(NR_ACTIVE),
1583 "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", 1557 global_page_state(NR_INACTIVE),
1584 active,
1585 inactive,
1586 global_page_state(NR_FILE_DIRTY), 1558 global_page_state(NR_FILE_DIRTY),
1587 global_page_state(NR_WRITEBACK), 1559 global_page_state(NR_WRITEBACK),
1588 global_page_state(NR_UNSTABLE_NFS), 1560 global_page_state(NR_UNSTABLE_NFS),
1589 nr_free_pages(), 1561 global_page_state(NR_FREE_PAGES),
1590 global_page_state(NR_SLAB_RECLAIMABLE) + 1562 global_page_state(NR_SLAB_RECLAIMABLE) +
1591 global_page_state(NR_SLAB_UNRECLAIMABLE), 1563 global_page_state(NR_SLAB_UNRECLAIMABLE),
1592 global_page_state(NR_FILE_MAPPED), 1564 global_page_state(NR_FILE_MAPPED),
1593 global_page_state(NR_PAGETABLE)); 1565 global_page_state(NR_PAGETABLE),
1566 global_page_state(NR_BOUNCE));
1594 1567
1595 for_each_zone(zone) { 1568 for_each_zone(zone) {
1596 int i; 1569 int i;
@@ -1611,12 +1584,12 @@ void show_free_areas(void)
1611 " all_unreclaimable? %s" 1584 " all_unreclaimable? %s"
1612 "\n", 1585 "\n",
1613 zone->name, 1586 zone->name,
1614 K(zone->free_pages), 1587 K(zone_page_state(zone, NR_FREE_PAGES)),
1615 K(zone->pages_min), 1588 K(zone->pages_min),
1616 K(zone->pages_low), 1589 K(zone->pages_low),
1617 K(zone->pages_high), 1590 K(zone->pages_high),
1618 K(zone->nr_active), 1591 K(zone_page_state(zone, NR_ACTIVE)),
1619 K(zone->nr_inactive), 1592 K(zone_page_state(zone, NR_INACTIVE)),
1620 K(zone->present_pages), 1593 K(zone->present_pages),
1621 zone->pages_scanned, 1594 zone->pages_scanned,
1622 (zone->all_unreclaimable ? "yes" : "no") 1595 (zone->all_unreclaimable ? "yes" : "no")
@@ -2650,11 +2623,11 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2650 " %s zone: %lu pages exceeds realsize %lu\n", 2623 " %s zone: %lu pages exceeds realsize %lu\n",
2651 zone_names[j], memmap_pages, realsize); 2624 zone_names[j], memmap_pages, realsize);
2652 2625
2653 /* Account for reserved DMA pages */ 2626 /* Account for reserved pages */
2654 if (j == ZONE_DMA && realsize > dma_reserve) { 2627 if (j == 0 && realsize > dma_reserve) {
2655 realsize -= dma_reserve; 2628 realsize -= dma_reserve;
2656 printk(KERN_DEBUG " DMA zone: %lu pages reserved\n", 2629 printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
2657 dma_reserve); 2630 zone_names[0], dma_reserve);
2658 } 2631 }
2659 2632
2660 if (!is_highmem_idx(j)) 2633 if (!is_highmem_idx(j))
@@ -2674,7 +2647,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2674 spin_lock_init(&zone->lru_lock); 2647 spin_lock_init(&zone->lru_lock);
2675 zone_seqlock_init(zone); 2648 zone_seqlock_init(zone);
2676 zone->zone_pgdat = pgdat; 2649 zone->zone_pgdat = pgdat;
2677 zone->free_pages = 0;
2678 2650
2679 zone->prev_priority = DEF_PRIORITY; 2651 zone->prev_priority = DEF_PRIORITY;
2680 2652
@@ -2683,8 +2655,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2683 INIT_LIST_HEAD(&zone->inactive_list); 2655 INIT_LIST_HEAD(&zone->inactive_list);
2684 zone->nr_scan_active = 0; 2656 zone->nr_scan_active = 0;
2685 zone->nr_scan_inactive = 0; 2657 zone->nr_scan_inactive = 0;
2686 zone->nr_active = 0;
2687 zone->nr_inactive = 0;
2688 zap_zone_vm_stats(zone); 2658 zap_zone_vm_stats(zone);
2689 atomic_set(&zone->reclaim_in_progress, 0); 2659 atomic_set(&zone->reclaim_in_progress, 0);
2690 if (!size) 2660 if (!size)
@@ -2876,20 +2846,23 @@ static void __init sort_node_map(void)
2876 cmp_node_active_region, NULL); 2846 cmp_node_active_region, NULL);
2877} 2847}
2878 2848
2879/* Find the lowest pfn for a node. This depends on a sorted early_node_map */ 2849/* Find the lowest pfn for a node */
2880unsigned long __init find_min_pfn_for_node(unsigned long nid) 2850unsigned long __init find_min_pfn_for_node(unsigned long nid)
2881{ 2851{
2882 int i; 2852 int i;
2883 2853 unsigned long min_pfn = ULONG_MAX;
2884 /* Regions in the early_node_map can be in any order */
2885 sort_node_map();
2886 2854
2887 /* Assuming a sorted map, the first range found has the starting pfn */ 2855 /* Assuming a sorted map, the first range found has the starting pfn */
2888 for_each_active_range_index_in_nid(i, nid) 2856 for_each_active_range_index_in_nid(i, nid)
2889 return early_node_map[i].start_pfn; 2857 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
2890 2858
2891 printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); 2859 if (min_pfn == ULONG_MAX) {
2892 return 0; 2860 printk(KERN_WARNING
2861 "Could not find start_pfn for node %lu\n", nid);
2862 return 0;
2863 }
2864
2865 return min_pfn;
2893} 2866}
2894 2867
2895/** 2868/**
@@ -2938,6 +2911,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
2938 unsigned long nid; 2911 unsigned long nid;
2939 enum zone_type i; 2912 enum zone_type i;
2940 2913
2914 /* Sort early_node_map as initialisation assumes it is sorted */
2915 sort_node_map();
2916
2941 /* Record where the zone boundaries are */ 2917 /* Record where the zone boundaries are */
2942 memset(arch_zone_lowest_possible_pfn, 0, 2918 memset(arch_zone_lowest_possible_pfn, 0,
2943 sizeof(arch_zone_lowest_possible_pfn)); 2919 sizeof(arch_zone_lowest_possible_pfn));
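
Hedged illustration (not part of the commit): the hunks above replace the ad-hoc zone->free_pages / nr_active / nr_inactive fields with ZVC counters. Reading them goes through the same accessors the patch now uses, roughly:

#include <linux/kernel.h>
#include <linux/mmzone.h>
#include <linux/vmstat.h>

static void dump_lru_counters(void)
{
	struct zone *zone;

	printk(KERN_DEBUG "free:%lu active:%lu inactive:%lu\n",
	       global_page_state(NR_FREE_PAGES),
	       global_page_state(NR_ACTIVE),
	       global_page_state(NR_INACTIVE));

	for_each_zone(zone)
		printk(KERN_DEBUG "  %s free:%lu\n", zone->name,
		       zone_page_state(zone, NR_FREE_PAGES));
}
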
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f539e8e827a..93d9ee692fd8 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -575,10 +575,6 @@ void handle_ra_miss(struct address_space *mapping,
575 */ 575 */
576unsigned long max_sane_readahead(unsigned long nr) 576unsigned long max_sane_readahead(unsigned long nr)
577{ 577{
578 unsigned long active; 578 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
579 unsigned long inactive; 579 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
580 unsigned long free;
581
582 __get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id()));
583 return min(nr, (inactive + free) / 2);
584} 580}
diff --git a/mm/shmem.c b/mm/shmem.c
index 70da7a0981bf..882053031aa0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -178,9 +178,9 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
178static struct super_operations shmem_ops; 178static struct super_operations shmem_ops;
179static const struct address_space_operations shmem_aops; 179static const struct address_space_operations shmem_aops;
180static const struct file_operations shmem_file_operations; 180static const struct file_operations shmem_file_operations;
181static struct inode_operations shmem_inode_operations; 181static const struct inode_operations shmem_inode_operations;
182static struct inode_operations shmem_dir_inode_operations; 182static const struct inode_operations shmem_dir_inode_operations;
183static struct inode_operations shmem_special_inode_operations; 183static const struct inode_operations shmem_special_inode_operations;
184static struct vm_operations_struct shmem_vm_ops; 184static struct vm_operations_struct shmem_vm_ops;
185 185
186static struct backing_dev_info shmem_backing_dev_info __read_mostly = { 186static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
@@ -1410,8 +1410,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1410} 1410}
1411 1411
1412#ifdef CONFIG_TMPFS 1412#ifdef CONFIG_TMPFS
1413static struct inode_operations shmem_symlink_inode_operations; 1413static const struct inode_operations shmem_symlink_inode_operations;
1414static struct inode_operations shmem_symlink_inline_operations; 1414static const struct inode_operations shmem_symlink_inline_operations;
1415 1415
1416/* 1416/*
1417 * Normally tmpfs makes no use of shmem_prepare_write, but it 1417 * Normally tmpfs makes no use of shmem_prepare_write, but it
@@ -1904,12 +1904,12 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
1904 } 1904 }
1905} 1905}
1906 1906
1907static struct inode_operations shmem_symlink_inline_operations = { 1907static const struct inode_operations shmem_symlink_inline_operations = {
1908 .readlink = generic_readlink, 1908 .readlink = generic_readlink,
1909 .follow_link = shmem_follow_link_inline, 1909 .follow_link = shmem_follow_link_inline,
1910}; 1910};
1911 1911
1912static struct inode_operations shmem_symlink_inode_operations = { 1912static const struct inode_operations shmem_symlink_inode_operations = {
1913 .truncate = shmem_truncate, 1913 .truncate = shmem_truncate,
1914 .readlink = generic_readlink, 1914 .readlink = generic_readlink,
1915 .follow_link = shmem_follow_link, 1915 .follow_link = shmem_follow_link,
@@ -2316,7 +2316,7 @@ static void destroy_inodecache(void)
2316 2316
2317static const struct address_space_operations shmem_aops = { 2317static const struct address_space_operations shmem_aops = {
2318 .writepage = shmem_writepage, 2318 .writepage = shmem_writepage,
2319 .set_page_dirty = __set_page_dirty_nobuffers, 2319 .set_page_dirty = __set_page_dirty_no_writeback,
2320#ifdef CONFIG_TMPFS 2320#ifdef CONFIG_TMPFS
2321 .prepare_write = shmem_prepare_write, 2321 .prepare_write = shmem_prepare_write,
2322 .commit_write = simple_commit_write, 2322 .commit_write = simple_commit_write,
@@ -2335,7 +2335,7 @@ static const struct file_operations shmem_file_operations = {
2335#endif 2335#endif
2336}; 2336};
2337 2337
2338static struct inode_operations shmem_inode_operations = { 2338static const struct inode_operations shmem_inode_operations = {
2339 .truncate = shmem_truncate, 2339 .truncate = shmem_truncate,
2340 .setattr = shmem_notify_change, 2340 .setattr = shmem_notify_change,
2341 .truncate_range = shmem_truncate_range, 2341 .truncate_range = shmem_truncate_range,
@@ -2349,7 +2349,7 @@ static struct inode_operations shmem_inode_operations = {
2349 2349
2350}; 2350};
2351 2351
2352static struct inode_operations shmem_dir_inode_operations = { 2352static const struct inode_operations shmem_dir_inode_operations = {
2353#ifdef CONFIG_TMPFS 2353#ifdef CONFIG_TMPFS
2354 .create = shmem_create, 2354 .create = shmem_create,
2355 .lookup = simple_lookup, 2355 .lookup = simple_lookup,
@@ -2371,7 +2371,7 @@ static struct inode_operations shmem_dir_inode_operations = {
2371#endif 2371#endif
2372}; 2372};
2373 2373
2374static struct inode_operations shmem_special_inode_operations = { 2374static const struct inode_operations shmem_special_inode_operations = {
2375#ifdef CONFIG_TMPFS_POSIX_ACL 2375#ifdef CONFIG_TMPFS_POSIX_ACL
2376 .setattr = shmem_notify_change, 2376 .setattr = shmem_notify_change,
2377 .setxattr = generic_setxattr, 2377 .setxattr = generic_setxattr,
diff --git a/mm/slab.c b/mm/slab.c
index c6100628a6ef..70784b848b69 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -793,8 +793,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
793 * has cs_{dma,}cachep==NULL. Thus no special case 793 * has cs_{dma,}cachep==NULL. Thus no special case
794 * for large kmalloc calls required. 794 * for large kmalloc calls required.
795 */ 795 */
796#ifdef CONFIG_ZONE_DMA
796 if (unlikely(gfpflags & GFP_DMA)) 797 if (unlikely(gfpflags & GFP_DMA))
797 return csizep->cs_dmacachep; 798 return csizep->cs_dmacachep;
799#endif
798 return csizep->cs_cachep; 800 return csizep->cs_cachep;
799} 801}
800 802
@@ -1493,13 +1495,15 @@ void __init kmem_cache_init(void)
1493 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1495 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1494 NULL, NULL); 1496 NULL, NULL);
1495 } 1497 }
1496 1498#ifdef CONFIG_ZONE_DMA
1497 sizes->cs_dmacachep = kmem_cache_create(names->name_dma, 1499 sizes->cs_dmacachep = kmem_cache_create(
1500 names->name_dma,
1498 sizes->cs_size, 1501 sizes->cs_size,
1499 ARCH_KMALLOC_MINALIGN, 1502 ARCH_KMALLOC_MINALIGN,
1500 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| 1503 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1501 SLAB_PANIC, 1504 SLAB_PANIC,
1502 NULL, NULL); 1505 NULL, NULL);
1506#endif
1503 sizes++; 1507 sizes++;
1504 names++; 1508 names++;
1505 } 1509 }
@@ -2321,7 +2325,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2321 cachep->slab_size = slab_size; 2325 cachep->slab_size = slab_size;
2322 cachep->flags = flags; 2326 cachep->flags = flags;
2323 cachep->gfpflags = 0; 2327 cachep->gfpflags = 0;
2324 if (flags & SLAB_CACHE_DMA) 2328 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2325 cachep->gfpflags |= GFP_DMA; 2329 cachep->gfpflags |= GFP_DMA;
2326 cachep->buffer_size = size; 2330 cachep->buffer_size = size;
2327 cachep->reciprocal_buffer_size = reciprocal_value(size); 2331 cachep->reciprocal_buffer_size = reciprocal_value(size);
@@ -2516,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
2516 * kmem_cache_destroy - delete a cache 2520 * kmem_cache_destroy - delete a cache
2517 * @cachep: the cache to destroy 2521 * @cachep: the cache to destroy
2518 * 2522 *
2519 * Remove a struct kmem_cache object from the slab cache. 2523 * Remove a &struct kmem_cache object from the slab cache.
2520 * 2524 *
2521 * It is expected this function will be called by a module when it is 2525 * It is expected this function will be called by a module when it is
2522 * unloaded. This will remove the cache completely, and avoid a duplicate 2526 * unloaded. This will remove the cache completely, and avoid a duplicate
@@ -2643,10 +2647,12 @@ static void cache_init_objs(struct kmem_cache *cachep,
2643 2647
2644static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2648static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2645{ 2649{
2646 if (flags & GFP_DMA) 2650 if (CONFIG_ZONE_DMA_FLAG) {
2647 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2651 if (flags & GFP_DMA)
2648 else 2652 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2649 BUG_ON(cachep->gfpflags & GFP_DMA); 2653 else
2654 BUG_ON(cachep->gfpflags & GFP_DMA);
2655 }
2650} 2656}
2651 2657
2652static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, 2658static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
@@ -2814,19 +2820,11 @@ failed:
2814 */ 2820 */
2815static void kfree_debugcheck(const void *objp) 2821static void kfree_debugcheck(const void *objp)
2816{ 2822{
2817 struct page *page;
2818
2819 if (!virt_addr_valid(objp)) { 2823 if (!virt_addr_valid(objp)) {
2820 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", 2824 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2821 (unsigned long)objp); 2825 (unsigned long)objp);
2822 BUG(); 2826 BUG();
2823 } 2827 }
2824 page = virt_to_page(objp);
2825 if (!PageSlab(page)) {
2826 printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n",
2827 (unsigned long)objp);
2828 BUG();
2829 }
2830} 2828}
2831 2829
2832static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) 2830static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
@@ -3197,35 +3195,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3197 return objp; 3195 return objp;
3198} 3196}
3199 3197
3200static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
3201 gfp_t flags, void *caller)
3202{
3203 unsigned long save_flags;
3204 void *objp = NULL;
3205
3206 cache_alloc_debugcheck_before(cachep, flags);
3207
3208 local_irq_save(save_flags);
3209
3210 if (unlikely(NUMA_BUILD &&
3211 current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
3212 objp = alternate_node_alloc(cachep, flags);
3213
3214 if (!objp)
3215 objp = ____cache_alloc(cachep, flags);
3216 /*
3217 * We may just have run out of memory on the local node.
3218 * ____cache_alloc_node() knows how to locate memory on other nodes
3219 */
3220 if (NUMA_BUILD && !objp)
3221 objp = ____cache_alloc_node(cachep, flags, numa_node_id());
3222 local_irq_restore(save_flags);
3223 objp = cache_alloc_debugcheck_after(cachep, flags, objp,
3224 caller);
3225 prefetchw(objp);
3226 return objp;
3227}
3228
3229#ifdef CONFIG_NUMA 3198#ifdef CONFIG_NUMA
3230/* 3199/*
3231 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. 3200 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
@@ -3257,14 +3226,20 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3257 * allocator to do its reclaim / fallback magic. We then insert the 3226 * allocator to do its reclaim / fallback magic. We then insert the
3258 * slab into the proper nodelist and then allocate from it. 3227 * slab into the proper nodelist and then allocate from it.
3259 */ 3228 */
3260void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) 3229static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3261{ 3230{
3262 struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy)) 3231 struct zonelist *zonelist;
3263 ->node_zonelists[gfp_zone(flags)]; 3232 gfp_t local_flags;
3264 struct zone **z; 3233 struct zone **z;
3265 void *obj = NULL; 3234 void *obj = NULL;
3266 int nid; 3235 int nid;
3267 gfp_t local_flags = (flags & GFP_LEVEL_MASK); 3236
3237 if (flags & __GFP_THISNODE)
3238 return NULL;
3239
3240 zonelist = &NODE_DATA(slab_node(current->mempolicy))
3241 ->node_zonelists[gfp_zone(flags)];
3242 local_flags = (flags & GFP_LEVEL_MASK);
3268 3243
3269retry: 3244retry:
3270 /* 3245 /*
@@ -3374,16 +3349,110 @@ must_grow:
3374 if (x) 3349 if (x)
3375 goto retry; 3350 goto retry;
3376 3351
3377 if (!(flags & __GFP_THISNODE)) 3352 return fallback_alloc(cachep, flags);
3378 /* Unable to grow the cache. Fall back to other nodes. */
3379 return fallback_alloc(cachep, flags);
3380
3381 return NULL;
3382 3353
3383done: 3354done:
3384 return obj; 3355 return obj;
3385} 3356}
3386#endif 3357
3358/**
3359 * kmem_cache_alloc_node - Allocate an object on the specified node
3360 * @cachep: The cache to allocate from.
3361 * @flags: See kmalloc().
3362 * @nodeid: node number of the target node.
3363 * @caller: return address of caller, used for debug information
3364 *
3365 * Identical to kmem_cache_alloc but it will allocate memory on the given
3366 * node, which can improve the performance for cpu bound structures.
3367 *
3368 * Fallback to other node is possible if __GFP_THISNODE is not set.
3369 */
3370static __always_inline void *
3371__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3372 void *caller)
3373{
3374 unsigned long save_flags;
3375 void *ptr;
3376
3377 cache_alloc_debugcheck_before(cachep, flags);
3378 local_irq_save(save_flags);
3379
3380 if (unlikely(nodeid == -1))
3381 nodeid = numa_node_id();
3382
3383 if (unlikely(!cachep->nodelists[nodeid])) {
3384 /* Node not bootstrapped yet */
3385 ptr = fallback_alloc(cachep, flags);
3386 goto out;
3387 }
3388
3389 if (nodeid == numa_node_id()) {
3390 /*
3391 * Use the locally cached objects if possible.
3392 * However ____cache_alloc does not allow fallback
3393 * to other nodes. It may fail while we still have
3394 * objects on other nodes available.
3395 */
3396 ptr = ____cache_alloc(cachep, flags);
3397 if (ptr)
3398 goto out;
3399 }
3400 /* ___cache_alloc_node can fall back to other nodes */
3401 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3402 out:
3403 local_irq_restore(save_flags);
3404 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3405
3406 return ptr;
3407}
3408
3409static __always_inline void *
3410__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3411{
3412 void *objp;
3413
3414 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3415 objp = alternate_node_alloc(cache, flags);
3416 if (objp)
3417 goto out;
3418 }
3419 objp = ____cache_alloc(cache, flags);
3420
3421 /*
3422 * We may just have run out of memory on the local node.
3423 * ____cache_alloc_node() knows how to locate memory on other nodes
3424 */
3425 if (!objp)
3426 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3427
3428 out:
3429 return objp;
3430}
3431#else
3432
3433static __always_inline void *
3434__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3435{
3436 return ____cache_alloc(cachep, flags);
3437}
3438
3439#endif /* CONFIG_NUMA */
3440
3441static __always_inline void *
3442__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3443{
3444 unsigned long save_flags;
3445 void *objp;
3446
3447 cache_alloc_debugcheck_before(cachep, flags);
3448 local_irq_save(save_flags);
3449 objp = __do_cache_alloc(cachep, flags);
3450 local_irq_restore(save_flags);
3451 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3452 prefetchw(objp);
3453
3454 return objp;
3455}
3387 3456
3388/* 3457/*
3389 * Caller needs to acquire correct kmem_list's list_lock 3458 * Caller needs to acquire correct kmem_list's list_lock
@@ -3582,57 +3651,6 @@ out:
3582} 3651}
3583 3652
3584#ifdef CONFIG_NUMA 3653#ifdef CONFIG_NUMA
3585/**
3586 * kmem_cache_alloc_node - Allocate an object on the specified node
3587 * @cachep: The cache to allocate from.
3588 * @flags: See kmalloc().
3589 * @nodeid: node number of the target node.
3590 * @caller: return address of caller, used for debug information
3591 *
3592 * Identical to kmem_cache_alloc but it will allocate memory on the given
3593 * node, which can improve the performance for cpu bound structures.
3594 *
3595 * Fallback to other node is possible if __GFP_THISNODE is not set.
3596 */
3597static __always_inline void *
3598__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3599 int nodeid, void *caller)
3600{
3601 unsigned long save_flags;
3602 void *ptr = NULL;
3603
3604 cache_alloc_debugcheck_before(cachep, flags);
3605 local_irq_save(save_flags);
3606
3607 if (unlikely(nodeid == -1))
3608 nodeid = numa_node_id();
3609
3610 if (likely(cachep->nodelists[nodeid])) {
3611 if (nodeid == numa_node_id()) {
3612 /*
3613 * Use the locally cached objects if possible.
3614 * However ____cache_alloc does not allow fallback
3615 * to other nodes. It may fail while we still have
3616 * objects on other nodes available.
3617 */
3618 ptr = ____cache_alloc(cachep, flags);
3619 }
3620 if (!ptr) {
3621 /* ___cache_alloc_node can fall back to other nodes */
3622 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3623 }
3624 } else {
3625 /* Node not bootstrapped yet */
3626 if (!(flags & __GFP_THISNODE))
3627 ptr = fallback_alloc(cachep, flags);
3628 }
3629
3630 local_irq_restore(save_flags);
3631 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3632
3633 return ptr;
3634}
3635
3636void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3654void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3637{ 3655{
3638 return __cache_alloc_node(cachep, flags, nodeid, 3656 return __cache_alloc_node(cachep, flags, nodeid,
@@ -3733,6 +3751,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3733 BUG_ON(virt_to_cache(objp) != cachep); 3751 BUG_ON(virt_to_cache(objp) != cachep);
3734 3752
3735 local_irq_save(flags); 3753 local_irq_save(flags);
3754 debug_check_no_locks_freed(objp, obj_size(cachep));
3736 __cache_free(cachep, objp); 3755 __cache_free(cachep, objp);
3737 local_irq_restore(flags); 3756 local_irq_restore(flags);
3738} 3757}
@@ -4017,18 +4036,17 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4017 * If we cannot acquire the cache chain mutex then just give up - we'll try 4036 * If we cannot acquire the cache chain mutex then just give up - we'll try
4018 * again on the next iteration. 4037 * again on the next iteration.
4019 */ 4038 */
4020static void cache_reap(struct work_struct *unused) 4039static void cache_reap(struct work_struct *w)
4021{ 4040{
4022 struct kmem_cache *searchp; 4041 struct kmem_cache *searchp;
4023 struct kmem_list3 *l3; 4042 struct kmem_list3 *l3;
4024 int node = numa_node_id(); 4043 int node = numa_node_id();
4044 struct delayed_work *work =
4045 container_of(w, struct delayed_work, work);
4025 4046
4026 if (!mutex_trylock(&cache_chain_mutex)) { 4047 if (!mutex_trylock(&cache_chain_mutex))
4027 /* Give up. Setup the next iteration. */ 4048 /* Give up. Setup the next iteration. */
4028 schedule_delayed_work(&__get_cpu_var(reap_work), 4049 goto out;
4029 round_jiffies_relative(REAPTIMEOUT_CPUC));
4030 return;
4031 }
4032 4050
4033 list_for_each_entry(searchp, &cache_chain, next) { 4051 list_for_each_entry(searchp, &cache_chain, next) {
4034 check_irq_on(); 4052 check_irq_on();
@@ -4071,9 +4089,9 @@ next:
4071 mutex_unlock(&cache_chain_mutex); 4089 mutex_unlock(&cache_chain_mutex);
4072 next_reap_node(); 4090 next_reap_node();
4073 refresh_cpu_vm_stats(smp_processor_id()); 4091 refresh_cpu_vm_stats(smp_processor_id());
4092out:
4074 /* Set up the next iteration */ 4093 /* Set up the next iteration */
4075 schedule_delayed_work(&__get_cpu_var(reap_work), 4094 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4076 round_jiffies_relative(REAPTIMEOUT_CPUC));
4077} 4095}
4078 4096
4079#ifdef CONFIG_PROC_FS 4097#ifdef CONFIG_PROC_FS
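
Hedged illustration (not part of the commit): the consolidated __cache_alloc_node() above backs kmem_cache_alloc_node(); a usage sketch with a hypothetical cache, showing the per-node allocation the kerneldoc describes:

#include <linux/init.h>
#include <linux/slab.h>

struct per_node_stats {			/* hypothetical per-node object */
	unsigned long hits;
	unsigned long misses;
};

static struct kmem_cache *stats_cache;

static int __init stats_cache_init(void)
{
	stats_cache = kmem_cache_create("per_node_stats",
					sizeof(struct per_node_stats), 0,
					SLAB_PANIC, NULL, NULL);
	return 0;
}

static struct per_node_stats *alloc_stats_on(int nid)
{
	/* Without __GFP_THISNODE this may fall back to other nodes; a
	 * nodeid of -1 means "current node", per __cache_alloc_node(). */
	return kmem_cache_alloc_node(stats_cache, GFP_KERNEL, nid);
}
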
diff --git a/mm/truncate.c b/mm/truncate.c
index 5df947de7654..ebf3fcb4115b 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(cancel_dirty_page);
85 * 85 *
86 * We need to bale out if page->mapping is no longer equal to the original 86 * We need to bale out if page->mapping is no longer equal to the original
87 * mapping. This happens a) when the VM reclaimed the page while we waited on 87 * mapping. This happens a) when the VM reclaimed the page while we waited on
88 * its lock, b) when a concurrent invalidate_inode_pages got there first and 88 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
89 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 89 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
90 */ 90 */
91static void 91static void
@@ -106,7 +106,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
106} 106}
107 107
108/* 108/*
109 * This is for invalidate_inode_pages(). That function can be called at 109 * This is for invalidate_mapping_pages(). That function can be called at
110 * any time, and is not supposed to throw away dirty pages. But pages can 110 * any time, and is not supposed to throw away dirty pages. But pages can
111 * be marked dirty at any time too, so use remove_mapping which safely 111 * be marked dirty at any time too, so use remove_mapping which safely
112 * discards clean, unused pages. 112 * discards clean, unused pages.
@@ -310,12 +310,7 @@ unlock:
310 } 310 }
311 return ret; 311 return ret;
312} 312}
313 313EXPORT_SYMBOL(invalidate_mapping_pages);
314unsigned long invalidate_inode_pages(struct address_space *mapping)
315{
316 return invalidate_mapping_pages(mapping, 0, ~0UL);
317}
318EXPORT_SYMBOL(invalidate_inode_pages);
319 314
320/* 315/*
321 * This is like invalidate_complete_page(), except it ignores the page's 316 * This is like invalidate_complete_page(), except it ignores the page's
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 86897ee792d6..9eef486da909 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -699,7 +699,7 @@ finished:
699 * that it is big enough to cover the vma. Will return failure if 699 * that it is big enough to cover the vma. Will return failure if
700 * that criteria isn't met. 700 * that criteria isn't met.
701 * 701 *
702 * Similar to remap_pfn_range (see mm/memory.c) 702 * Similar to remap_pfn_range() (see mm/memory.c)
703 */ 703 */
704int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 704int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
705 unsigned long pgoff) 705 unsigned long pgoff)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7430df68cb64..0655d5fe73e8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -679,7 +679,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
679 nr_taken = isolate_lru_pages(sc->swap_cluster_max, 679 nr_taken = isolate_lru_pages(sc->swap_cluster_max,
680 &zone->inactive_list, 680 &zone->inactive_list,
681 &page_list, &nr_scan); 681 &page_list, &nr_scan);
682 zone->nr_inactive -= nr_taken; 682 __mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
683 zone->pages_scanned += nr_scan; 683 zone->pages_scanned += nr_scan;
684 spin_unlock_irq(&zone->lru_lock); 684 spin_unlock_irq(&zone->lru_lock);
685 685
@@ -740,7 +740,8 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
740 740
741static inline int zone_is_near_oom(struct zone *zone) 741static inline int zone_is_near_oom(struct zone *zone)
742{ 742{
743 return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; 743 return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
744 + zone_page_state(zone, NR_INACTIVE))*3;
744} 745}
745 746
746/* 747/*
@@ -825,7 +826,7 @@ force_reclaim_mapped:
825 pgmoved = isolate_lru_pages(nr_pages, &zone->active_list, 826 pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
826 &l_hold, &pgscanned); 827 &l_hold, &pgscanned);
827 zone->pages_scanned += pgscanned; 828 zone->pages_scanned += pgscanned;
828 zone->nr_active -= pgmoved; 829 __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
829 spin_unlock_irq(&zone->lru_lock); 830 spin_unlock_irq(&zone->lru_lock);
830 831
831 while (!list_empty(&l_hold)) { 832 while (!list_empty(&l_hold)) {
@@ -857,7 +858,7 @@ force_reclaim_mapped:
857 list_move(&page->lru, &zone->inactive_list); 858 list_move(&page->lru, &zone->inactive_list);
858 pgmoved++; 859 pgmoved++;
859 if (!pagevec_add(&pvec, page)) { 860 if (!pagevec_add(&pvec, page)) {
860 zone->nr_inactive += pgmoved; 861 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
861 spin_unlock_irq(&zone->lru_lock); 862 spin_unlock_irq(&zone->lru_lock);
862 pgdeactivate += pgmoved; 863 pgdeactivate += pgmoved;
863 pgmoved = 0; 864 pgmoved = 0;
@@ -867,7 +868,7 @@ force_reclaim_mapped:
867 spin_lock_irq(&zone->lru_lock); 868 spin_lock_irq(&zone->lru_lock);
868 } 869 }
869 } 870 }
870 zone->nr_inactive += pgmoved; 871 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
871 pgdeactivate += pgmoved; 872 pgdeactivate += pgmoved;
872 if (buffer_heads_over_limit) { 873 if (buffer_heads_over_limit) {
873 spin_unlock_irq(&zone->lru_lock); 874 spin_unlock_irq(&zone->lru_lock);
@@ -885,14 +886,14 @@ force_reclaim_mapped:
885 list_move(&page->lru, &zone->active_list); 886 list_move(&page->lru, &zone->active_list);
886 pgmoved++; 887 pgmoved++;
887 if (!pagevec_add(&pvec, page)) { 888 if (!pagevec_add(&pvec, page)) {
888 zone->nr_active += pgmoved; 889 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
889 pgmoved = 0; 890 pgmoved = 0;
890 spin_unlock_irq(&zone->lru_lock); 891 spin_unlock_irq(&zone->lru_lock);
891 __pagevec_release(&pvec); 892 __pagevec_release(&pvec);
892 spin_lock_irq(&zone->lru_lock); 893 spin_lock_irq(&zone->lru_lock);
893 } 894 }
894 } 895 }
895 zone->nr_active += pgmoved; 896 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
896 897
897 __count_zone_vm_events(PGREFILL, zone, pgscanned); 898 __count_zone_vm_events(PGREFILL, zone, pgscanned);
898 __count_vm_events(PGDEACTIVATE, pgdeactivate); 899 __count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -918,14 +919,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
918 * Add one to `nr_to_scan' just to make sure that the kernel will 919 * Add one to `nr_to_scan' just to make sure that the kernel will
919 * slowly sift through the active list. 920 * slowly sift through the active list.
920 */ 921 */
921 zone->nr_scan_active += (zone->nr_active >> priority) + 1; 922 zone->nr_scan_active +=
923 (zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
922 nr_active = zone->nr_scan_active; 924 nr_active = zone->nr_scan_active;
923 if (nr_active >= sc->swap_cluster_max) 925 if (nr_active >= sc->swap_cluster_max)
924 zone->nr_scan_active = 0; 926 zone->nr_scan_active = 0;
925 else 927 else
926 nr_active = 0; 928 nr_active = 0;
927 929
928 zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1; 930 zone->nr_scan_inactive +=
931 (zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
929 nr_inactive = zone->nr_scan_inactive; 932 nr_inactive = zone->nr_scan_inactive;
930 if (nr_inactive >= sc->swap_cluster_max) 933 if (nr_inactive >= sc->swap_cluster_max)
931 zone->nr_scan_inactive = 0; 934 zone->nr_scan_inactive = 0;
@@ -1037,7 +1040,8 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1037 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1040 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1038 continue; 1041 continue;
1039 1042
1040 lru_pages += zone->nr_active + zone->nr_inactive; 1043 lru_pages += zone_page_state(zone, NR_ACTIVE)
1044 + zone_page_state(zone, NR_INACTIVE);
1041 } 1045 }
1042 1046
1043 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 1047 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
@@ -1182,7 +1186,8 @@ loop_again:
1182 for (i = 0; i <= end_zone; i++) { 1186 for (i = 0; i <= end_zone; i++) {
1183 struct zone *zone = pgdat->node_zones + i; 1187 struct zone *zone = pgdat->node_zones + i;
1184 1188
1185 lru_pages += zone->nr_active + zone->nr_inactive; 1189 lru_pages += zone_page_state(zone, NR_ACTIVE)
1190 + zone_page_state(zone, NR_INACTIVE);
1186 } 1191 }
1187 1192
1188 /* 1193 /*
@@ -1219,8 +1224,9 @@ loop_again:
1219 if (zone->all_unreclaimable) 1224 if (zone->all_unreclaimable)
1220 continue; 1225 continue;
1221 if (nr_slab == 0 && zone->pages_scanned >= 1226 if (nr_slab == 0 && zone->pages_scanned >=
1222 (zone->nr_active + zone->nr_inactive) * 6) 1227 (zone_page_state(zone, NR_ACTIVE)
1223 zone->all_unreclaimable = 1; 1228 + zone_page_state(zone, NR_INACTIVE)) * 6)
1229 zone->all_unreclaimable = 1;
1224 /* 1230 /*
1225 * If we've done a decent amount of scanning and 1231 * If we've done a decent amount of scanning and
1226 * the reclaim ratio is low, start doing writepage 1232 * the reclaim ratio is low, start doing writepage
@@ -1385,18 +1391,22 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1385 1391
1386 /* For pass = 0 we don't shrink the active list */ 1392 /* For pass = 0 we don't shrink the active list */
1387 if (pass > 0) { 1393 if (pass > 0) {
1388 zone->nr_scan_active += (zone->nr_active >> prio) + 1; 1394 zone->nr_scan_active +=
1395 (zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
1389 if (zone->nr_scan_active >= nr_pages || pass > 3) { 1396 if (zone->nr_scan_active >= nr_pages || pass > 3) {
1390 zone->nr_scan_active = 0; 1397 zone->nr_scan_active = 0;
1391 nr_to_scan = min(nr_pages, zone->nr_active); 1398 nr_to_scan = min(nr_pages,
1399 zone_page_state(zone, NR_ACTIVE));
1392 shrink_active_list(nr_to_scan, zone, sc, prio); 1400 shrink_active_list(nr_to_scan, zone, sc, prio);
1393 } 1401 }
1394 } 1402 }
1395 1403
1396 zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1; 1404 zone->nr_scan_inactive +=
1405 (zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
1397 if (zone->nr_scan_inactive >= nr_pages || pass > 3) { 1406 if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
1398 zone->nr_scan_inactive = 0; 1407 zone->nr_scan_inactive = 0;
1399 nr_to_scan = min(nr_pages, zone->nr_inactive); 1408 nr_to_scan = min(nr_pages,
1409 zone_page_state(zone, NR_INACTIVE));
1400 ret += shrink_inactive_list(nr_to_scan, zone, sc); 1410 ret += shrink_inactive_list(nr_to_scan, zone, sc);
1401 if (ret >= nr_pages) 1411 if (ret >= nr_pages)
1402 return ret; 1412 return ret;
@@ -1408,12 +1418,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1408 1418
1409static unsigned long count_lru_pages(void) 1419static unsigned long count_lru_pages(void)
1410{ 1420{
1411 struct zone *zone; 1421 return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
1412 unsigned long ret = 0;
1413
1414 for_each_zone(zone)
1415 ret += zone->nr_active + zone->nr_inactive;
1416 return ret;
1417} 1422}
1418 1423
1419/* 1424/*
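
Note: every vmscan.c hunk above replaces the old zone->nr_active / zone->nr_inactive fields with the NR_ACTIVE / NR_INACTIVE zoned VM counters. Roughly, and simplified from the vmstat accessors of this era (not a verbatim copy), the read and write sides used here look like:

	#include <linux/vmstat.h>

	/* Reader sketch: sums the zone-wide atomic counter; per-cpu deltas may
	 * not have been folded in yet, so transient negatives clamp to zero. */
	static inline unsigned long zone_page_state_sketch(struct zone *zone,
							   enum zone_stat_item item)
	{
		long x = atomic_long_read(&zone->vm_stat[item]);
		return x < 0 ? 0 : x;
	}

	/* Writer side: shrink_inactive_list() and friends call
	 *	__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
	 * the non-atomic __ variant is sufficient there because those callers
	 * hold zone->lru_lock with interrupts disabled. */
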
diff --git a/mm/vmstat.c b/mm/vmstat.c
index dc005a0c96ae..6c488d6ac425 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,39 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15 15
16void __get_zone_counts(unsigned long *active, unsigned long *inactive,
17 unsigned long *free, struct pglist_data *pgdat)
18{
19 struct zone *zones = pgdat->node_zones;
20 int i;
21
22 *active = 0;
23 *inactive = 0;
24 *free = 0;
25 for (i = 0; i < MAX_NR_ZONES; i++) {
26 *active += zones[i].nr_active;
27 *inactive += zones[i].nr_inactive;
28 *free += zones[i].free_pages;
29 }
30}
31
32void get_zone_counts(unsigned long *active,
33 unsigned long *inactive, unsigned long *free)
34{
35 struct pglist_data *pgdat;
36
37 *active = 0;
38 *inactive = 0;
39 *free = 0;
40 for_each_online_pgdat(pgdat) {
41 unsigned long l, m, n;
42 __get_zone_counts(&l, &m, &n, pgdat);
43 *active += l;
44 *inactive += m;
45 *free += n;
46 }
47}
48
49#ifdef CONFIG_VM_EVENT_COUNTERS 16#ifdef CONFIG_VM_EVENT_COUNTERS
50DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; 17DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
51EXPORT_PER_CPU_SYMBOL(vm_event_states); 18EXPORT_PER_CPU_SYMBOL(vm_event_states);
@@ -239,7 +206,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
239 * in between and therefore the atomicity vs. interrupt cannot be exploited 206 * in between and therefore the atomicity vs. interrupt cannot be exploited
240 * in a useful way here. 207 * in a useful way here.
241 */ 208 */
242static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 209void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
243{ 210{
244 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 211 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
245 s8 *p = pcp->vm_stat_diff + item; 212 s8 *p = pcp->vm_stat_diff + item;
@@ -260,9 +227,8 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
260} 227}
261EXPORT_SYMBOL(__inc_zone_page_state); 228EXPORT_SYMBOL(__inc_zone_page_state);
262 229
263void __dec_zone_page_state(struct page *page, enum zone_stat_item item) 230void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
264{ 231{
265 struct zone *zone = page_zone(page);
266 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 232 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
267 s8 *p = pcp->vm_stat_diff + item; 233 s8 *p = pcp->vm_stat_diff + item;
268 234
@@ -275,6 +241,11 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
275 *p = overstep; 241 *p = overstep;
276 } 242 }
277} 243}
244
245void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
246{
247 __dec_zone_state(page_zone(page), item);
248}
278EXPORT_SYMBOL(__dec_zone_page_state); 249EXPORT_SYMBOL(__dec_zone_page_state);
279 250
280void inc_zone_state(struct zone *zone, enum zone_stat_item item) 251void inc_zone_state(struct zone *zone, enum zone_stat_item item)
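
Note: the hunk above makes __inc_zone_state() non-static and factors a matching __dec_zone_state() out of __dec_zone_page_state(), so code that already holds a struct zone pointer can skip the page_zone() lookup. An illustrative (not real) call site, using a counter that appears in the vmstat_text table further down:

	/* Illustrative only; both paths update the same per-cpu vm_stat_diff. */
	static void account_example(struct zone *zone, struct page *page)
	{
		__inc_zone_state(zone, NR_BOUNCE);	/* zone already known */
		__dec_zone_page_state(page, NR_BOUNCE);	/* derives page_zone(page) */
	}
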
@@ -437,6 +408,12 @@ const struct seq_operations fragmentation_op = {
437 .show = frag_show, 408 .show = frag_show,
438}; 409};
439 410
411#ifdef CONFIG_ZONE_DMA
412#define TEXT_FOR_DMA(xx) xx "_dma",
413#else
414#define TEXT_FOR_DMA(xx)
415#endif
416
440#ifdef CONFIG_ZONE_DMA32 417#ifdef CONFIG_ZONE_DMA32
441#define TEXT_FOR_DMA32(xx) xx "_dma32", 418#define TEXT_FOR_DMA32(xx) xx "_dma32",
442#else 419#else
@@ -449,19 +426,22 @@ const struct seq_operations fragmentation_op = {
449#define TEXT_FOR_HIGHMEM(xx) 426#define TEXT_FOR_HIGHMEM(xx)
450#endif 427#endif
451 428
452#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ 429#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
453 TEXT_FOR_HIGHMEM(xx) 430 TEXT_FOR_HIGHMEM(xx)
454 431
455static const char * const vmstat_text[] = { 432static const char * const vmstat_text[] = {
456 /* Zoned VM counters */ 433 /* Zoned VM counters */
434 "nr_free_pages",
435 "nr_active",
436 "nr_inactive",
457 "nr_anon_pages", 437 "nr_anon_pages",
458 "nr_mapped", 438 "nr_mapped",
459 "nr_file_pages", 439 "nr_file_pages",
440 "nr_dirty",
441 "nr_writeback",
460 "nr_slab_reclaimable", 442 "nr_slab_reclaimable",
461 "nr_slab_unreclaimable", 443 "nr_slab_unreclaimable",
462 "nr_page_table_pages", 444 "nr_page_table_pages",
463 "nr_dirty",
464 "nr_writeback",
465 "nr_unstable", 445 "nr_unstable",
466 "nr_bounce", 446 "nr_bounce",
467 "nr_vmscan_write", 447 "nr_vmscan_write",
@@ -529,17 +509,13 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
529 "\n min %lu" 509 "\n min %lu"
530 "\n low %lu" 510 "\n low %lu"
531 "\n high %lu" 511 "\n high %lu"
532 "\n active %lu"
533 "\n inactive %lu"
534 "\n scanned %lu (a: %lu i: %lu)" 512 "\n scanned %lu (a: %lu i: %lu)"
535 "\n spanned %lu" 513 "\n spanned %lu"
536 "\n present %lu", 514 "\n present %lu",
537 zone->free_pages, 515 zone_page_state(zone, NR_FREE_PAGES),
538 zone->pages_min, 516 zone->pages_min,
539 zone->pages_low, 517 zone->pages_low,
540 zone->pages_high, 518 zone->pages_high,
541 zone->nr_active,
542 zone->nr_inactive,
543 zone->pages_scanned, 519 zone->pages_scanned,
544 zone->nr_scan_active, zone->nr_scan_inactive, 520 zone->nr_scan_active, zone->nr_scan_inactive,
545 zone->spanned_pages, 521 zone->spanned_pages,
@@ -563,12 +539,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
563 539
564 pageset = zone_pcp(zone, i); 540 pageset = zone_pcp(zone, i);
565 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { 541 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
566 if (pageset->pcp[j].count)
567 break;
568 }
569 if (j == ARRAY_SIZE(pageset->pcp))
570 continue;
571 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
572 seq_printf(m, 542 seq_printf(m,
573 "\n cpu: %i pcp: %i" 543 "\n cpu: %i pcp: %i"
574 "\n count: %i" 544 "\n count: %i"