Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap_xip.c     |   2
-rw-r--r--  mm/fremap.c          |   2
-rw-r--r--  mm/hugetlb.c         |   8
-rw-r--r--  mm/memory.c          |  14
-rw-r--r--  mm/mincore.c         | 183
-rw-r--r--  mm/oom_kill.c        |   9
-rw-r--r--  mm/page-writeback.c  |  77
-rw-r--r--  mm/page_alloc.c      |   2
-rw-r--r--  mm/rmap.c            |  36
-rw-r--r--  mm/shmem.c           |   7
-rw-r--r--  mm/slab.c            |  29
-rw-r--r--  mm/slob.c            |  27
-rw-r--r--  mm/truncate.c        |  31
-rw-r--r--  mm/vmscan.c          |  14
14 files changed, 241 insertions(+), 200 deletions(-)
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 8d667617f558..45b3553865cf 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -189,7 +189,7 @@ __xip_unmap (struct address_space * mapping,
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pte));
 		pteval = ptep_clear_flush(vma, address, pte);
-		page_remove_rmap(page);
+		page_remove_rmap(page, vma);
 		dec_mm_counter(mm, file_rss);
 		BUG_ON(pte_dirty(pteval));
 		pte_unmap_unlock(pte, ptl);
diff --git a/mm/fremap.c b/mm/fremap.c
index b77a002c3352..4e3f53dd5fd4 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -33,7 +33,7 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (page) {
 			if (pte_dirty(pte))
 				set_page_dirty(page);
-			page_remove_rmap(page);
+			page_remove_rmap(page, vma);
 			page_cache_release(page);
 		}
 	} else {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0ccc7f230252..cb362f761f17 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -44,14 +44,14 @@ static void clear_huge_page(struct page *page, unsigned long addr)
 }
 
 static void copy_huge_page(struct page *dst, struct page *src,
-			   unsigned long addr)
+			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
 
 	might_sleep();
 	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
 		cond_resched();
-		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE);
+		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
 	}
 }
 
@@ -73,7 +73,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 
 	for (z = zonelist->zones; *z; z++) {
 		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+		if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
 		    !list_empty(&hugepage_freelists[nid]))
 			break;
 	}
@@ -442,7 +442,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	spin_unlock(&mm->page_table_lock);
-	copy_huge_page(new_page, old_page, address);
+	copy_huge_page(new_page, old_page, address, vma);
 	spin_lock(&mm->page_table_lock);
 
 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
diff --git a/mm/memory.c b/mm/memory.c
index bf6100236e62..563792f4f687 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -681,7 +681,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				mark_page_accessed(page);
 				file_rss--;
 			}
-			page_remove_rmap(page);
+			page_remove_rmap(page, vma);
 			tlb_remove_page(tlb, page);
 			continue;
 		}
@@ -1441,7 +1441,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
 {
 	/*
 	 * If the source page was a PFN mapping, we don't have
@@ -1464,9 +1464,9 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
 		kunmap_atomic(kaddr, KM_USER0);
 		flush_dcache_page(dst);
 		return;
 
 	}
-	copy_user_highpage(dst, src, va);
+	copy_user_highpage(dst, src, va, vma);
 }
 
 /*
@@ -1577,7 +1577,7 @@ gotten:
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
 			goto oom;
-		cow_user_page(new_page, old_page, address);
+		cow_user_page(new_page, old_page, address, vma);
 	}
 
 	/*
@@ -1586,7 +1586,7 @@ gotten:
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
-			page_remove_rmap(old_page);
+			page_remove_rmap(old_page, vma);
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
@@ -2200,7 +2200,7 @@ retry:
 			page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 			if (!page)
 				goto oom;
-			copy_user_highpage(page, new_page, address);
+			copy_user_highpage(page, new_page, address, vma);
 			page_cache_release(new_page);
 			new_page = page;
 			anon = 1;
diff --git a/mm/mincore.c b/mm/mincore.c
index 72890780c1c9..8aca6f7167bb 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -1,7 +1,7 @@
 /*
  *	linux/mm/mincore.c
  *
- * Copyright (C) 1994-1999 Linus Torvalds
+ * Copyright (C) 1994-2006 Linus Torvalds
  */
 
 /*
@@ -38,46 +38,51 @@ static unsigned char mincore_page(struct vm_area_struct * vma,
 	return present;
 }
 
-static long mincore_vma(struct vm_area_struct * vma,
-	unsigned long start, unsigned long end, unsigned char __user * vec)
-{
-	long error, i, remaining;
-	unsigned char * tmp;
-
-	error = -ENOMEM;
-	if (!vma->vm_file)
-		return error;
-
-	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-	if (end > vma->vm_end)
-		end = vma->vm_end;
-	end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-
-	error = -EAGAIN;
-	tmp = (unsigned char *) __get_free_page(GFP_KERNEL);
-	if (!tmp)
-		return error;
-
-	/* (end - start) is # of pages, and also # of bytes in "vec */
-	remaining = (end - start),
-
-	error = 0;
-	for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) {
-		int j = 0;
-		long thispiece = (remaining < PAGE_SIZE) ?
-			remaining : PAGE_SIZE;
-
-		while (j < thispiece)
-			tmp[j++] = mincore_page(vma, start++);
-
-		if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) {
-			error = -EFAULT;
-			break;
-		}
-	}
-
-	free_page((unsigned long) tmp);
-	return error;
+/*
+ * Do a chunk of "sys_mincore()". We've already checked
+ * all the arguments, we hold the mmap semaphore: we should
+ * just return the amount of info we're asked for.
+ */
+static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
+{
+	unsigned long i, nr, pgoff;
+	struct vm_area_struct *vma = find_vma(current->mm, addr);
+
+	/*
+	 * find_vma() didn't find anything above us, or we're
+	 * in an unmapped hole in the address space: ENOMEM.
+	 */
+	if (!vma || addr < vma->vm_start)
+		return -ENOMEM;
+
+	/*
+	 * Ok, got it. But check whether it's a segment we support
+	 * mincore() on. Right now, we don't do any anonymous mappings.
+	 *
+	 * FIXME: This is just stupid. And returning ENOMEM is
+	 * stupid too. We should just look at the page tables. But
+	 * this is what we've traditionally done, so we'll just
+	 * continue doing it.
+	 */
+	if (!vma->vm_file)
+		return -ENOMEM;
+
+	/*
+	 * Calculate how many pages there are left in the vma, and
+	 * what the pgoff is for our address.
+	 */
+	nr = (vma->vm_end - addr) >> PAGE_SHIFT;
+	if (nr > pages)
+		nr = pages;
+
+	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+	pgoff += vma->vm_pgoff;
+
+	/* And then we just fill the sucker in.. */
+	for (i = 0 ; i < nr; i++, pgoff++)
+		vec[i] = mincore_page(vma, pgoff);
+
+	return nr;
 }
 
 /*
@@ -107,82 +112,50 @@ static long mincore_vma(struct vm_area_struct * vma,
 asmlinkage long sys_mincore(unsigned long start, size_t len,
 	unsigned char __user * vec)
 {
-	int index = 0;
-	unsigned long end, limit;
-	struct vm_area_struct * vma;
-	size_t max;
-	int unmapped_error = 0;
-	long error;
-
-	/* check the arguments */
-	if (start & ~PAGE_CACHE_MASK)
-		goto einval;
-
-	limit = TASK_SIZE;
-	if (start >= limit)
-		goto enomem;
-
-	if (!len)
-		return 0;
-
-	max = limit - start;
-	len = PAGE_CACHE_ALIGN(len);
-	if (len > max || !len)
-		goto enomem;
-
-	end = start + len;
-
-	/* check the output buffer whilst holding the lock */
-	error = -EFAULT;
-	down_read(&current->mm->mmap_sem);
-
-	if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT))
-		goto out;
-
-	/*
-	 * If the interval [start,end) covers some unmapped address
-	 * ranges, just ignore them, but return -ENOMEM at the end.
-	 */
-	error = 0;
-
-	vma = find_vma(current->mm, start);
-	while (vma) {
-		/* Here start < vma->vm_end. */
-		if (start < vma->vm_start) {
-			unmapped_error = -ENOMEM;
-			start = vma->vm_start;
-		}
-
-		/* Here vma->vm_start <= start < vma->vm_end. */
-		if (end <= vma->vm_end) {
-			if (start < end) {
-				error = mincore_vma(vma, start, end,
-							&vec[index]);
-				if (error)
-					goto out;
-			}
-			error = unmapped_error;
-			goto out;
-		}
-
-		/* Here vma->vm_start <= start < vma->vm_end < end. */
-		error = mincore_vma(vma, start, vma->vm_end, &vec[index]);
-		if (error)
-			goto out;
-		index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT;
-		start = vma->vm_end;
-		vma = vma->vm_next;
-	}
-
-	/* we found a hole in the area queried if we arrive here */
-	error = -ENOMEM;
-
-out:
-	up_read(&current->mm->mmap_sem);
-	return error;
-
-einval:
-	return -EINVAL;
-enomem:
-	return -ENOMEM;
+	long retval;
+	unsigned long pages;
+	unsigned char *tmp;
+
+	/* Check the start address: needs to be page-aligned.. */
+	if (start & ~PAGE_CACHE_MASK)
+		return -EINVAL;
+
+	/* ..and we need to be passed a valid user-space range */
+	if (!access_ok(VERIFY_READ, (void __user *) start, len))
+		return -ENOMEM;
+
+	/* This also avoids any overflows on PAGE_CACHE_ALIGN */
+	pages = len >> PAGE_SHIFT;
+	pages += (len & ~PAGE_MASK) != 0;
+
+	if (!access_ok(VERIFY_WRITE, vec, pages))
+		return -EFAULT;
+
+	tmp = (void *) __get_free_page(GFP_USER);
+	if (!tmp)
+		return -EAGAIN;
+
+	retval = 0;
+	while (pages) {
+		/*
+		 * Do at most PAGE_SIZE entries per iteration, due to
+		 * the temporary buffer size.
+		 */
+		down_read(&current->mm->mmap_sem);
+		retval = do_mincore(start, tmp, min(pages, PAGE_SIZE));
+		up_read(&current->mm->mmap_sem);
+
+		if (retval <= 0)
+			break;
+		if (copy_to_user(vec, tmp, retval)) {
+			retval = -EFAULT;
+			break;
+		}
+		pages -= retval;
+		vec += retval;
+		start += retval << PAGE_SHIFT;
+		retval = 0;
+	}
+	free_page((unsigned long) tmp);
+	return retval;
 }
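
For reference, a minimal user-space caller of mincore(2) that matches what the rewritten sys_mincore() expects (a page-aligned start address and one vec byte per page of the range, rounded up) could look like the sketch below. It is illustrative only and not part of the patch; the file name and the bare error handling are arbitrary choices.

#define _DEFAULT_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = open("/etc/hosts", O_RDONLY);	/* any regular file works */
	struct stat st;
	unsigned char *vec;
	void *map;
	size_t pages, i, resident = 0;

	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
		return 1;

	/* mmap() hands back a page-aligned address, as sys_mincore() requires */
	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	/* one vec byte per page, rounding up: the same arithmetic sys_mincore() does */
	pages = ((size_t)st.st_size + page - 1) / page;
	vec = malloc(pages);
	if (!vec)
		return 1;

	if (mincore(map, st.st_size, vec) == 0) {
		for (i = 0; i < pages; i++)
			resident += vec[i] & 1;
		printf("%zu of %zu pages resident\n", resident, pages);
	}

	free(vec);
	munmap(map, st.st_size);
	close(fd);
	return 0;
}
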
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 223d9ccb7d64..6969cfb33901 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -174,10 +174,15 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 {
 #ifdef CONFIG_NUMA
 	struct zone **z;
-	nodemask_t nodes = node_online_map;
+	nodemask_t nodes;
+	int node;
+	/* node has memory ? */
+	for_each_online_node(node)
+		if (NODE_DATA(node)->node_present_pages)
+			node_set(node, nodes);
 
 	for (z = zonelist->zones; *z; z++)
-		if (cpuset_zone_allowed(*z, gfp_mask))
+		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
 			node_clear(zone_to_nid(*z), nodes);
 		else
 			return CONSTRAINT_CPUSET;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 237107c1b084..1d2fc89ca56d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -845,38 +845,6 @@ int set_page_dirty_lock(struct page *page)
 EXPORT_SYMBOL(set_page_dirty_lock);
 
 /*
- * Clear a page's dirty flag, while caring for dirty memory accounting.
- * Returns true if the page was previously dirty.
- */
-int test_clear_page_dirty(struct page *page)
-{
-	struct address_space *mapping = page_mapping(page);
-	unsigned long flags;
-
-	if (!mapping)
-		return TestClearPageDirty(page);
-
-	write_lock_irqsave(&mapping->tree_lock, flags);
-	if (TestClearPageDirty(page)) {
-		radix_tree_tag_clear(&mapping->page_tree,
-				page_index(page), PAGECACHE_TAG_DIRTY);
-		write_unlock_irqrestore(&mapping->tree_lock, flags);
-		/*
-		 * We can continue to use `mapping' here because the
-		 * page is locked, which pins the address_space
-		 */
-		if (mapping_cap_account_dirty(mapping)) {
-			page_mkclean(page);
-			dec_zone_page_state(page, NR_FILE_DIRTY);
-		}
-		return 1;
-	}
-	write_unlock_irqrestore(&mapping->tree_lock, flags);
-	return 0;
-}
-EXPORT_SYMBOL(test_clear_page_dirty);
-
-/*
  * Clear a page's dirty flag, while caring for dirty memory accounting.
  * Returns true if the page was previously dirty.
  *
@@ -894,17 +862,46 @@ int clear_page_dirty_for_io(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
-	if (!mapping)
-		return TestClearPageDirty(page);
-
-	if (TestClearPageDirty(page)) {
-		if (mapping_cap_account_dirty(mapping)) {
-			page_mkclean(page);
-			dec_zone_page_state(page, NR_FILE_DIRTY);
-		}
-		return 1;
-	}
-	return 0;
+	if (mapping && mapping_cap_account_dirty(mapping)) {
+		/*
+		 * Yes, Virginia, this is indeed insane.
+		 *
+		 * We use this sequence to make sure that
+		 *  (a) we account for dirty stats properly
+		 *  (b) we tell the low-level filesystem to
+		 *      mark the whole page dirty if it was
+		 *      dirty in a pagetable. Only to then
+		 *  (c) clean the page again and return 1 to
+		 *      cause the writeback.
+		 *
+		 * This way we avoid all nasty races with the
+		 * dirty bit in multiple places and clearing
+		 * them concurrently from different threads.
+		 *
+		 * Note! Normally the "set_page_dirty(page)"
+		 * has no effect on the actual dirty bit - since
+		 * that will already usually be set. But we
+		 * need the side effects, and it can help us
+		 * avoid races.
+		 *
+		 * We basically use the page "master dirty bit"
+		 * as a serialization point for all the different
+		 * threads doing their things.
+		 *
+		 * FIXME! We still have a race here: if somebody
+		 * adds the page back to the page tables in
+		 * between the "page_mkclean()" and the "TestClearPageDirty()",
+		 * we might have it mapped without the dirty bit set.
+		 */
+		if (page_mkclean(page))
+			set_page_dirty(page);
+		if (TestClearPageDirty(page)) {
+			dec_zone_page_state(page, NR_FILE_DIRTY);
+			return 1;
+		}
+		return 0;
+	}
+	return TestClearPageDirty(page);
 }
 EXPORT_SYMBOL(clear_page_dirty_for_io);
 
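
As a rough illustration of the ordering that the comment added above describes, the following stand-alone C11 sketch models the idea of using one "master" dirty flag as the serialization point: externally tracked dirty state is folded into the master flag first (the set_page_dirty() step), and only then is the master flag test-and-cleared to decide whether writeback is needed. The names and the atomics are invented for the example; this is an analogy, not kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool page_dirty;	/* stand-in for the page's master dirty bit */
static atomic_bool pte_dirty;	/* stand-in for dirty state tracked elsewhere */

/* roughly analogous to page_mkclean(): collect and clear the external dirty state */
static bool collect_external_dirty(void)
{
	return atomic_exchange(&pte_dirty, false);
}

/* roughly analogous to clear_page_dirty_for_io() */
static bool clear_dirty_for_io(void)
{
	/* fold any external dirty state into the master flag first... */
	if (collect_external_dirty())
		atomic_store(&page_dirty, true);	/* the set_page_dirty() step */
	/* ...then test-and-clear the master flag to decide on writeback */
	return atomic_exchange(&page_dirty, false);
}

int main(void)
{
	atomic_store(&pte_dirty, true);		/* the page was dirtied through a mapping */
	printf("writeback needed: %d\n", clear_dirty_for_io());	/* 1 */
	printf("writeback needed: %d\n", clear_dirty_for_io());	/* 0 */
	return 0;
}
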
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e6b17b2989e0..8c1a116875bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1162,7 +1162,7 @@ zonelist_scan:
 				zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
 					break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed(zone, gfp_mask))
+			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
 
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
diff --git a/mm/rmap.c b/mm/rmap.c
index d8a842a586db..669acb22b572 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -47,6 +47,7 @@
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
 
 #include <asm/tlbflush.h>
 
@@ -432,7 +433,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
-	pte_t *pte, entry;
+	pte_t *pte;
 	spinlock_t *ptl;
 	int ret = 0;
 
@@ -444,17 +445,18 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (!pte)
 		goto out;
 
-	if (!pte_dirty(*pte) && !pte_write(*pte))
-		goto unlock;
-
-	entry = ptep_get_and_clear(mm, address, pte);
-	entry = pte_mkclean(entry);
-	entry = pte_wrprotect(entry);
-	ptep_establish(vma, address, pte, entry);
-	lazy_mmu_prot_update(entry);
-	ret = 1;
+	if (pte_dirty(*pte) || pte_write(*pte)) {
+		pte_t entry;
+
+		flush_cache_page(vma, address, pte_pfn(*pte));
+		entry = ptep_clear_flush(vma, address, pte);
+		entry = pte_wrprotect(entry);
+		entry = pte_mkclean(entry);
+		set_pte_at(mm, address, pte, entry);
+		lazy_mmu_prot_update(entry);
+		ret = 1;
+	}
 
-unlock:
 	pte_unmap_unlock(pte, ptl);
 out:
 	return ret;
@@ -489,6 +491,8 @@ int page_mkclean(struct page *page)
 		if (mapping)
 			ret = page_mkclean_file(mapping, page);
 	}
+	if (page_test_and_clear_dirty(page))
+		ret = 1;
 
 	return ret;
 }
@@ -567,14 +571,20 @@ void page_add_file_rmap(struct page *page)
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 {
 	if (atomic_add_negative(-1, &page->_mapcount)) {
 		if (unlikely(page_mapcount(page) < 0)) {
 			printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
+			printk (KERN_EMERG "  page pfn = %lx\n", page_to_pfn(page));
 			printk (KERN_EMERG "  page->flags = %lx\n", page->flags);
 			printk (KERN_EMERG "  page->count = %x\n", page_count(page));
 			printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
+			print_symbol (KERN_EMERG "  vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
+			if (vma->vm_ops)
+				print_symbol (KERN_EMERG "  vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage);
+			if (vma->vm_file && vma->vm_file->f_op)
+				print_symbol (KERN_EMERG "  vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
 			BUG();
 		}
 
@@ -679,7 +689,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		dec_mm_counter(mm, file_rss);
 
 
-	page_remove_rmap(page);
+	page_remove_rmap(page, vma);
 	page_cache_release(page);
 
 out_unmap:
@@ -769,7 +779,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		if (pte_dirty(pteval))
 			set_page_dirty(page);
 
-		page_remove_rmap(page);
+		page_remove_rmap(page, vma);
 		page_cache_release(page);
 		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
diff --git a/mm/shmem.c b/mm/shmem.c
index 4bb28d218eb5..70da7a0981bf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -515,7 +515,12 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 		size = SHMEM_NR_DIRECT;
 		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
 	}
-	if (!topdir)
+
+	/*
+	 * If there are no indirect blocks or we are punching a hole
+	 * below indirect blocks, nothing to be done.
+	 */
+	if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
 		goto done2;
 
 	BUG_ON(limit <= SHMEM_NR_DIRECT);
diff --git a/mm/slab.c b/mm/slab.c
index 2c655532f5ef..0d4e57431de4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -109,6 +109,7 @@
 #include <linux/mutex.h>
 #include <linux/fault-inject.h>
 #include <linux/rtmutex.h>
+#include <linux/reciprocal_div.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -386,6 +387,7 @@ struct kmem_cache {
 	unsigned int shared;
 
 	unsigned int buffer_size;
+	u32 reciprocal_buffer_size;
 /* 3) touched by every alloc & free from the backend */
 	struct kmem_list3 *nodelists[MAX_NUMNODES];
 
@@ -627,10 +629,17 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
 	return slab->s_mem + cache->buffer_size * idx;
 }
 
-static inline unsigned int obj_to_index(struct kmem_cache *cache,
-					struct slab *slab, void *obj)
+/*
+ * We want to avoid an expensive divide : (offset / cache->buffer_size)
+ *   Using the fact that buffer_size is a constant for a particular cache,
+ *   we can replace (offset / cache->buffer_size) by
+ *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
+ */
+static inline unsigned int obj_to_index(const struct kmem_cache *cache,
+					const struct slab *slab, void *obj)
 {
-	return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
+	u32 offset = (obj - slab->s_mem);
+	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
 /*
@@ -1427,6 +1436,8 @@ void __init kmem_cache_init(void)
 
 	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
 					cache_line_size());
+	cache_cache.reciprocal_buffer_size =
+		reciprocal_value(cache_cache.buffer_size);
 
 	for (order = 0; order < MAX_ORDER; order++) {
 		cache_estimate(order, cache_cache.buffer_size,
@@ -2313,6 +2324,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (flags & SLAB_CACHE_DMA)
 		cachep->gfpflags |= GFP_DMA;
 	cachep->buffer_size = size;
+	cachep->reciprocal_buffer_size = reciprocal_value(size);
 
 	if (flags & CFLGS_OFF_SLAB) {
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
@@ -3252,6 +3264,7 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 	struct zone **z;
 	void *obj = NULL;
 	int nid;
+	gfp_t local_flags = (flags & GFP_LEVEL_MASK);
 
 retry:
 	/*
@@ -3261,7 +3274,7 @@ retry:
 	for (z = zonelist->zones; *z && !obj; z++) {
 		nid = zone_to_nid(*z);
 
-		if (cpuset_zone_allowed(*z, flags | __GFP_HARDWALL) &&
+		if (cpuset_zone_allowed_hardwall(*z, flags) &&
 			cache->nodelists[nid] &&
 			cache->nodelists[nid]->free_objects)
 				obj = ____cache_alloc_node(cache,
@@ -3275,7 +3288,12 @@ retry:
 	 * We may trigger various forms of reclaim on the allowed
 	 * set and go into memory reserves if necessary.
 	 */
+	if (local_flags & __GFP_WAIT)
+		local_irq_enable();
+	kmem_flagcheck(cache, flags);
 	obj = kmem_getpages(cache, flags, -1);
+	if (local_flags & __GFP_WAIT)
+		local_irq_disable();
 	if (obj) {
 		/*
 		 * Insert into the appropriate per node queues
@@ -3535,7 +3553,7 @@ EXPORT_SYMBOL(kmem_cache_zalloc);
  *
  * Currently only used for dentry validation.
  */
-int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr)
+int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
 {
 	unsigned long addr = (unsigned long)ptr;
 	unsigned long min_addr = PAGE_OFFSET;
@@ -3569,6 +3587,7 @@ out:
  * @cachep: The cache to allocate from.
  * @flags: See kmalloc().
  * @nodeid: node number of the target node.
+ * @caller: return address of caller, used for debug information
  *
  * Identical to kmem_cache_alloc but it will allocate memory on the given
  * node, which can improve the performance for cpu bound structures.
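
The obj_to_index() hunk above replaces a divide with a reciprocal multiplication. A user-space sketch of the trick is shown below, assuming the ceil(2^32 / k) style reciprocal that lib/reciprocal_div.c of this era appears to use; in the slab case the dividend is always an exact multiple of buffer_size (offset = index * buffer_size), which is where the result is exact. The sketch is illustrative only and is not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

/* precompute ceil(2^32 / k) once per cache */
static uint32_t reciprocal_value(uint32_t k)
{
	uint64_t val = (1ULL << 32) + (k - 1);
	return (uint32_t)(val / k);
}

/* replace "a / k" with a 32x32->64 multiply and a shift */
static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
{
	return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
	uint32_t buffer_size = 192;	/* hypothetical object size for the example */
	uint32_t r = reciprocal_value(buffer_size);
	uint32_t idx;

	for (idx = 0; idx < 5; idx++) {
		uint32_t offset = idx * buffer_size;	/* exact multiples, as in the slab */
		printf("offset %u -> index %u (expect %u)\n",
		       offset, reciprocal_divide(offset, r), idx);
	}
	return 0;
}
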
diff --git a/mm/slob.c b/mm/slob.c
index 542394184a58..5adc29cb58dd 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -60,6 +60,8 @@ static DEFINE_SPINLOCK(slob_lock);
 static DEFINE_SPINLOCK(block_lock);
 
 static void slob_free(void *b, int size);
+static void slob_timer_cbk(void);
+
 
 static void *slob_alloc(size_t size, gfp_t gfp, int align)
 {
@@ -157,7 +159,7 @@ static int fastcall find_order(int size)
 	return order;
 }
 
-void *kmalloc(size_t size, gfp_t gfp)
+void *__kmalloc(size_t size, gfp_t gfp)
 {
 	slob_t *m;
 	bigblock_t *bb;
@@ -186,8 +188,7 @@ void *kmalloc(size_t size, gfp_t gfp)
 	slob_free(bb, sizeof(bigblock_t));
 	return 0;
 }
-
-EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(__kmalloc);
 
 void kfree(const void *block)
 {
@@ -327,9 +328,25 @@ const char *kmem_cache_name(struct kmem_cache *c)
 EXPORT_SYMBOL(kmem_cache_name);
 
 static struct timer_list slob_timer = TIMER_INITIALIZER(
-	(void (*)(unsigned long))kmem_cache_init, 0, 0);
+	(void (*)(unsigned long))slob_timer_cbk, 0, 0);
+
+int kmem_cache_shrink(struct kmem_cache *d)
+{
+	return 0;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
+int kmem_ptr_validate(struct kmem_cache *a, const void *b)
+{
+	return 0;
+}
+
+void __init kmem_cache_init(void)
+{
+	slob_timer_cbk();
+}
 
-void kmem_cache_init(void)
+static void slob_timer_cbk(void)
 {
 	void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1);
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 9bfb8e853860..ecdfdcc50522 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -51,6 +51,26 @@ static inline void truncate_partial_page(struct page *page, unsigned partial)
 	do_invalidatepage(page, partial);
 }
 
+void cancel_dirty_page(struct page *page, unsigned int account_size)
+{
+	/* If we're cancelling the page, it had better not be mapped any more */
+	if (page_mapped(page)) {
+		static unsigned int warncount;
+
+		WARN_ON(++warncount < 5);
+	}
+
+	if (TestClearPageDirty(page)) {
+		struct address_space *mapping = page->mapping;
+		if (mapping && mapping_cap_account_dirty(mapping)) {
+			dec_zone_page_state(page, NR_FILE_DIRTY);
+			if (account_size)
+				task_io_account_cancelled_write(account_size);
+		}
+	}
+}
+EXPORT_SYMBOL(cancel_dirty_page);
+
 /*
  * If truncate cannot remove the fs-private metadata from the page, the page
  * becomes anonymous. It will be left on the LRU and may even be mapped into
@@ -67,11 +87,11 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 	if (page->mapping != mapping)
 		return;
 
+	cancel_dirty_page(page, PAGE_CACHE_SIZE);
+
 	if (PagePrivate(page))
 		do_invalidatepage(page, 0);
 
-	if (test_clear_page_dirty(page))
-		task_io_account_cancelled_write(PAGE_CACHE_SIZE);
 	ClearPageUptodate(page);
 	ClearPageMappedToDisk(page);
 	remove_from_page_cache(page);
@@ -350,7 +370,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 			pgoff_t page_index;
-			int was_dirty;
 
 			lock_page(page);
 			if (page->mapping != mapping) {
@@ -386,12 +405,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 						  PAGE_CACHE_SIZE, 0);
 				}
 			}
-			was_dirty = test_clear_page_dirty(page);
-			if (!invalidate_complete_page2(mapping, page)) {
-				if (was_dirty)
-					set_page_dirty(page);
+			if (!invalidate_complete_page2(mapping, page))
 				ret = -EIO;
-			}
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 093f5fe6dd77..40fea4918390 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -692,7 +692,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		__count_vm_events(KSWAPD_STEAL, nr_freed);
 	} else
 		__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
-	__count_vm_events(PGACTIVATE, nr_freed);
+	__count_zone_vm_events(PGSTEAL, zone, nr_freed);
 
 	if (nr_taken == 0)
 		goto done;
@@ -984,7 +984,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 		if (!populated_zone(zone))
 			continue;
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		note_zone_scanning_priority(zone, priority);
@@ -1034,7 +1034,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		lru_pages += zone->nr_active + zone->nr_inactive;
@@ -1089,7 +1089,7 @@ out:
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		zone->prev_priority = priority;
@@ -1354,7 +1354,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
-	if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
@@ -1369,8 +1369,8 @@ void wakeup_kswapd(struct zone *zone, int order)
  *
  * For pass > 3 we also try to shrink the LRU lists that contain a few pages
  */
-static unsigned long shrink_all_zones(unsigned long nr_pages, int pass,
-				int prio, struct scan_control *sc)
+static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
+				int pass, struct scan_control *sc)
 {
 	struct zone *zone;
 	unsigned long nr_to_scan, ret = 0;