path: root/mm
Diffstat (limited to 'mm')
-rw-r--r--  mm/allocpercpu.c     |   2
-rw-r--r--  mm/backing-dev.c     |  12
-rw-r--r--  mm/bootmem.c         |   6
-rw-r--r--  mm/filemap.c         |   5
-rw-r--r--  mm/hugetlb.c         |   2
-rw-r--r--  mm/memory.c          | 106
-rw-r--r--  mm/memory_hotplug.c  |  86
-rw-r--r--  mm/mempolicy.c       |   6
-rw-r--r--  mm/migrate.c         |  12
-rw-r--r--  mm/mmap.c            |  12
-rw-r--r--  mm/mprotect.c        |  21
-rw-r--r--  mm/nommu.c           |  21
-rw-r--r--  mm/page_alloc.c      |  46
-rw-r--r--  mm/pagewalk.c        |  42
-rw-r--r--  mm/pdflush.c         |   4
-rw-r--r--  mm/slab.c            |   5
-rw-r--r--  mm/slob.c            |   5
-rw-r--r--  mm/slub.c            |  19
-rw-r--r--  mm/sparse-vmemmap.c  |   2
-rw-r--r--  mm/swap.c            |   4
-rw-r--r--  mm/vmscan.c          |   2
-rw-r--r--  mm/vmstat.c          |   2
22 files changed, 269 insertions(+), 153 deletions(-)
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index f4026bae6eed..05f2b4009ccc 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -1,7 +1,7 @@
 /*
  * linux/mm/allocpercpu.c
  *
- * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>
+ * Separated from slab.c August 11, 2006 Christoph Lameter
  */
 #include <linux/mm.h>
 #include <linux/module.h>
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7c4f9e097095..f2e574dbc300 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -172,30 +172,22 @@ postcore_initcall(bdi_class_init);
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                 const char *fmt, ...)
 {
-        char *name;
         va_list args;
         int ret = 0;
         struct device *dev;
 
         va_start(args, fmt);
-        name = kvasprintf(GFP_KERNEL, fmt, args);
+        dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
         va_end(args);
-
-        if (!name)
-                return -ENOMEM;
-
-        dev = device_create(bdi_class, parent, MKDEV(0, 0), name);
         if (IS_ERR(dev)) {
                 ret = PTR_ERR(dev);
                 goto exit;
         }
 
         bdi->dev = dev;
-        dev_set_drvdata(bdi->dev, bdi);
-        bdi_debug_register(bdi, name);
+        bdi_debug_register(bdi, dev_name(dev));
 
 exit:
-        kfree(name);
         return ret;
 }
 EXPORT_SYMBOL(bdi_register);
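The rework above folds name formatting, device allocation and drvdata assignment into one device_create_vargs() call. A minimal sketch of the same pattern in a hypothetical varargs registration helper (foo_class and the foo pointer are illustrative, not kernel API):

#include <stdarg.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/kdev_t.h>

static struct class *foo_class;	/* assumed to be created with class_create() elsewhere */

static int foo_register(void *foo, struct device *parent, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	va_start(args, fmt);
	/* formats the name, creates the device and stores foo as drvdata */
	dev = device_create_vargs(foo_class, parent, MKDEV(0, 0), foo, fmt, args);
	va_end(args);

	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}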
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b9..8d9f60e06f62 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
         return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }
 
-void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
                                  unsigned long size, int flags)
 {
         int ret;
 
         ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
         if (ret < 0)
-                return;
+                return -ENOMEM;
         reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+
+        return 0;
 }
 
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
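With the int return type, architecture setup code can see when a node-local reservation fails instead of continuing with a silently unreserved range. A hedged sketch of a caller checking the new return value (the helper name and values are placeholders):

#include <linux/kernel.h>
#include <linux/bootmem.h>

static int __init reserve_node_local_region(pg_data_t *pgdat,
					    unsigned long physaddr,
					    unsigned long size)
{
	int ret;

	/* BOOTMEM_DEFAULT: overlapping an existing reservation is an error */
	ret = reserve_bootmem_node(pgdat, physaddr, size, BOOTMEM_DEFAULT);
	if (ret)
		printk(KERN_WARNING "could not reserve %lu bytes at %#lx\n",
		       size, physaddr);
	return ret;
}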
diff --git a/mm/filemap.c b/mm/filemap.c
index 2dead9adf8b7..1e6a7d34874f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1461,6 +1461,11 @@ page_not_uptodate:
          */
         ClearPageError(page);
         error = mapping->a_ops->readpage(file, page);
+        if (!error) {
+                wait_on_page_locked(page);
+                if (!PageUptodate(page))
+                        error = -EIO;
+        }
         page_cache_release(page);
 
         if (!error || error == AOP_TRUNCATED_PAGE)
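The added block closes a window where a synchronous ->readpage that returned 0 could still leave the page not uptodate; the caller now waits for the page lock to drop and converts that case into -EIO. The same issue-then-wait shape, as a standalone hedged sketch:

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Illustrative helper: start ->readpage on a locked page and wait for the result. */
static int read_page_and_wait(struct file *file, struct page *page)
{
	int error = page->mapping->a_ops->readpage(file, page);

	if (!error) {
		/* ->readpage unlocks the page when the I/O completes */
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			error = -EIO;
	}
	return error;
}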
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bbf953eeb58b..ab171274ef21 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -785,7 +785,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                         continue;
 
                 spin_lock(&dst->page_table_lock);
-                spin_lock(&src->page_table_lock);
+                spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
                 if (!huge_pte_none(huge_ptep_get(src_pte))) {
                         if (cow)
                                 huge_ptep_set_wrprotect(src, addr, src_pte);
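spin_lock_nested() with SINGLE_DEPTH_NESTING tells lockdep that taking a second page_table_lock (same lock class, different mm) is an intentional, ordered nesting rather than a self-deadlock. The general shape of the annotation, as a hedged sketch:

#include <linux/sched.h>
#include <linux/spinlock.h>

static void copy_under_both_locks(struct mm_struct *dst, struct mm_struct *src)
{
	spin_lock(&dst->page_table_lock);
	/* same lock class as above - annotate the inner acquisition for lockdep */
	spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);

	/* ... copy entries from src to dst ... */

	spin_unlock(&src->page_table_lock);
	spin_unlock(&dst->page_table_lock);
}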
diff --git a/mm/memory.c b/mm/memory.c
index 48c122d42ed7..2302d228fe04 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -311,6 +311,21 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
         if (!new)
                 return -ENOMEM;
 
+        /*
+         * Ensure all pte setup (eg. pte page lock and page clearing) are
+         * visible before the pte is made visible to other CPUs by being
+         * put into page tables.
+         *
+         * The other side of the story is the pointer chasing in the page
+         * table walking code (when walking the page table without locking;
+         * ie. most of the time). Fortunately, these data accesses consist
+         * of a chain of data-dependent loads, meaning most CPUs (alpha
+         * being the notable exception) will already guarantee loads are
+         * seen in-order. See the alpha page table accessors for the
+         * smp_read_barrier_depends() barriers in page table walking code.
+         */
+        smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
+
         spin_lock(&mm->page_table_lock);
         if (!pmd_present(*pmd)) {       /* Has another populated it ? */
                 mm->nr_ptes++;
@@ -329,6 +344,8 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
         if (!new)
                 return -ENOMEM;
 
+        smp_wmb(); /* See comment in __pte_alloc */
+
         spin_lock(&init_mm.page_table_lock);
         if (!pmd_present(*pmd)) {       /* Has another populated it ? */
                 pmd_populate_kernel(&init_mm, pmd, new);
@@ -982,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                 goto no_page_table;
 
         ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-        if (!ptep)
-                goto out;
 
         pte = *ptep;
         if (!pte_present(pte))
-                goto unlock;
+                goto no_page;
         if ((flags & FOLL_WRITE) && !pte_write(pte))
                 goto unlock;
         page = vm_normal_page(vma, address, pte);
         if (unlikely(!page))
-                goto unlock;
+                goto bad_page;
 
         if (flags & FOLL_GET)
                 get_page(page);
@@ -1007,6 +1022,15 @@ unlock:
 out:
         return page;
 
+bad_page:
+        pte_unmap_unlock(ptep, ptl);
+        return ERR_PTR(-EFAULT);
+
+no_page:
+        pte_unmap_unlock(ptep, ptl);
+        if (!pte_none(pte))
+                return page;
+        /* Fall through to ZERO_PAGE handling */
 no_page_table:
         /*
          * When core dumping an enormous anonymous area that nobody
@@ -1021,6 +1045,26 @@ no_page_table:
         return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+        /*
+         * We don't want to optimize FOLL_ANON for make_pages_present()
+         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+         * we want to get the page from the page tables to make sure
+         * that we serialize and update with any other user of that
+         * mapping.
+         */
+        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+                return 0;
+        /*
+         * And if we have a fault or a nopfn routine, it's not an
+         * anonymous region.
+         */
+        return !vma->vm_ops ||
+                (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 unsigned long start, int len, int write, int force,
                 struct page **pages, struct vm_area_struct **vmas)
@@ -1095,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 foll_flags = FOLL_TOUCH;
                 if (pages)
                         foll_flags |= FOLL_GET;
-                if (!write && !(vma->vm_flags & VM_LOCKED) &&
-                    (!vma->vm_ops || !vma->vm_ops->fault))
+                if (!write && use_zero_page(vma))
                         foll_flags |= FOLL_ANON;
 
                 do {
@@ -1108,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                          * be processed until returning to user space.
                          */
                         if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
-                                return -ENOMEM;
+                                return i ? i : -ENOMEM;
 
                         if (write)
                                 foll_flags |= FOLL_WRITE;
@@ -1142,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
                                 cond_resched();
                         }
+                        if (IS_ERR(page))
+                                return i ? i : PTR_ERR(page);
                         if (pages) {
                                 pages[i] = page;
 
@@ -1652,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         struct page *dirty_page = NULL;
 
         old_page = vm_normal_page(vma, address, orig_pte);
-        if (!old_page)
+        if (!old_page) {
+                /*
+                 * VM_MIXEDMAP !pfn_valid() case
+                 *
+                 * We should not cow pages in a shared writeable mapping.
+                 * Just mark the pages writable as we can't do any dirty
+                 * accounting on raw pfn maps.
+                 */
+                if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+                                     (VM_WRITE|VM_SHARED))
+                        goto reuse;
                 goto gotten;
+        }
 
         /*
          * Take out anonymous pages first, anonymous shared vmas are
@@ -1706,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         }
 
         if (reuse) {
+reuse:
                 flush_cache_page(vma, address, pte_pfn(orig_pte));
                 entry = pte_mkyoung(orig_pte);
                 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1740,7 +1797,6 @@ gotten:
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
         if (likely(pte_same(*page_table, orig_pte))) {
                 if (old_page) {
-                        page_remove_rmap(old_page, vma);
                         if (!PageAnon(old_page)) {
                                 dec_mm_counter(mm, file_rss);
                                 inc_mm_counter(mm, anon_rss);
@@ -1762,6 +1818,32 @@ gotten:
                 lru_cache_add_active(new_page);
                 page_add_new_anon_rmap(new_page, vma, address);
 
+                if (old_page) {
+                        /*
+                         * Only after switching the pte to the new page may
+                         * we remove the mapcount here. Otherwise another
+                         * process may come and find the rmap count decremented
+                         * before the pte is switched to the new page, and
+                         * "reuse" the old page writing into it while our pte
+                         * here still points into it and can be read by other
+                         * threads.
+                         *
+                         * The critical issue is to order this
+                         * page_remove_rmap with the ptp_clear_flush above.
+                         * Those stores are ordered by (if nothing else,)
+                         * the barrier present in the atomic_add_negative
+                         * in page_remove_rmap.
+                         *
+                         * Then the TLB flush in ptep_clear_flush ensures that
+                         * no process can access the old page before the
+                         * decremented mapcount is visible. And the old page
+                         * cannot be reused until after the decremented
+                         * mapcount is visible. So transitively, TLBs to
+                         * old page will be flushed before it can be reused.
+                         */
+                        page_remove_rmap(old_page, vma);
+                }
+
                 /* Free the old page.. */
                 new_page = old_page;
                 ret |= VM_FAULT_WRITE;
@@ -2278,8 +2360,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         vmf.flags = flags;
         vmf.page = NULL;
 
-        BUG_ON(vma->vm_flags & VM_PFNMAP);
-
         ret = vma->vm_ops->fault(vma, &vmf);
         if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
                 return ret;
@@ -2619,6 +2699,8 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
         if (!new)
                 return -ENOMEM;
 
+        smp_wmb(); /* See comment in __pte_alloc */
+
         spin_lock(&mm->page_table_lock);
         if (pgd_present(*pgd))          /* Another has populated it */
                 pud_free(mm, new);
@@ -2640,6 +2722,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
         if (!new)
                 return -ENOMEM;
 
+        smp_wmb(); /* See comment in __pte_alloc */
+
         spin_lock(&mm->page_table_lock);
 #ifndef __ARCH_HAS_4LEVEL_HACK
         if (pud_present(*pud))          /* Another has populated it */
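All of the smp_wmb() additions follow the initialise-then-publish rule spelled out in the new __pte_alloc comment: finish setting up the new page-table page, order those stores, and only then make the pmd/pud/pgd entry visible to lockless walkers, which rely on data-dependent loads (plus smp_read_barrier_depends() on alpha). A generic sketch of that publication pattern, with illustrative names rather than kernel API:

#include <asm/system.h>	/* smp_wmb(); the barrier header varies by kernel version */

struct table_page {
	int initialised;
};

static struct table_page *published;	/* read without locks elsewhere */

static void publish_table_page(struct table_page *new)
{
	new->initialised = 1;	/* 1. fully initialise the new structure      */
	smp_wmb();		/* 2. order the init stores ...               */
	published = new;	/* 3. ... before the pointer becomes visible  */
}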
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b17dca7249f8..833f854eabe5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -159,21 +159,58 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
+static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
+                           unsigned long end_pfn)
+{
+        unsigned long old_zone_end_pfn;
+
+        zone_span_writelock(zone);
+
+        old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+        if (start_pfn < zone->zone_start_pfn)
+                zone->zone_start_pfn = start_pfn;
+
+        zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
+                                zone->zone_start_pfn;
+
+        zone_span_writeunlock(zone);
+}
+
+static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
+                            unsigned long end_pfn)
+{
+        unsigned long old_pgdat_end_pfn =
+                pgdat->node_start_pfn + pgdat->node_spanned_pages;
+
+        if (start_pfn < pgdat->node_start_pfn)
+                pgdat->node_start_pfn = start_pfn;
+
+        pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
+                                        pgdat->node_start_pfn;
+}
+
 static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 {
         struct pglist_data *pgdat = zone->zone_pgdat;
         int nr_pages = PAGES_PER_SECTION;
         int nid = pgdat->node_id;
         int zone_type;
+        unsigned long flags;
 
         zone_type = zone - pgdat->node_zones;
         if (!zone->wait_table) {
-                int ret = 0;
+                int ret;
+
                 ret = init_currently_empty_zone(zone, phys_start_pfn,
                                                 nr_pages, MEMMAP_HOTPLUG);
-                if (ret < 0)
+                if (ret)
                         return ret;
         }
+        pgdat_resize_lock(zone->zone_pgdat, &flags);
+        grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
+        grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
+                        phys_start_pfn + nr_pages);
+        pgdat_resize_unlock(zone->zone_pgdat, &flags);
         memmap_init_zone(nr_pages, nid, zone_type,
                          phys_start_pfn, MEMMAP_HOTPLUG);
         return 0;
@@ -299,36 +336,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 }
 EXPORT_SYMBOL_GPL(__remove_pages);
 
-static void grow_zone_span(struct zone *zone,
-                unsigned long start_pfn, unsigned long end_pfn)
-{
-        unsigned long old_zone_end_pfn;
-
-        zone_span_writelock(zone);
-
-        old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
-        if (start_pfn < zone->zone_start_pfn)
-                zone->zone_start_pfn = start_pfn;
-
-        zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
-                                zone->zone_start_pfn;
-
-        zone_span_writeunlock(zone);
-}
-
-static void grow_pgdat_span(struct pglist_data *pgdat,
-                unsigned long start_pfn, unsigned long end_pfn)
-{
-        unsigned long old_pgdat_end_pfn =
-                pgdat->node_start_pfn + pgdat->node_spanned_pages;
-
-        if (start_pfn < pgdat->node_start_pfn)
-                pgdat->node_start_pfn = start_pfn;
-
-        pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
-                                        pgdat->node_start_pfn;
-}
-
 void online_page(struct page *page)
 {
         totalram_pages++;
@@ -367,7 +374,6 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
 
 int online_pages(unsigned long pfn, unsigned long nr_pages)
 {
-        unsigned long flags;
         unsigned long onlined_pages = 0;
         struct zone *zone;
         int need_zonelists_rebuild = 0;
@@ -395,11 +401,6 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
          * memory_block->state_mutex.
          */
         zone = page_zone(pfn_to_page(pfn));
-        pgdat_resize_lock(zone->zone_pgdat, &flags);
-        grow_zone_span(zone, pfn, pfn + nr_pages);
-        grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
-        pgdat_resize_unlock(zone->zone_pgdat, &flags);
-
         /*
          * If this zone is not populated, then it is not in zonelist.
          * This means the page allocator ignores this zone.
@@ -408,8 +409,15 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
         if (!populated_zone(zone))
                 need_zonelists_rebuild = 1;
 
-        walk_memory_resource(pfn, nr_pages, &onlined_pages,
-                online_pages_range);
+        ret = walk_memory_resource(pfn, nr_pages, &onlined_pages,
+                online_pages_range);
+        if (ret) {
+                printk(KERN_DEBUG "online_pages %lx at %lx failed\n",
+                        nr_pages, pfn);
+                memory_notify(MEM_CANCEL_ONLINE, &arg);
+                return ret;
+        }
+
         zone->present_pages += onlined_pages;
         zone->zone_pgdat->node_present_pages += onlined_pages;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a37a5034f63d..c94e58b192c3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
         } else {
                 *policy = pol == &default_policy ? MPOL_DEFAULT :
                         pol->mode;
-                *policy |= pol->flags;
+                /*
+                 * Internal mempolicy flags must be masked off before exposing
+                 * the policy to userspace.
+                 */
+                *policy |= (pol->flags & MPOL_MODE_FLAGS);
         }
 
         if (vma) {
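The fix exposes only the user-visible mode modifier bits when reporting a policy; MPOL_MODE_FLAGS is assumed here to be the mask of those modifiers, and the internal MPOL_F_* bookkeeping flags must never reach userspace. A tiny hedged sketch of the masking idea:

#include <linux/mempolicy.h>

/* Illustrative: combine a mode with only its user-visible modifier flags. */
static inline int user_visible_policy(unsigned short mode, unsigned short flags)
{
	return mode | (flags & MPOL_MODE_FLAGS);
}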
diff --git a/mm/migrate.c b/mm/migrate.c
index 449d77d409f5..55bd355d170d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -9,7 +9,7 @@
  * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
  * Hirokazu Takahashi <taka@valinux.co.jp>
  * Dave Hansen <haveblue@us.ibm.com>
- * Christoph Lameter <clameter@sgi.com>
+ * Christoph Lameter
  */
 
 #include <linux/migrate.h>
@@ -865,6 +865,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
                         goto set_status;
 
                 page = follow_page(vma, pp->addr, FOLL_GET);
+
+                err = PTR_ERR(page);
+                if (IS_ERR(page))
+                        goto set_status;
+
                 err = -ENOENT;
                 if (!page)
                         goto set_status;
@@ -928,6 +933,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
                         goto set_status;
 
                 page = follow_page(vma, pm->addr, 0);
+
+                err = PTR_ERR(page);
+                if (IS_ERR(page))
+                        goto set_status;
+
                 err = -ENOENT;
                 /* Use PageReserved to check for zero page */
                 if (!page || PageReserved(page))
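Because follow_page() can now hand back ERR_PTR(-EFAULT), both loops above must test for an error pointer before the existing NULL check; checking in the wrong order would treat a fault as a missing page. The required ordering, as a hedged standalone sketch:

#include <linux/err.h>
#include <linux/mm.h>

static int node_of_mapped_page(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page = follow_page(vma, addr, 0);

	if (IS_ERR(page))		/* hard failure, e.g. -EFAULT */
		return PTR_ERR(page);
	if (!page)			/* nothing mapped (or zero page) */
		return -ENOENT;
	return page_to_nid(page);	/* safe to inspect the page now */
}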
diff --git a/mm/mmap.c b/mm/mmap.c
index fac66337da2a..3354fdd83d4b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_t vm_committed_space = ATOMIC_INIT(0);
+atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -177,7 +177,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
          * cast `allowed' as a signed long because vm_committed_space
          * sometimes has a negative value
          */
-        if (atomic_read(&vm_committed_space) < (long)allowed)
+        if (atomic_long_read(&vm_committed_space) < (long)allowed)
                 return 0;
 error:
         vm_unacct_memory(pages);
@@ -245,10 +245,16 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
         unsigned long rlim, retval;
         unsigned long newbrk, oldbrk;
         struct mm_struct *mm = current->mm;
+        unsigned long min_brk;
 
         down_write(&mm->mmap_sem);
 
-        if (brk < mm->start_brk)
+#ifdef CONFIG_COMPAT_BRK
+        min_brk = mm->end_code;
+#else
+        min_brk = mm->start_brk;
+#endif
+        if (brk < min_brk)
                 goto out;
 
         /*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4de546899dc1..acfe7c8d72fc 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -26,6 +26,13 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#ifndef pgprot_modify
+static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+{
+        return newprot;
+}
+#endif
+
 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
                 unsigned long addr, unsigned long end, pgprot_t newprot,
                 int dirty_accountable)
@@ -40,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
                 if (pte_present(oldpte)) {
                         pte_t ptent;
 
-                        /* Avoid an SMP race with hardware updated dirty/clean
-                         * bits by wiping the pte and then setting the new pte
-                         * into place.
-                         */
-                        ptent = ptep_get_and_clear(mm, addr, pte);
+                        ptent = ptep_modify_prot_start(mm, addr, pte);
                         ptent = pte_modify(ptent, newprot);
+
                         /*
                          * Avoid taking write faults for pages we know to be
                          * dirty.
                          */
                         if (dirty_accountable && pte_dirty(ptent))
                                 ptent = pte_mkwrite(ptent);
-                        set_pte_at(mm, addr, pte, ptent);
+
+                        ptep_modify_prot_commit(mm, addr, pte, ptent);
 #ifdef CONFIG_MIGRATION
                 } else if (!pte_file(oldpte)) {
                         swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -192,7 +197,9 @@ success:
          * held in write mode.
          */
         vma->vm_flags = newflags;
-        vma->vm_page_prot = vm_get_page_prot(newflags);
+        vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
+                                          vm_get_page_prot(newflags));
+
         if (vma_wants_writenotify(vma)) {
                 vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
                 dirty_accountable = 1;
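ptep_modify_prot_start()/ptep_modify_prot_commit() turn the old clear-then-set sequence into an explicit transaction that paravirtualised pte code can intercept or batch. On architectures that do not override them, the generic fallback is expected to behave just like the code being replaced; a hedged sketch of what such a fallback amounts to (the real definitions live in the pgtable headers, these names are illustrative):

#include <linux/mm.h>
#include <asm/pgtable.h>

/* Roughly the assumed generic (non-paravirt) behaviour of the new pair. */
static inline pte_t example_modify_prot_start(struct mm_struct *mm,
					      unsigned long addr, pte_t *ptep)
{
	/* wipe the pte atomically so hardware dirty/accessed updates are not lost */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void example_modify_prot_commit(struct mm_struct *mm,
					      unsigned long addr,
					      pte_t *ptep, pte_t pte)
{
	set_pte_at(mm, addr, ptep, pte);	/* install the modified pte */
}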
diff --git a/mm/nommu.c b/mm/nommu.c
index ef8c62cec697..4462b6a3fcb9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -39,7 +39,7 @@ struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
 unsigned long askedalloc, realalloc;
-atomic_t vm_committed_space = ATOMIC_INIT(0);
+atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
@@ -109,16 +109,23 @@ unsigned int kobjsize(const void *objp)
          * If the object we have should not have ksize performed on it,
          * return size of 0
          */
-        if (!objp || (unsigned long)objp >= memory_end || !((page = virt_to_page(objp))))
+        if (!objp || !virt_addr_valid(objp))
                 return 0;
 
+        page = virt_to_head_page(objp);
+
+        /*
+         * If the allocator sets PageSlab, we know the pointer came from
+         * kmalloc().
+         */
         if (PageSlab(page))
                 return ksize(objp);
 
-        BUG_ON(page->index < 0);
-        BUG_ON(page->index >= MAX_ORDER);
-
-        return (PAGE_SIZE << page->index);
+        /*
+         * The ksize() function is only guaranteed to work for pointers
+         * returned by kmalloc(). So handle arbitrary pointers here.
+         */
+        return PAGE_SIZE << compound_order(page);
 }
 
 /*
@@ -1410,7 +1417,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
          * cast `allowed' as a signed long because vm_committed_space
          * sometimes has a negative value
          */
-        if (atomic_read(&vm_committed_space) < (long)allowed)
+        if (atomic_long_read(&vm_committed_space) < (long)allowed)
                 return 0;
 error:
         vm_unacct_memory(pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bdd5c432c426..f32fae3121f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -237,16 +237,7 @@ static void bad_page(struct page *page)
         printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
                 KERN_EMERG "Backtrace:\n");
         dump_stack();
-        page->flags &= ~(1 << PG_lru |
-                        1 << PG_private |
-                        1 << PG_locked |
-                        1 << PG_active |
-                        1 << PG_dirty |
-                        1 << PG_reclaim |
-                        1 << PG_slab |
-                        1 << PG_swapcache |
-                        1 << PG_writeback |
-                        1 << PG_buddy );
+        page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD;
         set_page_count(page, 0);
         reset_page_mapcount(page);
         page->mapping = NULL;
@@ -463,16 +454,7 @@ static inline int free_pages_check(struct page *page)
                 (page->mapping != NULL) |
                 (page_get_page_cgroup(page) != NULL) |
                 (page_count(page) != 0) |
-                (page->flags & (
-                        1 << PG_lru |
-                        1 << PG_private |
-                        1 << PG_locked |
-                        1 << PG_active |
-                        1 << PG_slab |
-                        1 << PG_swapcache |
-                        1 << PG_writeback |
-                        1 << PG_reserved |
-                        1 << PG_buddy ))))
+                (page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
                 bad_page(page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
@@ -616,17 +598,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
                 (page->mapping != NULL) |
                 (page_get_page_cgroup(page) != NULL) |
                 (page_count(page) != 0) |
-                (page->flags & (
-                        1 << PG_lru |
-                        1 << PG_private |
-                        1 << PG_locked |
-                        1 << PG_active |
-                        1 << PG_dirty |
-                        1 << PG_slab |
-                        1 << PG_swapcache |
-                        1 << PG_writeback |
-                        1 << PG_reserved |
-                        1 << PG_buddy ))))
+                (page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
                 bad_page(page);
 
         /*
@@ -1396,6 +1368,9 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 
         (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
                                                         &preferred_zone);
+        if (!preferred_zone)
+                return NULL;
+
         classzone_idx = zone_idx(preferred_zone);
 
 zonelist_scan:
@@ -2353,7 +2328,6 @@ static void build_zonelists(pg_data_t *pgdat)
 static void build_zonelist_cache(pg_data_t *pgdat)
 {
         pgdat->node_zonelists[0].zlcache_ptr = NULL;
-        pgdat->node_zonelists[1].zlcache_ptr = NULL;
 }
 
 #endif /* CONFIG_NUMA */
@@ -2804,7 +2778,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
         alloc_size = zone->wait_table_hash_nr_entries
                                         * sizeof(wait_queue_head_t);
 
-        if (system_state == SYSTEM_BOOTING) {
+        if (!slab_is_available()) {
                 zone->wait_table = (wait_queue_head_t *)
                         alloc_bootmem_node(pgdat, alloc_size);
         } else {
@@ -2862,8 +2836,6 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 
         zone->zone_start_pfn = zone_start_pfn;
 
-        memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
-
         zone_init_free_lists(zone);
 
         return 0;
@@ -3380,7 +3352,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                  * is used by this zone for memmap. This affects the watermark
                  * and per-cpu initialisations
                  */
-                memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
+                memmap_pages =
+                        PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
                 if (realsize >= memmap_pages) {
                         realsize -= memmap_pages;
                         printk(KERN_DEBUG
@@ -3433,6 +3406,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                 ret = init_currently_empty_zone(zone, zone_start_pfn,
                                                 size, MEMMAP_EARLY);
                 BUG_ON(ret);
+                memmap_init(size, nid, j, zone_start_pfn);
                 zone_start_pfn += size;
         }
 }
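The three hand-maintained flag lists collapse into shared masks (PAGE_FLAGS_CLEAR_WHEN_BAD, PAGE_FLAGS_CHECK_AT_FREE, PAGE_FLAGS_CHECK_AT_PREP) kept next to the PG_* definitions, so the free and prep checks can no longer drift apart. A hedged sketch of how such a mask is composed, using only the bits visible in the removed lines (the exact upstream definitions live in linux/page-flags.h):

#include <linux/page-flags.h>

/* Illustrative only - mirrors the bits the old free_pages_check() rejected. */
#define EXAMPLE_FLAGS_CHECK_AT_FREE				\
	(1 << PG_lru | 1 << PG_private | 1 << PG_locked |	\
	 1 << PG_active | 1 << PG_slab | 1 << PG_swapcache |	\
	 1 << PG_writeback | 1 << PG_reserved | 1 << PG_buddy)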
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 0afd2387e507..d5878bed7841 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,14 +3,14 @@
 #include <linux/sched.h>
 
 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                          const struct mm_walk *walk, void *private)
+                          struct mm_walk *walk)
 {
         pte_t *pte;
         int err = 0;
 
         pte = pte_offset_map(pmd, addr);
         for (;;) {
-                err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private);
+                err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
                 if (err)
                         break;
                 addr += PAGE_SIZE;
@@ -24,7 +24,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 }
 
 static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
-                          const struct mm_walk *walk, void *private)
+                          struct mm_walk *walk)
 {
         pmd_t *pmd;
         unsigned long next;
@@ -35,15 +35,15 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
                 next = pmd_addr_end(addr, end);
                 if (pmd_none_or_clear_bad(pmd)) {
                         if (walk->pte_hole)
-                                err = walk->pte_hole(addr, next, private);
+                                err = walk->pte_hole(addr, next, walk);
                         if (err)
                                 break;
                         continue;
                 }
                 if (walk->pmd_entry)
-                        err = walk->pmd_entry(pmd, addr, next, private);
+                        err = walk->pmd_entry(pmd, addr, next, walk);
                 if (!err && walk->pte_entry)
-                        err = walk_pte_range(pmd, addr, next, walk, private);
+                        err = walk_pte_range(pmd, addr, next, walk);
                 if (err)
                         break;
         } while (pmd++, addr = next, addr != end);
@@ -52,7 +52,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 }
 
 static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
-                          const struct mm_walk *walk, void *private)
+                          struct mm_walk *walk)
 {
         pud_t *pud;
         unsigned long next;
@@ -63,15 +63,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
                 next = pud_addr_end(addr, end);
                 if (pud_none_or_clear_bad(pud)) {
                         if (walk->pte_hole)
-                                err = walk->pte_hole(addr, next, private);
+                                err = walk->pte_hole(addr, next, walk);
                         if (err)
                                 break;
                         continue;
                 }
                 if (walk->pud_entry)
-                        err = walk->pud_entry(pud, addr, next, private);
+                        err = walk->pud_entry(pud, addr, next, walk);
                 if (!err && (walk->pmd_entry || walk->pte_entry))
-                        err = walk_pmd_range(pud, addr, next, walk, private);
+                        err = walk_pmd_range(pud, addr, next, walk);
                 if (err)
                         break;
         } while (pud++, addr = next, addr != end);
@@ -85,15 +85,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  * @addr: starting address
  * @end: ending address
  * @walk: set of callbacks to invoke for each level of the tree
- * @private: private data passed to the callback function
  *
  * Recursively walk the page table for the memory area in a VMA,
  * calling supplied callbacks. Callbacks are called in-order (first
  * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
  * etc.). If lower-level callbacks are omitted, walking depth is reduced.
  *
- * Each callback receives an entry pointer, the start and end of the
- * associated range, and a caller-supplied private data pointer.
+ * Each callback receives an entry pointer and the start and end of the
+ * associated range, and a copy of the original mm_walk for access to
+ * the ->private or ->mm fields.
  *
  * No locks are taken, but the bottom level iterator will map PTE
  * directories from highmem if necessary.
@@ -101,9 +101,8 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  * If any callback returns a non-zero value, the walk is aborted and
  * the return value is propagated back to the caller. Otherwise 0 is returned.
  */
-int walk_page_range(const struct mm_struct *mm,
-                    unsigned long addr, unsigned long end,
-                    const struct mm_walk *walk, void *private)
+int walk_page_range(unsigned long addr, unsigned long end,
+                    struct mm_walk *walk)
 {
         pgd_t *pgd;
         unsigned long next;
@@ -112,21 +111,24 @@ int walk_page_range(const struct mm_struct *mm,
         if (addr >= end)
                 return err;
 
-        pgd = pgd_offset(mm, addr);
+        if (!walk->mm)
+                return -EINVAL;
+
+        pgd = pgd_offset(walk->mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd)) {
                         if (walk->pte_hole)
-                                err = walk->pte_hole(addr, next, private);
+                                err = walk->pte_hole(addr, next, walk);
                         if (err)
                                 break;
                         continue;
                 }
                 if (walk->pgd_entry)
-                        err = walk->pgd_entry(pgd, addr, next, private);
+                        err = walk->pgd_entry(pgd, addr, next, walk);
                 if (!err &&
                     (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
-                        err = walk_pud_range(pgd, addr, next, walk, private);
+                        err = walk_pud_range(pgd, addr, next, walk);
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
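After this change the walker state travels in struct mm_walk itself: a caller fills in the callbacks plus ->mm (now mandatory, hence the -EINVAL check) and ->private, and every callback receives the mm_walk back in place of the old bare private pointer. A minimal sketch of a caller under the new interface (the counting callback is hypothetical):

#include <linux/mm.h>

static int count_present_pte(pte_t *pte, unsigned long addr,
			     unsigned long end, struct mm_walk *walk)
{
	unsigned long *count = walk->private;	/* private data now lives in mm_walk */

	if (pte_present(*pte))
		(*count)++;
	return 0;			/* non-zero would abort the walk */
}

static unsigned long count_present_range(struct mm_struct *mm,
					 unsigned long start, unsigned long end)
{
	unsigned long count = 0;
	struct mm_walk walk = {
		.pte_entry	= count_present_pte,
		.mm		= mm,		/* required, or -EINVAL */
		.private	= &count,
	};

	walk_page_range(start, end, &walk);
	return count;
}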
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 1c96cfc9e040..9d834aa4b979 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -207,7 +207,6 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
 
         spin_lock_irqsave(&pdflush_lock, flags);
         if (list_empty(&pdflush_list)) {
-                spin_unlock_irqrestore(&pdflush_lock, flags);
                 ret = -1;
         } else {
                 struct pdflush_work *pdf;
@@ -219,8 +218,9 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
                 pdf->fn = fn;
                 pdf->arg0 = arg0;
                 wake_up_process(pdf->who);
-                spin_unlock_irqrestore(&pdflush_lock, flags);
         }
+        spin_unlock_irqrestore(&pdflush_lock, flags);
+
         return ret;
 }
 
diff --git a/mm/slab.c b/mm/slab.c
index 06236e4ddc1b..046607f05f3e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3263,9 +3263,12 @@ retry:
 
                 if (cpuset_zone_allowed_hardwall(zone, flags) &&
                         cache->nodelists[nid] &&
-                        cache->nodelists[nid]->free_objects)
+                        cache->nodelists[nid]->free_objects) {
                                 obj = ____cache_alloc_node(cache,
                                         flags | GFP_THISNODE, nid);
+                                if (obj)
+                                        break;
+                }
         }
 
         if (!obj) {
diff --git a/mm/slob.c b/mm/slob.c
index 6038cbadf796..a3ad6671adf1 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -469,8 +469,9 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
                         return ZERO_SIZE_PTR;
 
                 m = slob_alloc(size + align, gfp, align, node);
-                if (m)
-                        *m = size;
+                if (!m)
+                        return NULL;
+                *m = size;
                 return (void *)m + align;
         } else {
                 void *ret;
diff --git a/mm/slub.c b/mm/slub.c
index a505a828ef41..1a427c0ae83b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5,7 +5,7 @@
  * The allocator synchronizes using per slab locks and only
  * uses a centralized lock to manage a pool of partial slabs.
  *
- * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
+ * (C) 2007 SGI, Christoph Lameter
  */
 
 #include <linux/mm.h>
@@ -2726,9 +2726,10 @@ size_t ksize(const void *object)
 
         page = virt_to_head_page(object);
 
-        if (unlikely(!PageSlab(page)))
+        if (unlikely(!PageSlab(page))) {
+                WARN_ON(!PageCompound(page));
                 return PAGE_SIZE << compound_order(page);
-
+        }
         s = page->slab;
 
 #ifdef CONFIG_SLUB_DEBUG
@@ -2994,8 +2995,6 @@ void __init kmem_cache_init(void)
                 create_kmalloc_cache(&kmalloc_caches[1],
                                 "kmalloc-96", 96, GFP_KERNEL);
                 caches++;
-        }
-        if (KMALLOC_MIN_SIZE <= 128) {
                 create_kmalloc_cache(&kmalloc_caches[2],
                                 "kmalloc-192", 192, GFP_KERNEL);
                 caches++;
@@ -3025,6 +3024,16 @@ void __init kmem_cache_init(void)
         for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
                 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
 
+        if (KMALLOC_MIN_SIZE == 128) {
+                /*
+                 * The 192 byte sized cache is not used if the alignment
+                 * is 128 byte. Redirect kmalloc to use the 256 byte cache
+                 * instead.
+                 */
+                for (i = 128 + 8; i <= 192; i += 8)
+                        size_index[(i - 1) / 8] = 8;
+        }
+
         slab_state = UP;
 
         /* Provide the correct kmalloc names now that the caches are up */
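size_index[] maps a kmalloc request, rounded up to 8 bytes, to a cache index via size_index[(size - 1) / 8]. With 128-byte minimum alignment the 192-byte cache would break that alignment, so the new loop points every slot for 136..192 bytes at index 8, i.e. the 256-byte cache. A worked example of the indexing, as an illustrative comment:

/*
 * Example: kmalloc(160, ...) with KMALLOC_MIN_SIZE == 128
 *
 *   slot           = (160 - 1) / 8 = 19       (covered by the 136..192 loop)
 *   size_index[19] = 8                        (set by the loop above)
 *   cache          = kmalloc_caches[8], object size 256 bytes
 *
 * so the 160-byte request stays 128-byte aligned at the cost of some slack.
 */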
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 99c4f36eb8a3..a91b5f8fcaf6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -1,7 +1,7 @@
 /*
  * Virtual Memory Map support
  *
- * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.
+ * (C) 2007 sgi. Christoph Lameter.
  *
  * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
  * virt_to_page, page_address() to be implemented as a base offset
diff --git a/mm/swap.c b/mm/swap.c
index 91e194445a5e..45c9f25a8a3b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -503,7 +503,7 @@ void vm_acct_memory(long pages)
         local = &__get_cpu_var(committed_space);
         *local += pages;
         if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-                atomic_add(*local, &vm_committed_space);
+                atomic_long_add(*local, &vm_committed_space);
                 *local = 0;
         }
         preempt_enable();
@@ -520,7 +520,7 @@ static int cpu_swap_callback(struct notifier_block *nfb,
 
         committed = &per_cpu(committed_space, (long)hcpu);
         if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-                atomic_add(*committed, &vm_committed_space);
+                atomic_long_add(*committed, &vm_committed_space);
                 *committed = 0;
                 drain_cpu_pagevecs((long)hcpu);
         }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9a29901ad3b3..967d30ccd92b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1307,7 +1307,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                         struct scan_control *sc)
 {
         int priority;
-        int ret = 0;
+        unsigned long ret = 0;
         unsigned long total_scanned = 0;
         unsigned long nr_reclaimed = 0;
         struct reclaim_state *reclaim_state = current->reclaim_state;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1a32130b958c..db9eabb2c5b3 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -41,7 +41,9 @@ static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
  */
 void all_vm_events(unsigned long *ret)
 {
+        get_online_cpus();
         sum_vm_events(ret, &cpu_online_map);
+        put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(all_vm_events);
 
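get_online_cpus()/put_online_cpus() pin the CPU hotplug state so cpu_online_map cannot change underneath the summation. The same bracketing applies to any walk over per-cpu data; a short hedged sketch:

#include <linux/cpu.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_events);

static unsigned long sum_example_events(void)
{
	unsigned long sum = 0;
	int cpu;

	get_online_cpus();		/* block CPU hotplug while we iterate */
	for_each_online_cpu(cpu)
		sum += per_cpu(example_events, cpu);
	put_online_cpus();

	return sum;
}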