Diffstat (limited to 'mm')
-rw-r--r-- | mm/allocpercpu.c     |   2
-rw-r--r-- | mm/backing-dev.c     |  12
-rw-r--r-- | mm/bootmem.c         |   6
-rw-r--r-- | mm/filemap.c         |   5
-rw-r--r-- | mm/hugetlb.c         |   2
-rw-r--r-- | mm/memory.c          | 106
-rw-r--r-- | mm/memory_hotplug.c  |  86
-rw-r--r-- | mm/mempolicy.c       |   6
-rw-r--r-- | mm/migrate.c         |  12
-rw-r--r-- | mm/mmap.c            |  12
-rw-r--r-- | mm/mprotect.c        |  21
-rw-r--r-- | mm/nommu.c           |  21
-rw-r--r-- | mm/page_alloc.c      |  46
-rw-r--r-- | mm/pagewalk.c        |  42
-rw-r--r-- | mm/pdflush.c         |   4
-rw-r--r-- | mm/slab.c            |   5
-rw-r--r-- | mm/slob.c            |   5
-rw-r--r-- | mm/slub.c            |  19
-rw-r--r-- | mm/sparse-vmemmap.c  |   2
-rw-r--r-- | mm/swap.c            |   4
-rw-r--r-- | mm/vmscan.c          |   2
-rw-r--r-- | mm/vmstat.c          |   2
22 files changed, 269 insertions, 153 deletions
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index f4026bae6eed..05f2b4009ccc 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * linux/mm/allocpercpu.c | 2 | * linux/mm/allocpercpu.c |
3 | * | 3 | * |
4 | * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com> | 4 | * Separated from slab.c August 11, 2006 Christoph Lameter |
5 | */ | 5 | */ |
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7c4f9e097095..f2e574dbc300 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -172,30 +172,22 @@ postcore_initcall(bdi_class_init); | |||
172 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | 172 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, |
173 | const char *fmt, ...) | 173 | const char *fmt, ...) |
174 | { | 174 | { |
175 | char *name; | ||
176 | va_list args; | 175 | va_list args; |
177 | int ret = 0; | 176 | int ret = 0; |
178 | struct device *dev; | 177 | struct device *dev; |
179 | 178 | ||
180 | va_start(args, fmt); | 179 | va_start(args, fmt); |
181 | name = kvasprintf(GFP_KERNEL, fmt, args); | 180 | dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args); |
182 | va_end(args); | 181 | va_end(args); |
183 | |||
184 | if (!name) | ||
185 | return -ENOMEM; | ||
186 | |||
187 | dev = device_create(bdi_class, parent, MKDEV(0, 0), name); | ||
188 | if (IS_ERR(dev)) { | 182 | if (IS_ERR(dev)) { |
189 | ret = PTR_ERR(dev); | 183 | ret = PTR_ERR(dev); |
190 | goto exit; | 184 | goto exit; |
191 | } | 185 | } |
192 | 186 | ||
193 | bdi->dev = dev; | 187 | bdi->dev = dev; |
194 | dev_set_drvdata(bdi->dev, bdi); | 188 | bdi_debug_register(bdi, dev_name(dev)); |
195 | bdi_debug_register(bdi, name); | ||
196 | 189 | ||
197 | exit: | 190 | exit: |
198 | kfree(name); | ||
199 | return ret; | 191 | return ret; |
200 | } | 192 | } |
201 | EXPORT_SYMBOL(bdi_register); | 193 | EXPORT_SYMBOL(bdi_register); |
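For reference, a minimal sketch of the register-with-varargs pattern the reworked bdi_register() above relies on: device_create_vargs() does the name formatting, device creation and drvdata assignment in one call, which is why the kvasprintf()/kfree() bookkeeping could be dropped. The foo_device/foo_class/foo_register names below are illustrative only, not kernel APIs.

#include <stdarg.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/kdev_t.h>

struct foo_device {
        struct device *dev;
};

static struct class *foo_class;         /* assumed to be created elsewhere */

static int foo_register(struct foo_device *foo, struct device *parent,
                        const char *fmt, ...)
{
        struct device *dev;
        va_list args;

        va_start(args, fmt);
        /* name formatting + device_create() + drvdata assignment in one call */
        dev = device_create_vargs(foo_class, parent, MKDEV(0, 0), foo,
                                  fmt, args);
        va_end(args);
        if (IS_ERR(dev))
                return PTR_ERR(dev);

        foo->dev = dev;
        return 0;
}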
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b9..8d9f60e06f62 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, | |||
442 | return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); | 442 | return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); |
443 | } | 443 | } |
444 | 444 | ||
445 | void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 445 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
446 | unsigned long size, int flags) | 446 | unsigned long size, int flags) |
447 | { | 447 | { |
448 | int ret; | 448 | int ret; |
449 | 449 | ||
450 | ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | 450 | ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); |
451 | if (ret < 0) | 451 | if (ret < 0) |
452 | return; | 452 | return -ENOMEM; |
453 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | 453 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); |
454 | |||
455 | return 0; | ||
454 | } | 456 | } |
455 | 457 | ||
456 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 458 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
diff --git a/mm/filemap.c b/mm/filemap.c
index 2dead9adf8b7..1e6a7d34874f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1461,6 +1461,11 @@ page_not_uptodate: | |||
1461 | */ | 1461 | */ |
1462 | ClearPageError(page); | 1462 | ClearPageError(page); |
1463 | error = mapping->a_ops->readpage(file, page); | 1463 | error = mapping->a_ops->readpage(file, page); |
1464 | if (!error) { | ||
1465 | wait_on_page_locked(page); | ||
1466 | if (!PageUptodate(page)) | ||
1467 | error = -EIO; | ||
1468 | } | ||
1464 | page_cache_release(page); | 1469 | page_cache_release(page); |
1465 | 1470 | ||
1466 | if (!error || error == AOP_TRUNCATED_PAGE) | 1471 | if (!error || error == AOP_TRUNCATED_PAGE) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bbf953eeb58b..ab171274ef21 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -785,7 +785,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
785 | continue; | 785 | continue; |
786 | 786 | ||
787 | spin_lock(&dst->page_table_lock); | 787 | spin_lock(&dst->page_table_lock); |
788 | spin_lock(&src->page_table_lock); | 788 | spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING); |
789 | if (!huge_pte_none(huge_ptep_get(src_pte))) { | 789 | if (!huge_pte_none(huge_ptep_get(src_pte))) { |
790 | if (cow) | 790 | if (cow) |
791 | huge_ptep_set_wrprotect(src, addr, src_pte); | 791 | huge_ptep_set_wrprotect(src, addr, src_pte); |
diff --git a/mm/memory.c b/mm/memory.c
index 48c122d42ed7..2302d228fe04 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -311,6 +311,21 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) | |||
311 | if (!new) | 311 | if (!new) |
312 | return -ENOMEM; | 312 | return -ENOMEM; |
313 | 313 | ||
314 | /* | ||
315 | * Ensure all pte setup (eg. pte page lock and page clearing) are | ||
316 | * visible before the pte is made visible to other CPUs by being | ||
317 | * put into page tables. | ||
318 | * | ||
319 | * The other side of the story is the pointer chasing in the page | ||
320 | * table walking code (when walking the page table without locking; | ||
321 | * ie. most of the time). Fortunately, these data accesses consist | ||
322 | * of a chain of data-dependent loads, meaning most CPUs (alpha | ||
323 | * being the notable exception) will already guarantee loads are | ||
324 | * seen in-order. See the alpha page table accessors for the | ||
325 | * smp_read_barrier_depends() barriers in page table walking code. | ||
326 | */ | ||
327 | smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ | ||
328 | |||
314 | spin_lock(&mm->page_table_lock); | 329 | spin_lock(&mm->page_table_lock); |
315 | if (!pmd_present(*pmd)) { /* Has another populated it ? */ | 330 | if (!pmd_present(*pmd)) { /* Has another populated it ? */ |
316 | mm->nr_ptes++; | 331 | mm->nr_ptes++; |
@@ -329,6 +344,8 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) | |||
329 | if (!new) | 344 | if (!new) |
330 | return -ENOMEM; | 345 | return -ENOMEM; |
331 | 346 | ||
347 | smp_wmb(); /* See comment in __pte_alloc */ | ||
348 | |||
332 | spin_lock(&init_mm.page_table_lock); | 349 | spin_lock(&init_mm.page_table_lock); |
333 | if (!pmd_present(*pmd)) { /* Has another populated it ? */ | 350 | if (!pmd_present(*pmd)) { /* Has another populated it ? */ |
334 | pmd_populate_kernel(&init_mm, pmd, new); | 351 | pmd_populate_kernel(&init_mm, pmd, new); |
@@ -982,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, | |||
982 | goto no_page_table; | 999 | goto no_page_table; |
983 | 1000 | ||
984 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); | 1001 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); |
985 | if (!ptep) | ||
986 | goto out; | ||
987 | 1002 | ||
988 | pte = *ptep; | 1003 | pte = *ptep; |
989 | if (!pte_present(pte)) | 1004 | if (!pte_present(pte)) |
990 | goto unlock; | 1005 | goto no_page; |
991 | if ((flags & FOLL_WRITE) && !pte_write(pte)) | 1006 | if ((flags & FOLL_WRITE) && !pte_write(pte)) |
992 | goto unlock; | 1007 | goto unlock; |
993 | page = vm_normal_page(vma, address, pte); | 1008 | page = vm_normal_page(vma, address, pte); |
994 | if (unlikely(!page)) | 1009 | if (unlikely(!page)) |
995 | goto unlock; | 1010 | goto bad_page; |
996 | 1011 | ||
997 | if (flags & FOLL_GET) | 1012 | if (flags & FOLL_GET) |
998 | get_page(page); | 1013 | get_page(page); |
@@ -1007,6 +1022,15 @@ unlock: | |||
1007 | out: | 1022 | out: |
1008 | return page; | 1023 | return page; |
1009 | 1024 | ||
1025 | bad_page: | ||
1026 | pte_unmap_unlock(ptep, ptl); | ||
1027 | return ERR_PTR(-EFAULT); | ||
1028 | |||
1029 | no_page: | ||
1030 | pte_unmap_unlock(ptep, ptl); | ||
1031 | if (!pte_none(pte)) | ||
1032 | return page; | ||
1033 | /* Fall through to ZERO_PAGE handling */ | ||
1010 | no_page_table: | 1034 | no_page_table: |
1011 | /* | 1035 | /* |
1012 | * When core dumping an enormous anonymous area that nobody | 1036 | * When core dumping an enormous anonymous area that nobody |
@@ -1021,6 +1045,26 @@ no_page_table: | |||
1021 | return page; | 1045 | return page; |
1022 | } | 1046 | } |
1023 | 1047 | ||
1048 | /* Can we do the FOLL_ANON optimization? */ | ||
1049 | static inline int use_zero_page(struct vm_area_struct *vma) | ||
1050 | { | ||
1051 | /* | ||
1052 | * We don't want to optimize FOLL_ANON for make_pages_present() | ||
1053 | * when it tries to page in a VM_LOCKED region. As to VM_SHARED, | ||
1054 | * we want to get the page from the page tables to make sure | ||
1055 | * that we serialize and update with any other user of that | ||
1056 | * mapping. | ||
1057 | */ | ||
1058 | if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) | ||
1059 | return 0; | ||
1060 | /* | ||
1061 | * And if we have a fault or a nopfn routine, it's not an | ||
1062 | * anonymous region. | ||
1063 | */ | ||
1064 | return !vma->vm_ops || | ||
1065 | (!vma->vm_ops->fault && !vma->vm_ops->nopfn); | ||
1066 | } | ||
1067 | |||
1024 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1068 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1025 | unsigned long start, int len, int write, int force, | 1069 | unsigned long start, int len, int write, int force, |
1026 | struct page **pages, struct vm_area_struct **vmas) | 1070 | struct page **pages, struct vm_area_struct **vmas) |
@@ -1095,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1095 | foll_flags = FOLL_TOUCH; | 1139 | foll_flags = FOLL_TOUCH; |
1096 | if (pages) | 1140 | if (pages) |
1097 | foll_flags |= FOLL_GET; | 1141 | foll_flags |= FOLL_GET; |
1098 | if (!write && !(vma->vm_flags & VM_LOCKED) && | 1142 | if (!write && use_zero_page(vma)) |
1099 | (!vma->vm_ops || !vma->vm_ops->fault)) | ||
1100 | foll_flags |= FOLL_ANON; | 1143 | foll_flags |= FOLL_ANON; |
1101 | 1144 | ||
1102 | do { | 1145 | do { |
@@ -1108,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1108 | * be processed until returning to user space. | 1151 | * be processed until returning to user space. |
1109 | */ | 1152 | */ |
1110 | if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE))) | 1153 | if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE))) |
1111 | return -ENOMEM; | 1154 | return i ? i : -ENOMEM; |
1112 | 1155 | ||
1113 | if (write) | 1156 | if (write) |
1114 | foll_flags |= FOLL_WRITE; | 1157 | foll_flags |= FOLL_WRITE; |
@@ -1142,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1142 | 1185 | ||
1143 | cond_resched(); | 1186 | cond_resched(); |
1144 | } | 1187 | } |
1188 | if (IS_ERR(page)) | ||
1189 | return i ? i : PTR_ERR(page); | ||
1145 | if (pages) { | 1190 | if (pages) { |
1146 | pages[i] = page; | 1191 | pages[i] = page; |
1147 | 1192 | ||
@@ -1652,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1652 | struct page *dirty_page = NULL; | 1697 | struct page *dirty_page = NULL; |
1653 | 1698 | ||
1654 | old_page = vm_normal_page(vma, address, orig_pte); | 1699 | old_page = vm_normal_page(vma, address, orig_pte); |
1655 | if (!old_page) | 1700 | if (!old_page) { |
1701 | /* | ||
1702 | * VM_MIXEDMAP !pfn_valid() case | ||
1703 | * | ||
1704 | * We should not cow pages in a shared writeable mapping. | ||
1705 | * Just mark the pages writable as we can't do any dirty | ||
1706 | * accounting on raw pfn maps. | ||
1707 | */ | ||
1708 | if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
1709 | (VM_WRITE|VM_SHARED)) | ||
1710 | goto reuse; | ||
1656 | goto gotten; | 1711 | goto gotten; |
1712 | } | ||
1657 | 1713 | ||
1658 | /* | 1714 | /* |
1659 | * Take out anonymous pages first, anonymous shared vmas are | 1715 | * Take out anonymous pages first, anonymous shared vmas are |
@@ -1706,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1706 | } | 1762 | } |
1707 | 1763 | ||
1708 | if (reuse) { | 1764 | if (reuse) { |
1765 | reuse: | ||
1709 | flush_cache_page(vma, address, pte_pfn(orig_pte)); | 1766 | flush_cache_page(vma, address, pte_pfn(orig_pte)); |
1710 | entry = pte_mkyoung(orig_pte); | 1767 | entry = pte_mkyoung(orig_pte); |
1711 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 1768 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
@@ -1740,7 +1797,6 @@ gotten: | |||
1740 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); | 1797 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); |
1741 | if (likely(pte_same(*page_table, orig_pte))) { | 1798 | if (likely(pte_same(*page_table, orig_pte))) { |
1742 | if (old_page) { | 1799 | if (old_page) { |
1743 | page_remove_rmap(old_page, vma); | ||
1744 | if (!PageAnon(old_page)) { | 1800 | if (!PageAnon(old_page)) { |
1745 | dec_mm_counter(mm, file_rss); | 1801 | dec_mm_counter(mm, file_rss); |
1746 | inc_mm_counter(mm, anon_rss); | 1802 | inc_mm_counter(mm, anon_rss); |
@@ -1762,6 +1818,32 @@ gotten: | |||
1762 | lru_cache_add_active(new_page); | 1818 | lru_cache_add_active(new_page); |
1763 | page_add_new_anon_rmap(new_page, vma, address); | 1819 | page_add_new_anon_rmap(new_page, vma, address); |
1764 | 1820 | ||
1821 | if (old_page) { | ||
1822 | /* | ||
1823 | * Only after switching the pte to the new page may | ||
1824 | * we remove the mapcount here. Otherwise another | ||
1825 | * process may come and find the rmap count decremented | ||
1826 | * before the pte is switched to the new page, and | ||
1827 | * "reuse" the old page writing into it while our pte | ||
1828 | * here still points into it and can be read by other | ||
1829 | * threads. | ||
1830 | * | ||
1831 | * The critical issue is to order this | ||
1832 | * page_remove_rmap with the ptp_clear_flush above. | ||
1833 | * Those stores are ordered by (if nothing else,) | ||
1834 | * the barrier present in the atomic_add_negative | ||
1835 | * in page_remove_rmap. | ||
1836 | * | ||
1837 | * Then the TLB flush in ptep_clear_flush ensures that | ||
1838 | * no process can access the old page before the | ||
1839 | * decremented mapcount is visible. And the old page | ||
1840 | * cannot be reused until after the decremented | ||
1841 | * mapcount is visible. So transitively, TLBs to | ||
1842 | * old page will be flushed before it can be reused. | ||
1843 | */ | ||
1844 | page_remove_rmap(old_page, vma); | ||
1845 | } | ||
1846 | |||
1765 | /* Free the old page.. */ | 1847 | /* Free the old page.. */ |
1766 | new_page = old_page; | 1848 | new_page = old_page; |
1767 | ret |= VM_FAULT_WRITE; | 1849 | ret |= VM_FAULT_WRITE; |
@@ -2278,8 +2360,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2278 | vmf.flags = flags; | 2360 | vmf.flags = flags; |
2279 | vmf.page = NULL; | 2361 | vmf.page = NULL; |
2280 | 2362 | ||
2281 | BUG_ON(vma->vm_flags & VM_PFNMAP); | ||
2282 | |||
2283 | ret = vma->vm_ops->fault(vma, &vmf); | 2363 | ret = vma->vm_ops->fault(vma, &vmf); |
2284 | if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) | 2364 | if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) |
2285 | return ret; | 2365 | return ret; |
@@ -2619,6 +2699,8 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) | |||
2619 | if (!new) | 2699 | if (!new) |
2620 | return -ENOMEM; | 2700 | return -ENOMEM; |
2621 | 2701 | ||
2702 | smp_wmb(); /* See comment in __pte_alloc */ | ||
2703 | |||
2622 | spin_lock(&mm->page_table_lock); | 2704 | spin_lock(&mm->page_table_lock); |
2623 | if (pgd_present(*pgd)) /* Another has populated it */ | 2705 | if (pgd_present(*pgd)) /* Another has populated it */ |
2624 | pud_free(mm, new); | 2706 | pud_free(mm, new); |
@@ -2640,6 +2722,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) | |||
2640 | if (!new) | 2722 | if (!new) |
2641 | return -ENOMEM; | 2723 | return -ENOMEM; |
2642 | 2724 | ||
2725 | smp_wmb(); /* See comment in __pte_alloc */ | ||
2726 | |||
2643 | spin_lock(&mm->page_table_lock); | 2727 | spin_lock(&mm->page_table_lock); |
2644 | #ifndef __ARCH_HAS_4LEVEL_HACK | 2728 | #ifndef __ARCH_HAS_4LEVEL_HACK |
2645 | if (pud_present(*pud)) /* Another has populated it */ | 2729 | if (pud_present(*pud)) /* Another has populated it */ |
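The smp_wmb() calls added to __pte_alloc(), __pte_alloc_kernel(), __pud_alloc() and __pmd_alloc() above all enforce the same initialise-then-publish ordering described in the new comment. A stripped-down sketch of that pattern, using illustrative names (my_table, publish_table) rather than kernel APIs:

#include <linux/spinlock.h>
#include <linux/string.h>
#include <asm/system.h>         /* smp_wmb() lived here in this kernel era */

struct my_table {
        spinlock_t lock;
        unsigned long entry[64];
};

static struct my_table *shared_table;   /* read locklessly by other CPUs */

static void publish_table(struct my_table *t)
{
        spin_lock_init(&t->lock);
        memset(t->entry, 0, sizeof(t->entry));

        /*
         * All stores initialising *t must be visible before the pointer
         * itself, just as the pte page must be set up before
         * pmd_populate() makes it reachable.
         */
        smp_wmb();
        shared_table = t;               /* the "pmd_populate()" step */
}

/*
 * Readers chase shared_table with data-dependent loads, so only
 * architectures like alpha need smp_read_barrier_depends() on that side.
 */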
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b17dca7249f8..833f854eabe5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -159,21 +159,58 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) | |||
159 | } | 159 | } |
160 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 160 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
161 | 161 | ||
162 | static void grow_zone_span(struct zone *zone, unsigned long start_pfn, | ||
163 | unsigned long end_pfn) | ||
164 | { | ||
165 | unsigned long old_zone_end_pfn; | ||
166 | |||
167 | zone_span_writelock(zone); | ||
168 | |||
169 | old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
170 | if (start_pfn < zone->zone_start_pfn) | ||
171 | zone->zone_start_pfn = start_pfn; | ||
172 | |||
173 | zone->spanned_pages = max(old_zone_end_pfn, end_pfn) - | ||
174 | zone->zone_start_pfn; | ||
175 | |||
176 | zone_span_writeunlock(zone); | ||
177 | } | ||
178 | |||
179 | static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, | ||
180 | unsigned long end_pfn) | ||
181 | { | ||
182 | unsigned long old_pgdat_end_pfn = | ||
183 | pgdat->node_start_pfn + pgdat->node_spanned_pages; | ||
184 | |||
185 | if (start_pfn < pgdat->node_start_pfn) | ||
186 | pgdat->node_start_pfn = start_pfn; | ||
187 | |||
188 | pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) - | ||
189 | pgdat->node_start_pfn; | ||
190 | } | ||
191 | |||
162 | static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) | 192 | static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) |
163 | { | 193 | { |
164 | struct pglist_data *pgdat = zone->zone_pgdat; | 194 | struct pglist_data *pgdat = zone->zone_pgdat; |
165 | int nr_pages = PAGES_PER_SECTION; | 195 | int nr_pages = PAGES_PER_SECTION; |
166 | int nid = pgdat->node_id; | 196 | int nid = pgdat->node_id; |
167 | int zone_type; | 197 | int zone_type; |
198 | unsigned long flags; | ||
168 | 199 | ||
169 | zone_type = zone - pgdat->node_zones; | 200 | zone_type = zone - pgdat->node_zones; |
170 | if (!zone->wait_table) { | 201 | if (!zone->wait_table) { |
171 | int ret = 0; | 202 | int ret; |
203 | |||
172 | ret = init_currently_empty_zone(zone, phys_start_pfn, | 204 | ret = init_currently_empty_zone(zone, phys_start_pfn, |
173 | nr_pages, MEMMAP_HOTPLUG); | 205 | nr_pages, MEMMAP_HOTPLUG); |
174 | if (ret < 0) | 206 | if (ret) |
175 | return ret; | 207 | return ret; |
176 | } | 208 | } |
209 | pgdat_resize_lock(zone->zone_pgdat, &flags); | ||
210 | grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages); | ||
211 | grow_pgdat_span(zone->zone_pgdat, phys_start_pfn, | ||
212 | phys_start_pfn + nr_pages); | ||
213 | pgdat_resize_unlock(zone->zone_pgdat, &flags); | ||
177 | memmap_init_zone(nr_pages, nid, zone_type, | 214 | memmap_init_zone(nr_pages, nid, zone_type, |
178 | phys_start_pfn, MEMMAP_HOTPLUG); | 215 | phys_start_pfn, MEMMAP_HOTPLUG); |
179 | return 0; | 216 | return 0; |
@@ -299,36 +336,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
299 | } | 336 | } |
300 | EXPORT_SYMBOL_GPL(__remove_pages); | 337 | EXPORT_SYMBOL_GPL(__remove_pages); |
301 | 338 | ||
302 | static void grow_zone_span(struct zone *zone, | ||
303 | unsigned long start_pfn, unsigned long end_pfn) | ||
304 | { | ||
305 | unsigned long old_zone_end_pfn; | ||
306 | |||
307 | zone_span_writelock(zone); | ||
308 | |||
309 | old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
310 | if (start_pfn < zone->zone_start_pfn) | ||
311 | zone->zone_start_pfn = start_pfn; | ||
312 | |||
313 | zone->spanned_pages = max(old_zone_end_pfn, end_pfn) - | ||
314 | zone->zone_start_pfn; | ||
315 | |||
316 | zone_span_writeunlock(zone); | ||
317 | } | ||
318 | |||
319 | static void grow_pgdat_span(struct pglist_data *pgdat, | ||
320 | unsigned long start_pfn, unsigned long end_pfn) | ||
321 | { | ||
322 | unsigned long old_pgdat_end_pfn = | ||
323 | pgdat->node_start_pfn + pgdat->node_spanned_pages; | ||
324 | |||
325 | if (start_pfn < pgdat->node_start_pfn) | ||
326 | pgdat->node_start_pfn = start_pfn; | ||
327 | |||
328 | pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) - | ||
329 | pgdat->node_start_pfn; | ||
330 | } | ||
331 | |||
332 | void online_page(struct page *page) | 339 | void online_page(struct page *page) |
333 | { | 340 | { |
334 | totalram_pages++; | 341 | totalram_pages++; |
@@ -367,7 +374,6 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, | |||
367 | 374 | ||
368 | int online_pages(unsigned long pfn, unsigned long nr_pages) | 375 | int online_pages(unsigned long pfn, unsigned long nr_pages) |
369 | { | 376 | { |
370 | unsigned long flags; | ||
371 | unsigned long onlined_pages = 0; | 377 | unsigned long onlined_pages = 0; |
372 | struct zone *zone; | 378 | struct zone *zone; |
373 | int need_zonelists_rebuild = 0; | 379 | int need_zonelists_rebuild = 0; |
@@ -395,11 +401,6 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
395 | * memory_block->state_mutex. | 401 | * memory_block->state_mutex. |
396 | */ | 402 | */ |
397 | zone = page_zone(pfn_to_page(pfn)); | 403 | zone = page_zone(pfn_to_page(pfn)); |
398 | pgdat_resize_lock(zone->zone_pgdat, &flags); | ||
399 | grow_zone_span(zone, pfn, pfn + nr_pages); | ||
400 | grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages); | ||
401 | pgdat_resize_unlock(zone->zone_pgdat, &flags); | ||
402 | |||
403 | /* | 404 | /* |
404 | * If this zone is not populated, then it is not in zonelist. | 405 | * If this zone is not populated, then it is not in zonelist. |
405 | * This means the page allocator ignores this zone. | 406 | * This means the page allocator ignores this zone. |
@@ -408,8 +409,15 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
408 | if (!populated_zone(zone)) | 409 | if (!populated_zone(zone)) |
409 | need_zonelists_rebuild = 1; | 410 | need_zonelists_rebuild = 1; |
410 | 411 | ||
411 | walk_memory_resource(pfn, nr_pages, &onlined_pages, | 412 | ret = walk_memory_resource(pfn, nr_pages, &onlined_pages, |
412 | online_pages_range); | 413 | online_pages_range); |
414 | if (ret) { | ||
415 | printk(KERN_DEBUG "online_pages %lx at %lx failed\n", | ||
416 | nr_pages, pfn); | ||
417 | memory_notify(MEM_CANCEL_ONLINE, &arg); | ||
418 | return ret; | ||
419 | } | ||
420 | |||
413 | zone->present_pages += onlined_pages; | 421 | zone->present_pages += onlined_pages; |
414 | zone->zone_pgdat->node_present_pages += onlined_pages; | 422 | zone->zone_pgdat->node_present_pages += onlined_pages; |
415 | 423 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a37a5034f63d..c94e58b192c3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
729 | } else { | 729 | } else { |
730 | *policy = pol == &default_policy ? MPOL_DEFAULT : | 730 | *policy = pol == &default_policy ? MPOL_DEFAULT : |
731 | pol->mode; | 731 | pol->mode; |
732 | *policy |= pol->flags; | 732 | /* |
733 | * Internal mempolicy flags must be masked off before exposing | ||
734 | * the policy to userspace. | ||
735 | */ | ||
736 | *policy |= (pol->flags & MPOL_MODE_FLAGS); | ||
733 | } | 737 | } |
734 | 738 | ||
735 | if (vma) { | 739 | if (vma) { |
diff --git a/mm/migrate.c b/mm/migrate.c
index 449d77d409f5..55bd355d170d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -9,7 +9,7 @@ | |||
9 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> | 9 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> |
10 | * Hirokazu Takahashi <taka@valinux.co.jp> | 10 | * Hirokazu Takahashi <taka@valinux.co.jp> |
11 | * Dave Hansen <haveblue@us.ibm.com> | 11 | * Dave Hansen <haveblue@us.ibm.com> |
12 | * Christoph Lameter <clameter@sgi.com> | 12 | * Christoph Lameter |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/migrate.h> | 15 | #include <linux/migrate.h> |
@@ -865,6 +865,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, | |||
865 | goto set_status; | 865 | goto set_status; |
866 | 866 | ||
867 | page = follow_page(vma, pp->addr, FOLL_GET); | 867 | page = follow_page(vma, pp->addr, FOLL_GET); |
868 | |||
869 | err = PTR_ERR(page); | ||
870 | if (IS_ERR(page)) | ||
871 | goto set_status; | ||
872 | |||
868 | err = -ENOENT; | 873 | err = -ENOENT; |
869 | if (!page) | 874 | if (!page) |
870 | goto set_status; | 875 | goto set_status; |
@@ -928,6 +933,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) | |||
928 | goto set_status; | 933 | goto set_status; |
929 | 934 | ||
930 | page = follow_page(vma, pm->addr, 0); | 935 | page = follow_page(vma, pm->addr, 0); |
936 | |||
937 | err = PTR_ERR(page); | ||
938 | if (IS_ERR(page)) | ||
939 | goto set_status; | ||
940 | |||
931 | err = -ENOENT; | 941 | err = -ENOENT; |
932 | /* Use PageReserved to check for zero page */ | 942 | /* Use PageReserved to check for zero page */ |
933 | if (!page || PageReserved(page)) | 943 | if (!page || PageReserved(page)) |
diff --git a/mm/mmap.c b/mm/mmap.c
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(vm_get_page_prot); | |||
80 | int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ | 80 | int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ |
81 | int sysctl_overcommit_ratio = 50; /* default is 50% */ | 81 | int sysctl_overcommit_ratio = 50; /* default is 50% */ |
82 | int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; | 82 | int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; |
83 | atomic_t vm_committed_space = ATOMIC_INIT(0); | 83 | atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); |
84 | 84 | ||
85 | /* | 85 | /* |
86 | * Check that a process has enough memory to allocate a new virtual | 86 | * Check that a process has enough memory to allocate a new virtual |
@@ -177,7 +177,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
177 | * cast `allowed' as a signed long because vm_committed_space | 177 | * cast `allowed' as a signed long because vm_committed_space |
178 | * sometimes has a negative value | 178 | * sometimes has a negative value |
179 | */ | 179 | */ |
180 | if (atomic_read(&vm_committed_space) < (long)allowed) | 180 | if (atomic_long_read(&vm_committed_space) < (long)allowed) |
181 | return 0; | 181 | return 0; |
182 | error: | 182 | error: |
183 | vm_unacct_memory(pages); | 183 | vm_unacct_memory(pages); |
@@ -245,10 +245,16 @@ asmlinkage unsigned long sys_brk(unsigned long brk) | |||
245 | unsigned long rlim, retval; | 245 | unsigned long rlim, retval; |
246 | unsigned long newbrk, oldbrk; | 246 | unsigned long newbrk, oldbrk; |
247 | struct mm_struct *mm = current->mm; | 247 | struct mm_struct *mm = current->mm; |
248 | unsigned long min_brk; | ||
248 | 249 | ||
249 | down_write(&mm->mmap_sem); | 250 | down_write(&mm->mmap_sem); |
250 | 251 | ||
251 | if (brk < mm->start_brk) | 252 | #ifdef CONFIG_COMPAT_BRK |
253 | min_brk = mm->end_code; | ||
254 | #else | ||
255 | min_brk = mm->start_brk; | ||
256 | #endif | ||
257 | if (brk < min_brk) | ||
252 | goto out; | 258 | goto out; |
253 | 259 | ||
254 | /* | 260 | /* |
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4de546899dc1..acfe7c8d72fc 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -26,6 +26,13 @@ | |||
26 | #include <asm/cacheflush.h> | 26 | #include <asm/cacheflush.h> |
27 | #include <asm/tlbflush.h> | 27 | #include <asm/tlbflush.h> |
28 | 28 | ||
29 | #ifndef pgprot_modify | ||
30 | static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) | ||
31 | { | ||
32 | return newprot; | ||
33 | } | ||
34 | #endif | ||
35 | |||
29 | static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, | 36 | static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, |
30 | unsigned long addr, unsigned long end, pgprot_t newprot, | 37 | unsigned long addr, unsigned long end, pgprot_t newprot, |
31 | int dirty_accountable) | 38 | int dirty_accountable) |
@@ -40,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, | |||
40 | if (pte_present(oldpte)) { | 47 | if (pte_present(oldpte)) { |
41 | pte_t ptent; | 48 | pte_t ptent; |
42 | 49 | ||
43 | /* Avoid an SMP race with hardware updated dirty/clean | 50 | ptent = ptep_modify_prot_start(mm, addr, pte); |
44 | * bits by wiping the pte and then setting the new pte | ||
45 | * into place. | ||
46 | */ | ||
47 | ptent = ptep_get_and_clear(mm, addr, pte); | ||
48 | ptent = pte_modify(ptent, newprot); | 51 | ptent = pte_modify(ptent, newprot); |
52 | |||
49 | /* | 53 | /* |
50 | * Avoid taking write faults for pages we know to be | 54 | * Avoid taking write faults for pages we know to be |
51 | * dirty. | 55 | * dirty. |
52 | */ | 56 | */ |
53 | if (dirty_accountable && pte_dirty(ptent)) | 57 | if (dirty_accountable && pte_dirty(ptent)) |
54 | ptent = pte_mkwrite(ptent); | 58 | ptent = pte_mkwrite(ptent); |
55 | set_pte_at(mm, addr, pte, ptent); | 59 | |
60 | ptep_modify_prot_commit(mm, addr, pte, ptent); | ||
56 | #ifdef CONFIG_MIGRATION | 61 | #ifdef CONFIG_MIGRATION |
57 | } else if (!pte_file(oldpte)) { | 62 | } else if (!pte_file(oldpte)) { |
58 | swp_entry_t entry = pte_to_swp_entry(oldpte); | 63 | swp_entry_t entry = pte_to_swp_entry(oldpte); |
@@ -192,7 +197,9 @@ success: | |||
192 | * held in write mode. | 197 | * held in write mode. |
193 | */ | 198 | */ |
194 | vma->vm_flags = newflags; | 199 | vma->vm_flags = newflags; |
195 | vma->vm_page_prot = vm_get_page_prot(newflags); | 200 | vma->vm_page_prot = pgprot_modify(vma->vm_page_prot, |
201 | vm_get_page_prot(newflags)); | ||
202 | |||
196 | if (vma_wants_writenotify(vma)) { | 203 | if (vma_wants_writenotify(vma)) { |
197 | vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED); | 204 | vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED); |
198 | dirty_accountable = 1; | 205 | dirty_accountable = 1; |
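The ptep_modify_prot_start()/ptep_modify_prot_commit() pair used above turns the old clear-modify-set sequence into an explicit transaction that paravirtualized implementations can intercept or batch. On architectures with no special implementation the pair presumably reduces to roughly the old behaviour; the sketch below illustrates that assumption and is not the exact asm-generic code:

/* Sketch of what the generic, non-paravirt fallback amounts to. */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
                                           unsigned long addr, pte_t *ptep)
{
        /*
         * Clear the pte so the hardware cannot update dirty/accessed
         * bits behind our back while the new value is computed.
         */
        return ptep_get_and_clear(mm, addr, ptep);
}

static inline void ptep_modify_prot_commit(struct mm_struct *mm,
                                           unsigned long addr,
                                           pte_t *ptep, pte_t pte)
{
        /* Install the fully formed pte in a single store. */
        set_pte_at(mm, addr, ptep, pte);
}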
diff --git a/mm/nommu.c b/mm/nommu.c
index ef8c62cec697..4462b6a3fcb9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -39,7 +39,7 @@ struct page *mem_map; | |||
39 | unsigned long max_mapnr; | 39 | unsigned long max_mapnr; |
40 | unsigned long num_physpages; | 40 | unsigned long num_physpages; |
41 | unsigned long askedalloc, realalloc; | 41 | unsigned long askedalloc, realalloc; |
42 | atomic_t vm_committed_space = ATOMIC_INIT(0); | 42 | atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); |
43 | int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ | 43 | int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ |
44 | int sysctl_overcommit_ratio = 50; /* default is 50% */ | 44 | int sysctl_overcommit_ratio = 50; /* default is 50% */ |
45 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; | 45 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; |
@@ -109,16 +109,23 @@ unsigned int kobjsize(const void *objp) | |||
109 | * If the object we have should not have ksize performed on it, | 109 | * If the object we have should not have ksize performed on it, |
110 | * return size of 0 | 110 | * return size of 0 |
111 | */ | 111 | */ |
112 | if (!objp || (unsigned long)objp >= memory_end || !((page = virt_to_page(objp)))) | 112 | if (!objp || !virt_addr_valid(objp)) |
113 | return 0; | 113 | return 0; |
114 | 114 | ||
115 | page = virt_to_head_page(objp); | ||
116 | |||
117 | /* | ||
118 | * If the allocator sets PageSlab, we know the pointer came from | ||
119 | * kmalloc(). | ||
120 | */ | ||
115 | if (PageSlab(page)) | 121 | if (PageSlab(page)) |
116 | return ksize(objp); | 122 | return ksize(objp); |
117 | 123 | ||
118 | BUG_ON(page->index < 0); | 124 | /* |
119 | BUG_ON(page->index >= MAX_ORDER); | 125 | * The ksize() function is only guaranteed to work for pointers |
120 | 126 | * returned by kmalloc(). So handle arbitrary pointers here. | |
121 | return (PAGE_SIZE << page->index); | 127 | */ |
128 | return PAGE_SIZE << compound_order(page); | ||
122 | } | 129 | } |
123 | 130 | ||
124 | /* | 131 | /* |
@@ -1410,7 +1417,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
1410 | * cast `allowed' as a signed long because vm_committed_space | 1417 | * cast `allowed' as a signed long because vm_committed_space |
1411 | * sometimes has a negative value | 1418 | * sometimes has a negative value |
1412 | */ | 1419 | */ |
1413 | if (atomic_read(&vm_committed_space) < (long)allowed) | 1420 | if (atomic_long_read(&vm_committed_space) < (long)allowed) |
1414 | return 0; | 1421 | return 0; |
1415 | error: | 1422 | error: |
1416 | vm_unacct_memory(pages); | 1423 | vm_unacct_memory(pages); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bdd5c432c426..f32fae3121f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -237,16 +237,7 @@ static void bad_page(struct page *page) | |||
237 | printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" | 237 | printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" |
238 | KERN_EMERG "Backtrace:\n"); | 238 | KERN_EMERG "Backtrace:\n"); |
239 | dump_stack(); | 239 | dump_stack(); |
240 | page->flags &= ~(1 << PG_lru | | 240 | page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD; |
241 | 1 << PG_private | | ||
242 | 1 << PG_locked | | ||
243 | 1 << PG_active | | ||
244 | 1 << PG_dirty | | ||
245 | 1 << PG_reclaim | | ||
246 | 1 << PG_slab | | ||
247 | 1 << PG_swapcache | | ||
248 | 1 << PG_writeback | | ||
249 | 1 << PG_buddy ); | ||
250 | set_page_count(page, 0); | 241 | set_page_count(page, 0); |
251 | reset_page_mapcount(page); | 242 | reset_page_mapcount(page); |
252 | page->mapping = NULL; | 243 | page->mapping = NULL; |
@@ -463,16 +454,7 @@ static inline int free_pages_check(struct page *page) | |||
463 | (page->mapping != NULL) | | 454 | (page->mapping != NULL) | |
464 | (page_get_page_cgroup(page) != NULL) | | 455 | (page_get_page_cgroup(page) != NULL) | |
465 | (page_count(page) != 0) | | 456 | (page_count(page) != 0) | |
466 | (page->flags & ( | 457 | (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) |
467 | 1 << PG_lru | | ||
468 | 1 << PG_private | | ||
469 | 1 << PG_locked | | ||
470 | 1 << PG_active | | ||
471 | 1 << PG_slab | | ||
472 | 1 << PG_swapcache | | ||
473 | 1 << PG_writeback | | ||
474 | 1 << PG_reserved | | ||
475 | 1 << PG_buddy )))) | ||
476 | bad_page(page); | 458 | bad_page(page); |
477 | if (PageDirty(page)) | 459 | if (PageDirty(page)) |
478 | __ClearPageDirty(page); | 460 | __ClearPageDirty(page); |
@@ -616,17 +598,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | |||
616 | (page->mapping != NULL) | | 598 | (page->mapping != NULL) | |
617 | (page_get_page_cgroup(page) != NULL) | | 599 | (page_get_page_cgroup(page) != NULL) | |
618 | (page_count(page) != 0) | | 600 | (page_count(page) != 0) | |
619 | (page->flags & ( | 601 | (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) |
620 | 1 << PG_lru | | ||
621 | 1 << PG_private | | ||
622 | 1 << PG_locked | | ||
623 | 1 << PG_active | | ||
624 | 1 << PG_dirty | | ||
625 | 1 << PG_slab | | ||
626 | 1 << PG_swapcache | | ||
627 | 1 << PG_writeback | | ||
628 | 1 << PG_reserved | | ||
629 | 1 << PG_buddy )))) | ||
630 | bad_page(page); | 602 | bad_page(page); |
631 | 603 | ||
632 | /* | 604 | /* |
@@ -1396,6 +1368,9 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, | |||
1396 | 1368 | ||
1397 | (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask, | 1369 | (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask, |
1398 | &preferred_zone); | 1370 | &preferred_zone); |
1371 | if (!preferred_zone) | ||
1372 | return NULL; | ||
1373 | |||
1399 | classzone_idx = zone_idx(preferred_zone); | 1374 | classzone_idx = zone_idx(preferred_zone); |
1400 | 1375 | ||
1401 | zonelist_scan: | 1376 | zonelist_scan: |
@@ -2353,7 +2328,6 @@ static void build_zonelists(pg_data_t *pgdat) | |||
2353 | static void build_zonelist_cache(pg_data_t *pgdat) | 2328 | static void build_zonelist_cache(pg_data_t *pgdat) |
2354 | { | 2329 | { |
2355 | pgdat->node_zonelists[0].zlcache_ptr = NULL; | 2330 | pgdat->node_zonelists[0].zlcache_ptr = NULL; |
2356 | pgdat->node_zonelists[1].zlcache_ptr = NULL; | ||
2357 | } | 2331 | } |
2358 | 2332 | ||
2359 | #endif /* CONFIG_NUMA */ | 2333 | #endif /* CONFIG_NUMA */ |
@@ -2804,7 +2778,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) | |||
2804 | alloc_size = zone->wait_table_hash_nr_entries | 2778 | alloc_size = zone->wait_table_hash_nr_entries |
2805 | * sizeof(wait_queue_head_t); | 2779 | * sizeof(wait_queue_head_t); |
2806 | 2780 | ||
2807 | if (system_state == SYSTEM_BOOTING) { | 2781 | if (!slab_is_available()) { |
2808 | zone->wait_table = (wait_queue_head_t *) | 2782 | zone->wait_table = (wait_queue_head_t *) |
2809 | alloc_bootmem_node(pgdat, alloc_size); | 2783 | alloc_bootmem_node(pgdat, alloc_size); |
2810 | } else { | 2784 | } else { |
@@ -2862,8 +2836,6 @@ __meminit int init_currently_empty_zone(struct zone *zone, | |||
2862 | 2836 | ||
2863 | zone->zone_start_pfn = zone_start_pfn; | 2837 | zone->zone_start_pfn = zone_start_pfn; |
2864 | 2838 | ||
2865 | memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); | ||
2866 | |||
2867 | zone_init_free_lists(zone); | 2839 | zone_init_free_lists(zone); |
2868 | 2840 | ||
2869 | return 0; | 2841 | return 0; |
@@ -3380,7 +3352,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
3380 | * is used by this zone for memmap. This affects the watermark | 3352 | * is used by this zone for memmap. This affects the watermark |
3381 | * and per-cpu initialisations | 3353 | * and per-cpu initialisations |
3382 | */ | 3354 | */ |
3383 | memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT; | 3355 | memmap_pages = |
3356 | PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; | ||
3384 | if (realsize >= memmap_pages) { | 3357 | if (realsize >= memmap_pages) { |
3385 | realsize -= memmap_pages; | 3358 | realsize -= memmap_pages; |
3386 | printk(KERN_DEBUG | 3359 | printk(KERN_DEBUG |
@@ -3433,6 +3406,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
3433 | ret = init_currently_empty_zone(zone, zone_start_pfn, | 3406 | ret = init_currently_empty_zone(zone, zone_start_pfn, |
3434 | size, MEMMAP_EARLY); | 3407 | size, MEMMAP_EARLY); |
3435 | BUG_ON(ret); | 3408 | BUG_ON(ret); |
3409 | memmap_init(size, nid, j, zone_start_pfn); | ||
3436 | zone_start_pfn += size; | 3410 | zone_start_pfn += size; |
3437 | } | 3411 | } |
3438 | } | 3412 | } |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 0afd2387e507..d5878bed7841 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,14 +3,14 @@ | |||
3 | #include <linux/sched.h> | 3 | #include <linux/sched.h> |
4 | 4 | ||
5 | static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 5 | static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
6 | const struct mm_walk *walk, void *private) | 6 | struct mm_walk *walk) |
7 | { | 7 | { |
8 | pte_t *pte; | 8 | pte_t *pte; |
9 | int err = 0; | 9 | int err = 0; |
10 | 10 | ||
11 | pte = pte_offset_map(pmd, addr); | 11 | pte = pte_offset_map(pmd, addr); |
12 | for (;;) { | 12 | for (;;) { |
13 | err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private); | 13 | err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk); |
14 | if (err) | 14 | if (err) |
15 | break; | 15 | break; |
16 | addr += PAGE_SIZE; | 16 | addr += PAGE_SIZE; |
@@ -24,7 +24,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
24 | } | 24 | } |
25 | 25 | ||
26 | static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, | 26 | static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, |
27 | const struct mm_walk *walk, void *private) | 27 | struct mm_walk *walk) |
28 | { | 28 | { |
29 | pmd_t *pmd; | 29 | pmd_t *pmd; |
30 | unsigned long next; | 30 | unsigned long next; |
@@ -35,15 +35,15 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, | |||
35 | next = pmd_addr_end(addr, end); | 35 | next = pmd_addr_end(addr, end); |
36 | if (pmd_none_or_clear_bad(pmd)) { | 36 | if (pmd_none_or_clear_bad(pmd)) { |
37 | if (walk->pte_hole) | 37 | if (walk->pte_hole) |
38 | err = walk->pte_hole(addr, next, private); | 38 | err = walk->pte_hole(addr, next, walk); |
39 | if (err) | 39 | if (err) |
40 | break; | 40 | break; |
41 | continue; | 41 | continue; |
42 | } | 42 | } |
43 | if (walk->pmd_entry) | 43 | if (walk->pmd_entry) |
44 | err = walk->pmd_entry(pmd, addr, next, private); | 44 | err = walk->pmd_entry(pmd, addr, next, walk); |
45 | if (!err && walk->pte_entry) | 45 | if (!err && walk->pte_entry) |
46 | err = walk_pte_range(pmd, addr, next, walk, private); | 46 | err = walk_pte_range(pmd, addr, next, walk); |
47 | if (err) | 47 | if (err) |
48 | break; | 48 | break; |
49 | } while (pmd++, addr = next, addr != end); | 49 | } while (pmd++, addr = next, addr != end); |
@@ -52,7 +52,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, | |||
52 | } | 52 | } |
53 | 53 | ||
54 | static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, | 54 | static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, |
55 | const struct mm_walk *walk, void *private) | 55 | struct mm_walk *walk) |
56 | { | 56 | { |
57 | pud_t *pud; | 57 | pud_t *pud; |
58 | unsigned long next; | 58 | unsigned long next; |
@@ -63,15 +63,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
63 | next = pud_addr_end(addr, end); | 63 | next = pud_addr_end(addr, end); |
64 | if (pud_none_or_clear_bad(pud)) { | 64 | if (pud_none_or_clear_bad(pud)) { |
65 | if (walk->pte_hole) | 65 | if (walk->pte_hole) |
66 | err = walk->pte_hole(addr, next, private); | 66 | err = walk->pte_hole(addr, next, walk); |
67 | if (err) | 67 | if (err) |
68 | break; | 68 | break; |
69 | continue; | 69 | continue; |
70 | } | 70 | } |
71 | if (walk->pud_entry) | 71 | if (walk->pud_entry) |
72 | err = walk->pud_entry(pud, addr, next, private); | 72 | err = walk->pud_entry(pud, addr, next, walk); |
73 | if (!err && (walk->pmd_entry || walk->pte_entry)) | 73 | if (!err && (walk->pmd_entry || walk->pte_entry)) |
74 | err = walk_pmd_range(pud, addr, next, walk, private); | 74 | err = walk_pmd_range(pud, addr, next, walk); |
75 | if (err) | 75 | if (err) |
76 | break; | 76 | break; |
77 | } while (pud++, addr = next, addr != end); | 77 | } while (pud++, addr = next, addr != end); |
@@ -85,15 +85,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
85 | * @addr: starting address | 85 | * @addr: starting address |
86 | * @end: ending address | 86 | * @end: ending address |
87 | * @walk: set of callbacks to invoke for each level of the tree | 87 | * @walk: set of callbacks to invoke for each level of the tree |
88 | * @private: private data passed to the callback function | ||
89 | * | 88 | * |
90 | * Recursively walk the page table for the memory area in a VMA, | 89 | * Recursively walk the page table for the memory area in a VMA, |
91 | * calling supplied callbacks. Callbacks are called in-order (first | 90 | * calling supplied callbacks. Callbacks are called in-order (first |
92 | * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, | 91 | * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, |
93 | * etc.). If lower-level callbacks are omitted, walking depth is reduced. | 92 | * etc.). If lower-level callbacks are omitted, walking depth is reduced. |
94 | * | 93 | * |
95 | * Each callback receives an entry pointer, the start and end of the | 94 | * Each callback receives an entry pointer and the start and end of the |
96 | * associated range, and a caller-supplied private data pointer. | 95 | * associated range, and a copy of the original mm_walk for access to |
96 | * the ->private or ->mm fields. | ||
97 | * | 97 | * |
98 | * No locks are taken, but the bottom level iterator will map PTE | 98 | * No locks are taken, but the bottom level iterator will map PTE |
99 | * directories from highmem if necessary. | 99 | * directories from highmem if necessary. |
@@ -101,9 +101,8 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
101 | * If any callback returns a non-zero value, the walk is aborted and | 101 | * If any callback returns a non-zero value, the walk is aborted and |
102 | * the return value is propagated back to the caller. Otherwise 0 is returned. | 102 | * the return value is propagated back to the caller. Otherwise 0 is returned. |
103 | */ | 103 | */ |
104 | int walk_page_range(const struct mm_struct *mm, | 104 | int walk_page_range(unsigned long addr, unsigned long end, |
105 | unsigned long addr, unsigned long end, | 105 | struct mm_walk *walk) |
106 | const struct mm_walk *walk, void *private) | ||
107 | { | 106 | { |
108 | pgd_t *pgd; | 107 | pgd_t *pgd; |
109 | unsigned long next; | 108 | unsigned long next; |
@@ -112,21 +111,24 @@ int walk_page_range(const struct mm_struct *mm, | |||
112 | if (addr >= end) | 111 | if (addr >= end) |
113 | return err; | 112 | return err; |
114 | 113 | ||
115 | pgd = pgd_offset(mm, addr); | 114 | if (!walk->mm) |
115 | return -EINVAL; | ||
116 | |||
117 | pgd = pgd_offset(walk->mm, addr); | ||
116 | do { | 118 | do { |
117 | next = pgd_addr_end(addr, end); | 119 | next = pgd_addr_end(addr, end); |
118 | if (pgd_none_or_clear_bad(pgd)) { | 120 | if (pgd_none_or_clear_bad(pgd)) { |
119 | if (walk->pte_hole) | 121 | if (walk->pte_hole) |
120 | err = walk->pte_hole(addr, next, private); | 122 | err = walk->pte_hole(addr, next, walk); |
121 | if (err) | 123 | if (err) |
122 | break; | 124 | break; |
123 | continue; | 125 | continue; |
124 | } | 126 | } |
125 | if (walk->pgd_entry) | 127 | if (walk->pgd_entry) |
126 | err = walk->pgd_entry(pgd, addr, next, private); | 128 | err = walk->pgd_entry(pgd, addr, next, walk); |
127 | if (!err && | 129 | if (!err && |
128 | (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) | 130 | (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) |
129 | err = walk_pud_range(pgd, addr, next, walk, private); | 131 | err = walk_pud_range(pgd, addr, next, walk); |
130 | if (err) | 132 | if (err) |
131 | break; | 133 | break; |
132 | } while (pgd++, addr = next, addr != end); | 134 | } while (pgd++, addr = next, addr != end); |
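With the walk_page_range() rework above, the mm and the private pointer both travel inside struct mm_walk, and every callback receives the walk itself. A minimal sketch of a caller under the new interface; count_pmd(), count_present_pmds() and struct pmd_count are illustrative names, not kernel code:

#include <linux/mm.h>

struct pmd_count {
        unsigned long present;
};

static int count_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
                     struct mm_walk *walk)
{
        struct pmd_count *pc = walk->private;  /* was the old void *private */

        if (pmd_present(*pmd))
                pc->present++;
        return 0;                               /* non-zero aborts the walk */
}

static unsigned long count_present_pmds(struct mm_struct *mm,
                                        unsigned long start,
                                        unsigned long end)
{
        struct pmd_count pc = { .present = 0 };
        struct mm_walk walk = {
                .pmd_entry = count_pmd,
                .mm        = mm,        /* was walk_page_range()'s first argument */
                .private   = &pc,
        };

        walk_page_range(start, end, &walk);
        return pc.present;
}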
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 1c96cfc9e040..9d834aa4b979 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -207,7 +207,6 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0) | |||
207 | 207 | ||
208 | spin_lock_irqsave(&pdflush_lock, flags); | 208 | spin_lock_irqsave(&pdflush_lock, flags); |
209 | if (list_empty(&pdflush_list)) { | 209 | if (list_empty(&pdflush_list)) { |
210 | spin_unlock_irqrestore(&pdflush_lock, flags); | ||
211 | ret = -1; | 210 | ret = -1; |
212 | } else { | 211 | } else { |
213 | struct pdflush_work *pdf; | 212 | struct pdflush_work *pdf; |
@@ -219,8 +218,9 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0) | |||
219 | pdf->fn = fn; | 218 | pdf->fn = fn; |
220 | pdf->arg0 = arg0; | 219 | pdf->arg0 = arg0; |
221 | wake_up_process(pdf->who); | 220 | wake_up_process(pdf->who); |
222 | spin_unlock_irqrestore(&pdflush_lock, flags); | ||
223 | } | 221 | } |
222 | spin_unlock_irqrestore(&pdflush_lock, flags); | ||
223 | |||
224 | return ret; | 224 | return ret; |
225 | } | 225 | } |
226 | 226 | ||
diff --git a/mm/slab.c b/mm/slab.c
@@ -3263,9 +3263,12 @@ retry: | |||
3263 | 3263 | ||
3264 | if (cpuset_zone_allowed_hardwall(zone, flags) && | 3264 | if (cpuset_zone_allowed_hardwall(zone, flags) && |
3265 | cache->nodelists[nid] && | 3265 | cache->nodelists[nid] && |
3266 | cache->nodelists[nid]->free_objects) | 3266 | cache->nodelists[nid]->free_objects) { |
3267 | obj = ____cache_alloc_node(cache, | 3267 | obj = ____cache_alloc_node(cache, |
3268 | flags | GFP_THISNODE, nid); | 3268 | flags | GFP_THISNODE, nid); |
3269 | if (obj) | ||
3270 | break; | ||
3271 | } | ||
3269 | } | 3272 | } |
3270 | 3273 | ||
3271 | if (!obj) { | 3274 | if (!obj) { |
diff --git a/mm/slob.c b/mm/slob.c
@@ -469,8 +469,9 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) | |||
469 | return ZERO_SIZE_PTR; | 469 | return ZERO_SIZE_PTR; |
470 | 470 | ||
471 | m = slob_alloc(size + align, gfp, align, node); | 471 | m = slob_alloc(size + align, gfp, align, node); |
472 | if (m) | 472 | if (!m) |
473 | *m = size; | 473 | return NULL; |
474 | *m = size; | ||
474 | return (void *)m + align; | 475 | return (void *)m + align; |
475 | } else { | 476 | } else { |
476 | void *ret; | 477 | void *ret; |
diff --git a/mm/slub.c b/mm/slub.c
@@ -5,7 +5,7 @@ | |||
5 | * The allocator synchronizes using per slab locks and only | 5 | * The allocator synchronizes using per slab locks and only |
6 | * uses a centralized lock to manage a pool of partial slabs. | 6 | * uses a centralized lock to manage a pool of partial slabs. |
7 | * | 7 | * |
8 | * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com> | 8 | * (C) 2007 SGI, Christoph Lameter |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
@@ -2726,9 +2726,10 @@ size_t ksize(const void *object) | |||
2726 | 2726 | ||
2727 | page = virt_to_head_page(object); | 2727 | page = virt_to_head_page(object); |
2728 | 2728 | ||
2729 | if (unlikely(!PageSlab(page))) | 2729 | if (unlikely(!PageSlab(page))) { |
2730 | WARN_ON(!PageCompound(page)); | ||
2730 | return PAGE_SIZE << compound_order(page); | 2731 | return PAGE_SIZE << compound_order(page); |
2731 | 2732 | } | |
2732 | s = page->slab; | 2733 | s = page->slab; |
2733 | 2734 | ||
2734 | #ifdef CONFIG_SLUB_DEBUG | 2735 | #ifdef CONFIG_SLUB_DEBUG |
@@ -2994,8 +2995,6 @@ void __init kmem_cache_init(void) | |||
2994 | create_kmalloc_cache(&kmalloc_caches[1], | 2995 | create_kmalloc_cache(&kmalloc_caches[1], |
2995 | "kmalloc-96", 96, GFP_KERNEL); | 2996 | "kmalloc-96", 96, GFP_KERNEL); |
2996 | caches++; | 2997 | caches++; |
2997 | } | ||
2998 | if (KMALLOC_MIN_SIZE <= 128) { | ||
2999 | create_kmalloc_cache(&kmalloc_caches[2], | 2998 | create_kmalloc_cache(&kmalloc_caches[2], |
3000 | "kmalloc-192", 192, GFP_KERNEL); | 2999 | "kmalloc-192", 192, GFP_KERNEL); |
3001 | caches++; | 3000 | caches++; |
@@ -3025,6 +3024,16 @@ void __init kmem_cache_init(void) | |||
3025 | for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) | 3024 | for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) |
3026 | size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; | 3025 | size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; |
3027 | 3026 | ||
3027 | if (KMALLOC_MIN_SIZE == 128) { | ||
3028 | /* | ||
3029 | * The 192 byte sized cache is not used if the alignment | ||
3030 | * is 128 byte. Redirect kmalloc to use the 256 byte cache | ||
3031 | * instead. | ||
3032 | */ | ||
3033 | for (i = 128 + 8; i <= 192; i += 8) | ||
3034 | size_index[(i - 1) / 8] = 8; | ||
3035 | } | ||
3036 | |||
3028 | slab_state = UP; | 3037 | slab_state = UP; |
3029 | 3038 | ||
3030 | /* Provide the correct kmalloc names now that the caches are up */ | 3039 | /* Provide the correct kmalloc names now that the caches are up */ |
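The size_index[] fix-up above matters because, with a 128-byte minimum alignment, the kmalloc-96 and kmalloc-192 caches are not usable, so requests between 128 and 192 bytes must fall through to the 256-byte cache, i.e. kmalloc_caches[8] under slub's power-of-two indexing. A rough sketch of how such a table-driven lookup resolves those sizes; this illustrates the idea and is not slub's exact lookup routine:

/* Illustrative only: table-driven small-size lookup, slub-style. */
static struct kmem_cache *small_kmalloc_cache(size_t size)
{
        /*
         * size_index[] maps each 8-byte bucket below 192 bytes to a
         * cache index.  After the fix-up above, size_index[(160 - 1) / 8]
         * is 8, so kmalloc(160) uses the 256-byte cache when
         * KMALLOC_MIN_SIZE is 128.
         */
        return &kmalloc_caches[size_index[(size - 1) / 8]];
}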
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 99c4f36eb8a3..a91b5f8fcaf6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Virtual Memory Map support | 2 | * Virtual Memory Map support |
3 | * | 3 | * |
4 | * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>. | 4 | * (C) 2007 sgi. Christoph Lameter. |
5 | * | 5 | * |
6 | * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn, | 6 | * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn, |
7 | * virt_to_page, page_address() to be implemented as a base offset | 7 | * virt_to_page, page_address() to be implemented as a base offset |
diff --git a/mm/swap.c b/mm/swap.c
@@ -503,7 +503,7 @@ void vm_acct_memory(long pages) | |||
503 | local = &__get_cpu_var(committed_space); | 503 | local = &__get_cpu_var(committed_space); |
504 | *local += pages; | 504 | *local += pages; |
505 | if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { | 505 | if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { |
506 | atomic_add(*local, &vm_committed_space); | 506 | atomic_long_add(*local, &vm_committed_space); |
507 | *local = 0; | 507 | *local = 0; |
508 | } | 508 | } |
509 | preempt_enable(); | 509 | preempt_enable(); |
@@ -520,7 +520,7 @@ static int cpu_swap_callback(struct notifier_block *nfb, | |||
520 | 520 | ||
521 | committed = &per_cpu(committed_space, (long)hcpu); | 521 | committed = &per_cpu(committed_space, (long)hcpu); |
522 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | 522 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { |
523 | atomic_add(*committed, &vm_committed_space); | 523 | atomic_long_add(*committed, &vm_committed_space); |
524 | *committed = 0; | 524 | *committed = 0; |
525 | drain_cpu_pagevecs((long)hcpu); | 525 | drain_cpu_pagevecs((long)hcpu); |
526 | } | 526 | } |
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9a29901ad3b3..967d30ccd92b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1307,7 +1307,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1307 | struct scan_control *sc) | 1307 | struct scan_control *sc) |
1308 | { | 1308 | { |
1309 | int priority; | 1309 | int priority; |
1310 | int ret = 0; | 1310 | unsigned long ret = 0; |
1311 | unsigned long total_scanned = 0; | 1311 | unsigned long total_scanned = 0; |
1312 | unsigned long nr_reclaimed = 0; | 1312 | unsigned long nr_reclaimed = 0; |
1313 | struct reclaim_state *reclaim_state = current->reclaim_state; | 1313 | struct reclaim_state *reclaim_state = current->reclaim_state; |
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1a32130b958c..db9eabb2c5b3 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -41,7 +41,9 @@ static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask) | |||
41 | */ | 41 | */ |
42 | void all_vm_events(unsigned long *ret) | 42 | void all_vm_events(unsigned long *ret) |
43 | { | 43 | { |
44 | get_online_cpus(); | ||
44 | sum_vm_events(ret, &cpu_online_map); | 45 | sum_vm_events(ret, &cpu_online_map); |
46 | put_online_cpus(); | ||
45 | } | 47 | } |
46 | EXPORT_SYMBOL_GPL(all_vm_events); | 48 | EXPORT_SYMBOL_GPL(all_vm_events); |
47 | 49 | ||
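The get_online_cpus()/put_online_cpus() bracket added above is the usual way to keep the set of online CPUs stable while it is iterated. The same pattern applied to an arbitrary per-cpu counter (my_counter and sum_my_counter() are illustrative, not kernel code):

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_counter);

static unsigned long sum_my_counter(void)
{
        unsigned long sum = 0;
        int cpu;

        get_online_cpus();      /* block CPU hotplug while iterating */
        for_each_online_cpu(cpu)
                sum += per_cpu(my_counter, cpu);
        put_online_cpus();

        return sum;
}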