Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig              6
-rw-r--r--  mm/bootmem.c          195
-rw-r--r--  mm/hugetlb.c            4
-rw-r--r--  mm/memory.c            14
-rw-r--r--  mm/migrate.c            2
-rw-r--r--  mm/mmu_context.c        3
-rw-r--r--  mm/page_alloc.c       263
-rw-r--r--  mm/percpu.c            36
-rw-r--r--  mm/sparse-vmemmap.c    76
-rw-r--r--  mm/sparse.c           196
-rw-r--r--  mm/vmstat.c            15
11 files changed, 620 insertions, 190 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 17b8947aa7da..9c61158308dc 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME
115config SPARSEMEM_VMEMMAP_ENABLE 115config SPARSEMEM_VMEMMAP_ENABLE
116 bool 116 bool
117 117
118config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
119 def_bool y
120 depends on SPARSEMEM && X86_64
121
118config SPARSEMEM_VMEMMAP 122config SPARSEMEM_VMEMMAP
119 bool "Sparse Memory virtual memmap" 123 bool "Sparse Memory virtual memmap"
120 depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE 124 depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
@@ -195,7 +199,7 @@ config BOUNCE
195config NR_QUICK 199config NR_QUICK
196 int 200 int
197 depends on QUICKLIST 201 depends on QUICKLIST
198 default "2" if SUPERH || AVR32 202 default "2" if AVR32
199 default "1" 203 default "1"
200 204
201config VIRT_TO_BUS 205config VIRT_TO_BUS
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 7d1486875e1c..d7c791ef0036 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -13,6 +13,7 @@
13#include <linux/bootmem.h> 13#include <linux/bootmem.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/kmemleak.h> 15#include <linux/kmemleak.h>
16#include <linux/range.h>
16 17
17#include <asm/bug.h> 18#include <asm/bug.h>
18#include <asm/io.h> 19#include <asm/io.h>
@@ -32,6 +33,7 @@ unsigned long max_pfn;
32unsigned long saved_max_pfn; 33unsigned long saved_max_pfn;
33#endif 34#endif
34 35
36#ifndef CONFIG_NO_BOOTMEM
35bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; 37bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
36 38
37static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); 39static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
142 min_low_pfn = start; 144 min_low_pfn = start;
143 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); 145 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
144} 146}
145 147#endif
146/* 148/*
147 * free_bootmem_late - free bootmem pages directly to page allocator 149 * free_bootmem_late - free bootmem pages directly to page allocator
148 * @addr: starting address of the range 150 * @addr: starting address of the range
@@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
167 } 169 }
168} 170}
169 171
172#ifdef CONFIG_NO_BOOTMEM
173static void __init __free_pages_memory(unsigned long start, unsigned long end)
174{
175 int i;
176 unsigned long start_aligned, end_aligned;
177 int order = ilog2(BITS_PER_LONG);
178
179 start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
180 end_aligned = end & ~(BITS_PER_LONG - 1);
181
182 if (end_aligned <= start_aligned) {
183#if 1
184 printk(KERN_DEBUG " %lx - %lx\n", start, end);
185#endif
186 for (i = start; i < end; i++)
187 __free_pages_bootmem(pfn_to_page(i), 0);
188
189 return;
190 }
191
192#if 1
193 printk(KERN_DEBUG " %lx %lx - %lx %lx\n",
194 start, start_aligned, end_aligned, end);
195#endif
196 for (i = start; i < start_aligned; i++)
197 __free_pages_bootmem(pfn_to_page(i), 0);
198
199 for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
200 __free_pages_bootmem(pfn_to_page(i), order);
201
202 for (i = end_aligned; i < end; i++)
203 __free_pages_bootmem(pfn_to_page(i), 0);
204}
205
206unsigned long __init free_all_memory_core_early(int nodeid)
207{
208 int i;
209 u64 start, end;
210 unsigned long count = 0;
211 struct range *range = NULL;
212 int nr_range;
213
214 nr_range = get_free_all_memory_range(&range, nodeid);
215
216 for (i = 0; i < nr_range; i++) {
217 start = range[i].start;
218 end = range[i].end;
219 count += end - start;
220 __free_pages_memory(start, end);
221 }
222
223 return count;
224}
225#else
170static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) 226static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
171{ 227{
172 int aligned; 228 int aligned;
@@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
227 283
228 return count; 284 return count;
229} 285}
286#endif
230 287
231/** 288/**
232 * free_all_bootmem_node - release a node's free pages to the buddy allocator 289 * free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
237unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) 294unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
238{ 295{
239 register_page_bootmem_info_node(pgdat); 296 register_page_bootmem_info_node(pgdat);
297#ifdef CONFIG_NO_BOOTMEM
298 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
299 return 0;
300#else
240 return free_all_bootmem_core(pgdat->bdata); 301 return free_all_bootmem_core(pgdat->bdata);
302#endif
241} 303}
242 304
243/** 305/**
@@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
247 */ 309 */
248unsigned long __init free_all_bootmem(void) 310unsigned long __init free_all_bootmem(void)
249{ 311{
312#ifdef CONFIG_NO_BOOTMEM
313 return free_all_memory_core_early(NODE_DATA(0)->node_id);
314#else
250 return free_all_bootmem_core(NODE_DATA(0)->bdata); 315 return free_all_bootmem_core(NODE_DATA(0)->bdata);
316#endif
251} 317}
252 318
319#ifndef CONFIG_NO_BOOTMEM
253static void __init __free(bootmem_data_t *bdata, 320static void __init __free(bootmem_data_t *bdata,
254 unsigned long sidx, unsigned long eidx) 321 unsigned long sidx, unsigned long eidx)
255{ 322{
@@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
344 } 411 }
345 BUG(); 412 BUG();
346} 413}
414#endif
347 415
348/** 416/**
349 * free_bootmem_node - mark a page range as usable 417 * free_bootmem_node - mark a page range as usable
@@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
358void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 426void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
359 unsigned long size) 427 unsigned long size)
360{ 428{
429#ifdef CONFIG_NO_BOOTMEM
430 free_early(physaddr, physaddr + size);
431#if 0
432 printk(KERN_DEBUG "free %lx %lx\n", physaddr, size);
433#endif
434#else
361 unsigned long start, end; 435 unsigned long start, end;
362 436
363 kmemleak_free_part(__va(physaddr), size); 437 kmemleak_free_part(__va(physaddr), size);
@@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
366 end = PFN_DOWN(physaddr + size); 440 end = PFN_DOWN(physaddr + size);
367 441
368 mark_bootmem_node(pgdat->bdata, start, end, 0, 0); 442 mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
443#endif
369} 444}
370 445
371/** 446/**
@@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
379 */ 454 */
380void __init free_bootmem(unsigned long addr, unsigned long size) 455void __init free_bootmem(unsigned long addr, unsigned long size)
381{ 456{
457#ifdef CONFIG_NO_BOOTMEM
458 free_early(addr, addr + size);
459#if 0
460 printk(KERN_DEBUG "free %lx %lx\n", addr, size);
461#endif
462#else
382 unsigned long start, end; 463 unsigned long start, end;
383 464
384 kmemleak_free_part(__va(addr), size); 465 kmemleak_free_part(__va(addr), size);
@@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
387 end = PFN_DOWN(addr + size); 468 end = PFN_DOWN(addr + size);
388 469
389 mark_bootmem(start, end, 0, 0); 470 mark_bootmem(start, end, 0, 0);
471#endif
390} 472}
391 473
392/** 474/**
@@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
403int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 485int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
404 unsigned long size, int flags) 486 unsigned long size, int flags)
405{ 487{
488#ifdef CONFIG_NO_BOOTMEM
489 panic("no bootmem");
490 return 0;
491#else
406 unsigned long start, end; 492 unsigned long start, end;
407 493
408 start = PFN_DOWN(physaddr); 494 start = PFN_DOWN(physaddr);
409 end = PFN_UP(physaddr + size); 495 end = PFN_UP(physaddr + size);
410 496
411 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); 497 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
498#endif
412} 499}
413 500
414/** 501/**
@@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
424int __init reserve_bootmem(unsigned long addr, unsigned long size, 511int __init reserve_bootmem(unsigned long addr, unsigned long size,
425 int flags) 512 int flags)
426{ 513{
514#ifdef CONFIG_NO_BOOTMEM
515 panic("no bootmem");
516 return 0;
517#else
427 unsigned long start, end; 518 unsigned long start, end;
428 519
429 start = PFN_DOWN(addr); 520 start = PFN_DOWN(addr);
430 end = PFN_UP(addr + size); 521 end = PFN_UP(addr + size);
431 522
432 return mark_bootmem(start, end, 1, flags); 523 return mark_bootmem(start, end, 1, flags);
524#endif
433} 525}
434 526
527#ifndef CONFIG_NO_BOOTMEM
435static unsigned long __init align_idx(struct bootmem_data *bdata, 528static unsigned long __init align_idx(struct bootmem_data *bdata,
436 unsigned long idx, unsigned long step) 529 unsigned long idx, unsigned long step)
437{ 530{
@@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
582#endif 675#endif
583 return NULL; 676 return NULL;
584} 677}
678#endif
585 679
586static void * __init ___alloc_bootmem_nopanic(unsigned long size, 680static void * __init ___alloc_bootmem_nopanic(unsigned long size,
587 unsigned long align, 681 unsigned long align,
588 unsigned long goal, 682 unsigned long goal,
589 unsigned long limit) 683 unsigned long limit)
590{ 684{
685#ifdef CONFIG_NO_BOOTMEM
686 void *ptr;
687
688 if (WARN_ON_ONCE(slab_is_available()))
689 return kzalloc(size, GFP_NOWAIT);
690
691restart:
692
693 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
694
695 if (ptr)
696 return ptr;
697
698 if (goal != 0) {
699 goal = 0;
700 goto restart;
701 }
702
703 return NULL;
704#else
591 bootmem_data_t *bdata; 705 bootmem_data_t *bdata;
592 void *region; 706 void *region;
593 707
@@ -613,6 +727,7 @@ restart:
613 } 727 }
614 728
615 return NULL; 729 return NULL;
730#endif
616} 731}
617 732
618/** 733/**
@@ -631,7 +746,13 @@ restart:
631void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, 746void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
632 unsigned long goal) 747 unsigned long goal)
633{ 748{
634 return ___alloc_bootmem_nopanic(size, align, goal, 0); 749 unsigned long limit = 0;
750
751#ifdef CONFIG_NO_BOOTMEM
752 limit = -1UL;
753#endif
754
755 return ___alloc_bootmem_nopanic(size, align, goal, limit);
635} 756}
636 757
637static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, 758static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
@@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
665void * __init __alloc_bootmem(unsigned long size, unsigned long align, 786void * __init __alloc_bootmem(unsigned long size, unsigned long align,
666 unsigned long goal) 787 unsigned long goal)
667{ 788{
668 return ___alloc_bootmem(size, align, goal, 0); 789 unsigned long limit = 0;
790
791#ifdef CONFIG_NO_BOOTMEM
792 limit = -1UL;
793#endif
794
795 return ___alloc_bootmem(size, align, goal, limit);
669} 796}
670 797
798#ifndef CONFIG_NO_BOOTMEM
671static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, 799static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
672 unsigned long size, unsigned long align, 800 unsigned long size, unsigned long align,
673 unsigned long goal, unsigned long limit) 801 unsigned long goal, unsigned long limit)
@@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
684 812
685 return ___alloc_bootmem(size, align, goal, limit); 813 return ___alloc_bootmem(size, align, goal, limit);
686} 814}
815#endif
687 816
688/** 817/**
689 * __alloc_bootmem_node - allocate boot memory from a specific node 818 * __alloc_bootmem_node - allocate boot memory from a specific node
@@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
706 if (WARN_ON_ONCE(slab_is_available())) 835 if (WARN_ON_ONCE(slab_is_available()))
707 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 836 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
708 837
838#ifdef CONFIG_NO_BOOTMEM
839 return __alloc_memory_core_early(pgdat->node_id, size, align,
840 goal, -1ULL);
841#else
709 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); 842 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
843#endif
844}
845
846void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
847 unsigned long align, unsigned long goal)
848{
849#ifdef MAX_DMA32_PFN
850 unsigned long end_pfn;
851
852 if (WARN_ON_ONCE(slab_is_available()))
853 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
854
 855	 /* update goal according to MAX_DMA32_PFN */
856 end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
857
858 if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
859 (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
860 void *ptr;
861 unsigned long new_goal;
862
863 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
864#ifdef CONFIG_NO_BOOTMEM
865 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
866 new_goal, -1ULL);
867#else
868 ptr = alloc_bootmem_core(pgdat->bdata, size, align,
869 new_goal, 0);
870#endif
871 if (ptr)
872 return ptr;
873 }
874#endif
875
876 return __alloc_bootmem_node(pgdat, size, align, goal);
877
710} 878}
711 879
712#ifdef CONFIG_SPARSEMEM 880#ifdef CONFIG_SPARSEMEM
@@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
720void * __init alloc_bootmem_section(unsigned long size, 888void * __init alloc_bootmem_section(unsigned long size,
721 unsigned long section_nr) 889 unsigned long section_nr)
722{ 890{
891#ifdef CONFIG_NO_BOOTMEM
892 unsigned long pfn, goal, limit;
893
894 pfn = section_nr_to_pfn(section_nr);
895 goal = pfn << PAGE_SHIFT;
896 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
897
898 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
899 SMP_CACHE_BYTES, goal, limit);
900#else
723 bootmem_data_t *bdata; 901 bootmem_data_t *bdata;
724 unsigned long pfn, goal, limit; 902 unsigned long pfn, goal, limit;
725 903
@@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size,
729 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; 907 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
730 908
731 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); 909 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
910#endif
732} 911}
733#endif 912#endif
734 913
@@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
740 if (WARN_ON_ONCE(slab_is_available())) 919 if (WARN_ON_ONCE(slab_is_available()))
741 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 920 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
742 921
922#ifdef CONFIG_NO_BOOTMEM
923 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
924 goal, -1ULL);
925#else
743 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); 926 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
744 if (ptr) 927 if (ptr)
745 return ptr; 928 return ptr;
746 929
747 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); 930 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
931#endif
748 if (ptr) 932 if (ptr)
749 return ptr; 933 return ptr;
750 934
@@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
795 if (WARN_ON_ONCE(slab_is_available())) 979 if (WARN_ON_ONCE(slab_is_available()))
796 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 980 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
797 981
982#ifdef CONFIG_NO_BOOTMEM
983 return __alloc_memory_core_early(pgdat->node_id, size, align,
984 goal, ARCH_LOW_ADDRESS_LIMIT);
985#else
798 return ___alloc_bootmem_node(pgdat->bdata, size, align, 986 return ___alloc_bootmem_node(pgdat->bdata, size, align,
799 goal, ARCH_LOW_ADDRESS_LIMIT); 987 goal, ARCH_LOW_ADDRESS_LIMIT);
988#endif
800} 989}
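With CONFIG_NO_BOOTMEM, the new free_all_memory_core_early()/__free_pages_memory() path above hands early ranges straight to the buddy allocator, releasing the BITS_PER_LONG-aligned middle of each range as higher-order blocks and the ragged head and tail page by page. A minimal standalone C sketch of that split, assuming a 64-bit BITS_PER_LONG; release() is a hypothetical stand-in for __free_pages_bootmem():

/*
 * Standalone model, not kernel code, of the split __free_pages_memory()
 * performs: unaligned head and tail PFNs go back as order-0 pages, the
 * BITS_PER_LONG-aligned middle as order-ilog2(BITS_PER_LONG) blocks.
 */
#include <stdio.h>

#define BITS_PER_LONG 64UL              /* assumption: 64-bit build, as on x86_64 */

static void release(unsigned long pfn, int order)      /* stand-in for __free_pages_bootmem() */
{
        printf("free pfn %lu, order %d\n", pfn, order);
}

static void free_pages_memory_model(unsigned long start, unsigned long end)
{
        unsigned long i;
        unsigned long start_aligned = (start + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1);
        unsigned long end_aligned = end & ~(BITS_PER_LONG - 1);
        int order = 6;                                  /* ilog2(64) */

        if (end_aligned <= start_aligned) {             /* too small to hold an aligned block */
                for (i = start; i < end; i++)
                        release(i, 0);
                return;
        }

        for (i = start; i < start_aligned; i++)         /* ragged head */
                release(i, 0);
        for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
                release(i, order);                      /* aligned middle, 64 pages per call */
        for (i = end_aligned; i < end; i++)             /* ragged tail */
                release(i, 0);
}

int main(void)
{
        free_pages_memory_model(10, 200);               /* head 10..63, middle 64..191, tail 192..199 */
        return 0;
}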
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2d16fa6b8c2d..3a5aeb37c110 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2087,7 +2087,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
2087 2087
2088 entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep))); 2088 entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
2089 if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) { 2089 if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
2090 update_mmu_cache(vma, address, entry); 2090 update_mmu_cache(vma, address, ptep);
2091 } 2091 }
2092} 2092}
2093 2093
@@ -2558,7 +2558,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2558 entry = pte_mkyoung(entry); 2558 entry = pte_mkyoung(entry);
2559 if (huge_ptep_set_access_flags(vma, address, ptep, entry, 2559 if (huge_ptep_set_access_flags(vma, address, ptep, entry,
2560 flags & FAULT_FLAG_WRITE)) 2560 flags & FAULT_FLAG_WRITE))
2561 update_mmu_cache(vma, address, entry); 2561 update_mmu_cache(vma, address, ptep);
2562 2562
2563out_page_table_lock: 2563out_page_table_lock:
2564 spin_unlock(&mm->page_table_lock); 2564 spin_unlock(&mm->page_table_lock);
diff --git a/mm/memory.c b/mm/memory.c
index 09e4b1be7b67..72fb5f39bccc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1593,7 +1593,7 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1593 /* Ok, finally just insert the thing.. */ 1593 /* Ok, finally just insert the thing.. */
1594 entry = pte_mkspecial(pfn_pte(pfn, prot)); 1594 entry = pte_mkspecial(pfn_pte(pfn, prot));
1595 set_pte_at(mm, addr, pte, entry); 1595 set_pte_at(mm, addr, pte, entry);
1596 update_mmu_cache(vma, addr, entry); /* XXX: why not for insert_page? */ 1596 update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
1597 1597
1598 retval = 0; 1598 retval = 0;
1599out_unlock: 1599out_unlock:
@@ -2116,7 +2116,7 @@ reuse:
2116 entry = pte_mkyoung(orig_pte); 2116 entry = pte_mkyoung(orig_pte);
2117 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2117 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2118 if (ptep_set_access_flags(vma, address, page_table, entry,1)) 2118 if (ptep_set_access_flags(vma, address, page_table, entry,1))
2119 update_mmu_cache(vma, address, entry); 2119 update_mmu_cache(vma, address, page_table);
2120 ret |= VM_FAULT_WRITE; 2120 ret |= VM_FAULT_WRITE;
2121 goto unlock; 2121 goto unlock;
2122 } 2122 }
@@ -2185,7 +2185,7 @@ gotten:
2185 * new page to be mapped directly into the secondary page table. 2185 * new page to be mapped directly into the secondary page table.
2186 */ 2186 */
2187 set_pte_at_notify(mm, address, page_table, entry); 2187 set_pte_at_notify(mm, address, page_table, entry);
2188 update_mmu_cache(vma, address, entry); 2188 update_mmu_cache(vma, address, page_table);
2189 if (old_page) { 2189 if (old_page) {
2190 /* 2190 /*
2191 * Only after switching the pte to the new page may 2191 * Only after switching the pte to the new page may
@@ -2629,7 +2629,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2629 } 2629 }
2630 2630
2631 /* No need to invalidate - it was non-present before */ 2631 /* No need to invalidate - it was non-present before */
2632 update_mmu_cache(vma, address, pte); 2632 update_mmu_cache(vma, address, page_table);
2633unlock: 2633unlock:
2634 pte_unmap_unlock(page_table, ptl); 2634 pte_unmap_unlock(page_table, ptl);
2635out: 2635out:
@@ -2694,7 +2694,7 @@ setpte:
2694 set_pte_at(mm, address, page_table, entry); 2694 set_pte_at(mm, address, page_table, entry);
2695 2695
2696 /* No need to invalidate - it was non-present before */ 2696 /* No need to invalidate - it was non-present before */
2697 update_mmu_cache(vma, address, entry); 2697 update_mmu_cache(vma, address, page_table);
2698unlock: 2698unlock:
2699 pte_unmap_unlock(page_table, ptl); 2699 pte_unmap_unlock(page_table, ptl);
2700 return 0; 2700 return 0;
@@ -2855,7 +2855,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2855 set_pte_at(mm, address, page_table, entry); 2855 set_pte_at(mm, address, page_table, entry);
2856 2856
2857 /* no need to invalidate: a not-present page won't be cached */ 2857 /* no need to invalidate: a not-present page won't be cached */
2858 update_mmu_cache(vma, address, entry); 2858 update_mmu_cache(vma, address, page_table);
2859 } else { 2859 } else {
2860 if (charged) 2860 if (charged)
2861 mem_cgroup_uncharge_page(page); 2861 mem_cgroup_uncharge_page(page);
@@ -2992,7 +2992,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
2992 } 2992 }
2993 entry = pte_mkyoung(entry); 2993 entry = pte_mkyoung(entry);
2994 if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) { 2994 if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
2995 update_mmu_cache(vma, address, entry); 2995 update_mmu_cache(vma, address, pte);
2996 } else { 2996 } else {
2997 /* 2997 /*
2998 * This is needed only for protection faults but the arch code 2998 * This is needed only for protection faults but the arch code
diff --git a/mm/migrate.c b/mm/migrate.c
index 880bd592d38e..edb6101ed774 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -134,7 +134,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
134 page_add_file_rmap(new); 134 page_add_file_rmap(new);
135 135
136 /* No need to invalidate - it was non-present before */ 136 /* No need to invalidate - it was non-present before */
137 update_mmu_cache(vma, addr, pte); 137 update_mmu_cache(vma, addr, ptep);
138unlock: 138unlock:
139 pte_unmap_unlock(ptep, ptl); 139 pte_unmap_unlock(ptep, ptl);
140out: 140out:
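The hugetlb.c, memory.c and migrate.c hunks above all switch their update_mmu_cache() calls from passing the pte value to passing the pte pointer, matching the reworked arch hook. A toy userspace illustration of what the pointer form buys: the hook can read the entry as it currently sits in the page table instead of a possibly stale snapshot (update_cache_old()/update_cache_new() are invented for the sketch, not kernel APIs):

/*
 * Toy model, plain C rather than the real arch hook: passing the pte pointer
 * lets the cache-update hook read the entry as it now stands in the page
 * table, where the old by-value form only ever saw a snapshot.
 */
#include <stdio.h>

typedef unsigned long pte_t;

static void update_cache_old(unsigned long addr, pte_t entry)     /* old calling style */
{
        printf("addr %#lx: snapshot entry %#lx\n", addr, entry);
}

static void update_cache_new(unsigned long addr, pte_t *ptep)     /* new calling style */
{
        printf("addr %#lx: current  entry %#lx (at %p)\n", addr, *ptep, (void *)ptep);
}

int main(void)
{
        pte_t table[1] = { 0x1000 | 0x1 };      /* pretend pfn bits plus a present bit */
        pte_t snapshot = table[0];

        table[0] |= 0x40;                       /* entry gains a bit after the snapshot */
        update_cache_old(0x400000, snapshot);   /* hook sees the stale value */
        update_cache_new(0x400000, &table[0]);  /* hook sees the live entry */
        return 0;
}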
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index ded9081f4021..0777654147c9 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -5,6 +5,7 @@
5 5
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/mmu_context.h> 7#include <linux/mmu_context.h>
8#include <linux/module.h>
8#include <linux/sched.h> 9#include <linux/sched.h>
9 10
10#include <asm/mmu_context.h> 11#include <asm/mmu_context.h>
@@ -37,6 +38,7 @@ void use_mm(struct mm_struct *mm)
37 if (active_mm != mm) 38 if (active_mm != mm)
38 mmdrop(active_mm); 39 mmdrop(active_mm);
39} 40}
41EXPORT_SYMBOL_GPL(use_mm);
40 42
41/* 43/*
42 * unuse_mm 44 * unuse_mm
@@ -56,3 +58,4 @@ void unuse_mm(struct mm_struct *mm)
56 enter_lazy_tlb(mm, tsk); 58 enter_lazy_tlb(mm, tsk);
57 task_unlock(tsk); 59 task_unlock(tsk);
58} 60}
61EXPORT_SYMBOL_GPL(unuse_mm);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8deb9d0fd5b1..a6b17aa4740b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1009,10 +1009,10 @@ static void drain_pages(unsigned int cpu)
1009 struct per_cpu_pageset *pset; 1009 struct per_cpu_pageset *pset;
1010 struct per_cpu_pages *pcp; 1010 struct per_cpu_pages *pcp;
1011 1011
1012 pset = zone_pcp(zone, cpu); 1012 local_irq_save(flags);
1013 pset = per_cpu_ptr(zone->pageset, cpu);
1013 1014
1014 pcp = &pset->pcp; 1015 pcp = &pset->pcp;
1015 local_irq_save(flags);
1016 free_pcppages_bulk(zone, pcp->count, pcp); 1016 free_pcppages_bulk(zone, pcp->count, pcp);
1017 pcp->count = 0; 1017 pcp->count = 0;
1018 local_irq_restore(flags); 1018 local_irq_restore(flags);
@@ -1096,7 +1096,6 @@ static void free_hot_cold_page(struct page *page, int cold)
1096 arch_free_page(page, 0); 1096 arch_free_page(page, 0);
1097 kernel_map_pages(page, 1, 0); 1097 kernel_map_pages(page, 1, 0);
1098 1098
1099 pcp = &zone_pcp(zone, get_cpu())->pcp;
1100 migratetype = get_pageblock_migratetype(page); 1099 migratetype = get_pageblock_migratetype(page);
1101 set_page_private(page, migratetype); 1100 set_page_private(page, migratetype);
1102 local_irq_save(flags); 1101 local_irq_save(flags);
@@ -1119,6 +1118,7 @@ static void free_hot_cold_page(struct page *page, int cold)
1119 migratetype = MIGRATE_MOVABLE; 1118 migratetype = MIGRATE_MOVABLE;
1120 } 1119 }
1121 1120
1121 pcp = &this_cpu_ptr(zone->pageset)->pcp;
1122 if (cold) 1122 if (cold)
1123 list_add_tail(&page->lru, &pcp->lists[migratetype]); 1123 list_add_tail(&page->lru, &pcp->lists[migratetype]);
1124 else 1124 else
@@ -1131,7 +1131,6 @@ static void free_hot_cold_page(struct page *page, int cold)
1131 1131
1132out: 1132out:
1133 local_irq_restore(flags); 1133 local_irq_restore(flags);
1134 put_cpu();
1135} 1134}
1136 1135
1137void free_hot_page(struct page *page) 1136void free_hot_page(struct page *page)
@@ -1181,17 +1180,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
1181 unsigned long flags; 1180 unsigned long flags;
1182 struct page *page; 1181 struct page *page;
1183 int cold = !!(gfp_flags & __GFP_COLD); 1182 int cold = !!(gfp_flags & __GFP_COLD);
1184 int cpu;
1185 1183
1186again: 1184again:
1187 cpu = get_cpu();
1188 if (likely(order == 0)) { 1185 if (likely(order == 0)) {
1189 struct per_cpu_pages *pcp; 1186 struct per_cpu_pages *pcp;
1190 struct list_head *list; 1187 struct list_head *list;
1191 1188
1192 pcp = &zone_pcp(zone, cpu)->pcp;
1193 list = &pcp->lists[migratetype];
1194 local_irq_save(flags); 1189 local_irq_save(flags);
1190 pcp = &this_cpu_ptr(zone->pageset)->pcp;
1191 list = &pcp->lists[migratetype];
1195 if (list_empty(list)) { 1192 if (list_empty(list)) {
1196 pcp->count += rmqueue_bulk(zone, 0, 1193 pcp->count += rmqueue_bulk(zone, 0,
1197 pcp->batch, list, 1194 pcp->batch, list,
@@ -1232,7 +1229,6 @@ again:
1232 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1229 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1233 zone_statistics(preferred_zone, zone); 1230 zone_statistics(preferred_zone, zone);
1234 local_irq_restore(flags); 1231 local_irq_restore(flags);
1235 put_cpu();
1236 1232
1237 VM_BUG_ON(bad_range(zone, page)); 1233 VM_BUG_ON(bad_range(zone, page));
1238 if (prep_new_page(page, order, gfp_flags)) 1234 if (prep_new_page(page, order, gfp_flags))
@@ -1241,7 +1237,6 @@ again:
1241 1237
1242failed: 1238failed:
1243 local_irq_restore(flags); 1239 local_irq_restore(flags);
1244 put_cpu();
1245 return NULL; 1240 return NULL;
1246} 1241}
1247 1242
@@ -2180,7 +2175,7 @@ void show_free_areas(void)
2180 for_each_online_cpu(cpu) { 2175 for_each_online_cpu(cpu) {
2181 struct per_cpu_pageset *pageset; 2176 struct per_cpu_pageset *pageset;
2182 2177
2183 pageset = zone_pcp(zone, cpu); 2178 pageset = per_cpu_ptr(zone->pageset, cpu);
2184 2179
2185 printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", 2180 printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
2186 cpu, pageset->pcp.high, 2181 cpu, pageset->pcp.high,
@@ -2745,10 +2740,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)
2745 2740
2746#endif /* CONFIG_NUMA */ 2741#endif /* CONFIG_NUMA */
2747 2742
2743/*
2744 * Boot pageset table. One per cpu which is going to be used for all
2745 * zones and all nodes. The parameters will be set in such a way
2746 * that an item put on a list will immediately be handed over to
2747 * the buddy list. This is safe since pageset manipulation is done
2748 * with interrupts disabled.
2749 *
2750 * The boot_pagesets must be kept even after bootup is complete for
2751 * unused processors and/or zones. They do play a role for bootstrapping
2752 * hotplugged processors.
2753 *
2754 * zoneinfo_show() and maybe other functions do
2755 * not check if the processor is online before following the pageset pointer.
2756 * Other parts of the kernel may not check if the zone is available.
2757 */
2758static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
2759static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
2760
2748/* return values int ....just for stop_machine() */ 2761/* return values int ....just for stop_machine() */
2749static int __build_all_zonelists(void *dummy) 2762static int __build_all_zonelists(void *dummy)
2750{ 2763{
2751 int nid; 2764 int nid;
2765 int cpu;
2752 2766
2753#ifdef CONFIG_NUMA 2767#ifdef CONFIG_NUMA
2754 memset(node_load, 0, sizeof(node_load)); 2768 memset(node_load, 0, sizeof(node_load));
@@ -2759,6 +2773,23 @@ static int __build_all_zonelists(void *dummy)
2759 build_zonelists(pgdat); 2773 build_zonelists(pgdat);
2760 build_zonelist_cache(pgdat); 2774 build_zonelist_cache(pgdat);
2761 } 2775 }
2776
2777 /*
2778 * Initialize the boot_pagesets that are going to be used
2779 * for bootstrapping processors. The real pagesets for
2780 * each zone will be allocated later when the per cpu
2781 * allocator is available.
2782 *
2783 * boot_pagesets are used also for bootstrapping offline
2784 * cpus if the system is already booted because the pagesets
2785 * are needed to initialize allocators on a specific cpu too.
2786 * F.e. the percpu allocator needs the page allocator which
2787 * needs the percpu allocator in order to allocate its pagesets
2788 * (a chicken-egg dilemma).
2789 */
2790 for_each_possible_cpu(cpu)
2791 setup_pageset(&per_cpu(boot_pageset, cpu), 0);
2792
2762 return 0; 2793 return 0;
2763} 2794}
2764 2795
@@ -3096,121 +3127,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
3096 pcp->batch = PAGE_SHIFT * 8; 3127 pcp->batch = PAGE_SHIFT * 8;
3097} 3128}
3098 3129
3099
3100#ifdef CONFIG_NUMA
3101/*
3102 * Boot pageset table. One per cpu which is going to be used for all
3103 * zones and all nodes. The parameters will be set in such a way
3104 * that an item put on a list will immediately be handed over to
3105 * the buddy list. This is safe since pageset manipulation is done
3106 * with interrupts disabled.
3107 *
3108 * Some NUMA counter updates may also be caught by the boot pagesets.
3109 *
3110 * The boot_pagesets must be kept even after bootup is complete for
3111 * unused processors and/or zones. They do play a role for bootstrapping
3112 * hotplugged processors.
3113 *
3114 * zoneinfo_show() and maybe other functions do
3115 * not check if the processor is online before following the pageset pointer.
3116 * Other parts of the kernel may not check if the zone is available.
3117 */
3118static struct per_cpu_pageset boot_pageset[NR_CPUS];
3119
3120/* 3130/*
3121 * Dynamically allocate memory for the 3131 * Allocate per cpu pagesets and initialize them.
3122 * per cpu pageset array in struct zone. 3132 * Before this call only boot pagesets were available.
3133 * Boot pagesets will no longer be used by this processorr
3134 * after setup_per_cpu_pageset().
3123 */ 3135 */
3124static int __cpuinit process_zones(int cpu) 3136void __init setup_per_cpu_pageset(void)
3125{ 3137{
3126 struct zone *zone, *dzone; 3138 struct zone *zone;
3127 int node = cpu_to_node(cpu); 3139 int cpu;
3128
3129 node_set_state(node, N_CPU); /* this node has a cpu */
3130 3140
3131 for_each_populated_zone(zone) { 3141 for_each_populated_zone(zone) {
3132 zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), 3142 zone->pageset = alloc_percpu(struct per_cpu_pageset);
3133 GFP_KERNEL, node);
3134 if (!zone_pcp(zone, cpu))
3135 goto bad;
3136 3143
3137 setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); 3144 for_each_possible_cpu(cpu) {
3145 struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
3138 3146
3139 if (percpu_pagelist_fraction) 3147 setup_pageset(pcp, zone_batchsize(zone));
3140 setup_pagelist_highmark(zone_pcp(zone, cpu),
3141 (zone->present_pages / percpu_pagelist_fraction));
3142 }
3143 3148
3144 return 0; 3149 if (percpu_pagelist_fraction)
3145bad: 3150 setup_pagelist_highmark(pcp,
3146 for_each_zone(dzone) { 3151 (zone->present_pages /
3147 if (!populated_zone(dzone)) 3152 percpu_pagelist_fraction));
3148 continue; 3153 }
3149 if (dzone == zone)
3150 break;
3151 kfree(zone_pcp(dzone, cpu));
3152 zone_pcp(dzone, cpu) = &boot_pageset[cpu];
3153 }
3154 return -ENOMEM;
3155}
3156
3157static inline void free_zone_pagesets(int cpu)
3158{
3159 struct zone *zone;
3160
3161 for_each_zone(zone) {
3162 struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
3163
3164 /* Free per_cpu_pageset if it is slab allocated */
3165 if (pset != &boot_pageset[cpu])
3166 kfree(pset);
3167 zone_pcp(zone, cpu) = &boot_pageset[cpu];
3168 }
3169}
3170
3171static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
3172 unsigned long action,
3173 void *hcpu)
3174{
3175 int cpu = (long)hcpu;
3176 int ret = NOTIFY_OK;
3177
3178 switch (action) {
3179 case CPU_UP_PREPARE:
3180 case CPU_UP_PREPARE_FROZEN:
3181 if (process_zones(cpu))
3182 ret = NOTIFY_BAD;
3183 break;
3184 case CPU_UP_CANCELED:
3185 case CPU_UP_CANCELED_FROZEN:
3186 case CPU_DEAD:
3187 case CPU_DEAD_FROZEN:
3188 free_zone_pagesets(cpu);
3189 break;
3190 default:
3191 break;
3192 } 3154 }
3193 return ret;
3194}
3195
3196static struct notifier_block __cpuinitdata pageset_notifier =
3197 { &pageset_cpuup_callback, NULL, 0 };
3198
3199void __init setup_per_cpu_pageset(void)
3200{
3201 int err;
3202
3203 /* Initialize per_cpu_pageset for cpu 0.
3204 * A cpuup callback will do this for every cpu
3205 * as it comes online
3206 */
3207 err = process_zones(smp_processor_id());
3208 BUG_ON(err);
3209 register_cpu_notifier(&pageset_notifier);
3210} 3155}
3211 3156
3212#endif
3213
3214static noinline __init_refok 3157static noinline __init_refok
3215int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) 3158int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
3216{ 3159{
@@ -3264,7 +3207,7 @@ static int __zone_pcp_update(void *data)
3264 struct per_cpu_pageset *pset; 3207 struct per_cpu_pageset *pset;
3265 struct per_cpu_pages *pcp; 3208 struct per_cpu_pages *pcp;
3266 3209
3267 pset = zone_pcp(zone, cpu); 3210 pset = per_cpu_ptr(zone->pageset, cpu);
3268 pcp = &pset->pcp; 3211 pcp = &pset->pcp;
3269 3212
3270 local_irq_save(flags); 3213 local_irq_save(flags);
@@ -3282,21 +3225,17 @@ void zone_pcp_update(struct zone *zone)
3282 3225
3283static __meminit void zone_pcp_init(struct zone *zone) 3226static __meminit void zone_pcp_init(struct zone *zone)
3284{ 3227{
3285 int cpu; 3228 /*
3286 unsigned long batch = zone_batchsize(zone); 3229 * per cpu subsystem is not up at this point. The following code
3230 * relies on the ability of the linker to provide the
3231 * offset of a (static) per cpu variable into the per cpu area.
3232 */
3233 zone->pageset = &boot_pageset;
3287 3234
3288 for (cpu = 0; cpu < NR_CPUS; cpu++) {
3289#ifdef CONFIG_NUMA
3290 /* Early boot. Slab allocator not functional yet */
3291 zone_pcp(zone, cpu) = &boot_pageset[cpu];
3292 setup_pageset(&boot_pageset[cpu],0);
3293#else
3294 setup_pageset(zone_pcp(zone,cpu), batch);
3295#endif
3296 }
3297 if (zone->present_pages) 3235 if (zone->present_pages)
3298 printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", 3236 printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
3299 zone->name, zone->present_pages, batch); 3237 zone->name, zone->present_pages,
3238 zone_batchsize(zone));
3300} 3239}
3301 3240
3302__meminit int init_currently_empty_zone(struct zone *zone, 3241__meminit int init_currently_empty_zone(struct zone *zone,
@@ -3435,6 +3374,61 @@ void __init free_bootmem_with_active_regions(int nid,
3435 } 3374 }
3436} 3375}
3437 3376
3377int __init add_from_early_node_map(struct range *range, int az,
3378 int nr_range, int nid)
3379{
3380 int i;
3381 u64 start, end;
3382
3383 /* need to go over early_node_map to find out good range for node */
3384 for_each_active_range_index_in_nid(i, nid) {
3385 start = early_node_map[i].start_pfn;
3386 end = early_node_map[i].end_pfn;
3387 nr_range = add_range(range, az, nr_range, start, end);
3388 }
3389 return nr_range;
3390}
3391
3392#ifdef CONFIG_NO_BOOTMEM
3393void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3394 u64 goal, u64 limit)
3395{
3396 int i;
3397 void *ptr;
3398
3399 /* need to go over early_node_map to find out good range for node */
3400 for_each_active_range_index_in_nid(i, nid) {
3401 u64 addr;
3402 u64 ei_start, ei_last;
3403
3404 ei_last = early_node_map[i].end_pfn;
3405 ei_last <<= PAGE_SHIFT;
3406 ei_start = early_node_map[i].start_pfn;
3407 ei_start <<= PAGE_SHIFT;
3408 addr = find_early_area(ei_start, ei_last,
3409 goal, limit, size, align);
3410
3411 if (addr == -1ULL)
3412 continue;
3413
3414#if 0
3415 printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
3416 nid,
3417 ei_start, ei_last, goal, limit, size,
3418 align, addr);
3419#endif
3420
3421 ptr = phys_to_virt(addr);
3422 memset(ptr, 0, size);
3423 reserve_early_without_check(addr, addr + size, "BOOTMEM");
3424 return ptr;
3425 }
3426
3427 return NULL;
3428}
3429#endif
3430
3431
3438void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) 3432void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3439{ 3433{
3440 int i; 3434 int i;
@@ -4467,7 +4461,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
4467} 4461}
4468 4462
4469#ifndef CONFIG_NEED_MULTIPLE_NODES 4463#ifndef CONFIG_NEED_MULTIPLE_NODES
4470struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; 4464struct pglist_data __refdata contig_page_data = {
4465#ifndef CONFIG_NO_BOOTMEM
4466 .bdata = &bootmem_node_data[0]
4467#endif
4468 };
4471EXPORT_SYMBOL(contig_page_data); 4469EXPORT_SYMBOL(contig_page_data);
4472#endif 4470#endif
4473 4471
@@ -4810,10 +4808,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
4810 if (!write || (ret == -EINVAL)) 4808 if (!write || (ret == -EINVAL))
4811 return ret; 4809 return ret;
4812 for_each_populated_zone(zone) { 4810 for_each_populated_zone(zone) {
4813 for_each_online_cpu(cpu) { 4811 for_each_possible_cpu(cpu) {
4814 unsigned long high; 4812 unsigned long high;
4815 high = zone->present_pages / percpu_pagelist_fraction; 4813 high = zone->present_pages / percpu_pagelist_fraction;
4816 setup_pagelist_highmark(zone_pcp(zone, cpu), high); 4814 setup_pagelist_highmark(
4815 per_cpu_ptr(zone->pageset, cpu), high);
4817 } 4816 }
4818 } 4817 }
4819 return 0; 4818 return 0;
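The page_alloc.c changes above replace the per-zone zone_pcp() arrays with a zone->pageset percpu pointer: zone_pcp_init() parks every zone on the static boot_pageset early on, and setup_per_cpu_pageset() later swaps in per-cpu state allocated with alloc_percpu() once the percpu allocator is usable. A rough userspace model of that two-phase bootstrap; NCPUS, the batch numbers and the calloc() stand-in for alloc_percpu() are invented for the sketch:

/*
 * Userspace model, not kernel code: every zone starts out pointing at a
 * static boot pageset so very early allocations have somewhere to go, and
 * only later gets per-cpu pagesets of its own.  Sizes and values are made up.
 */
#include <stdio.h>
#include <stdlib.h>

#define NCPUS 4                               /* assumed cpu count for the model */

struct pageset { int high, batch; };

struct zone_model { struct pageset *pageset; };         /* models zone->pageset */

static struct pageset boot_pageset[NCPUS];              /* static, needs no allocator */

static void zone_pcp_init_model(struct zone_model *z)
{
        z->pageset = boot_pageset;                      /* early boot: share the boot pagesets */
}

static void setup_per_cpu_pageset_model(struct zone_model *z)
{
        struct pageset *real = calloc(NCPUS, sizeof(*real));    /* stands in for alloc_percpu() */
        int cpu;

        if (!real)
                exit(1);
        for (cpu = 0; cpu < NCPUS; cpu++) {
                real[cpu].batch = 31;                   /* illustrative zone_batchsize() result */
                real[cpu].high = 6 * real[cpu].batch;   /* illustrative setup_pageset() policy */
        }
        z->pageset = real;                              /* boot pagesets no longer used here */
}

int main(void)
{
        struct zone_model z;

        zone_pcp_init_model(&z);
        printf("early: batch=%d high=%d\n", z.pageset[0].batch, z.pageset[0].high);
        setup_per_cpu_pageset_model(&z);
        printf("later: batch=%d high=%d\n", z.pageset[0].batch, z.pageset[0].high);
        return 0;
}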
diff --git a/mm/percpu.c b/mm/percpu.c
index 083e7c91e5f6..768419d44ad7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,13 +80,15 @@
80/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ 80/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
81#ifndef __addr_to_pcpu_ptr 81#ifndef __addr_to_pcpu_ptr
82#define __addr_to_pcpu_ptr(addr) \ 82#define __addr_to_pcpu_ptr(addr) \
83 (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ 83 (void __percpu *)((unsigned long)(addr) - \
84 + (unsigned long)__per_cpu_start) 84 (unsigned long)pcpu_base_addr + \
85 (unsigned long)__per_cpu_start)
85#endif 86#endif
86#ifndef __pcpu_ptr_to_addr 87#ifndef __pcpu_ptr_to_addr
87#define __pcpu_ptr_to_addr(ptr) \ 88#define __pcpu_ptr_to_addr(ptr) \
88 (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ 89 (void __force *)((unsigned long)(ptr) + \
89 - (unsigned long)__per_cpu_start) 90 (unsigned long)pcpu_base_addr - \
91 (unsigned long)__per_cpu_start)
90#endif 92#endif
91 93
92struct pcpu_chunk { 94struct pcpu_chunk {
@@ -913,11 +915,10 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
913 int rs, re; 915 int rs, re;
914 916
915 /* quick path, check whether it's empty already */ 917 /* quick path, check whether it's empty already */
916 pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { 918 rs = page_start;
917 if (rs == page_start && re == page_end) 919 pcpu_next_unpop(chunk, &rs, &re, page_end);
918 return; 920 if (rs == page_start && re == page_end)
919 break; 921 return;
920 }
921 922
922 /* immutable chunks can't be depopulated */ 923 /* immutable chunks can't be depopulated */
923 WARN_ON(chunk->immutable); 924 WARN_ON(chunk->immutable);
@@ -968,11 +969,10 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
968 int rs, re, rc; 969 int rs, re, rc;
969 970
970 /* quick path, check whether all pages are already there */ 971 /* quick path, check whether all pages are already there */
971 pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { 972 rs = page_start;
972 if (rs == page_start && re == page_end) 973 pcpu_next_pop(chunk, &rs, &re, page_end);
973 goto clear; 974 if (rs == page_start && re == page_end)
974 break; 975 goto clear;
975 }
976 976
977 /* need to allocate and map pages, this chunk can't be immutable */ 977 /* need to allocate and map pages, this chunk can't be immutable */
978 WARN_ON(chunk->immutable); 978 WARN_ON(chunk->immutable);
@@ -1067,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
1067 * RETURNS: 1067 * RETURNS:
1068 * Percpu pointer to the allocated area on success, NULL on failure. 1068 * Percpu pointer to the allocated area on success, NULL on failure.
1069 */ 1069 */
1070static void *pcpu_alloc(size_t size, size_t align, bool reserved) 1070static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
1071{ 1071{
1072 static int warn_limit = 10; 1072 static int warn_limit = 10;
1073 struct pcpu_chunk *chunk; 1073 struct pcpu_chunk *chunk;
@@ -1196,7 +1196,7 @@ fail_unlock_mutex:
1196 * RETURNS: 1196 * RETURNS:
1197 * Percpu pointer to the allocated area on success, NULL on failure. 1197 * Percpu pointer to the allocated area on success, NULL on failure.
1198 */ 1198 */
1199void *__alloc_percpu(size_t size, size_t align) 1199void __percpu *__alloc_percpu(size_t size, size_t align)
1200{ 1200{
1201 return pcpu_alloc(size, align, false); 1201 return pcpu_alloc(size, align, false);
1202} 1202}
@@ -1217,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
1217 * RETURNS: 1217 * RETURNS:
1218 * Percpu pointer to the allocated area on success, NULL on failure. 1218 * Percpu pointer to the allocated area on success, NULL on failure.
1219 */ 1219 */
1220void *__alloc_reserved_percpu(size_t size, size_t align) 1220void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
1221{ 1221{
1222 return pcpu_alloc(size, align, true); 1222 return pcpu_alloc(size, align, true);
1223} 1223}
@@ -1269,7 +1269,7 @@ static void pcpu_reclaim(struct work_struct *work)
1269 * CONTEXT: 1269 * CONTEXT:
1270 * Can be called from atomic context. 1270 * Can be called from atomic context.
1271 */ 1271 */
1272void free_percpu(void *ptr) 1272void free_percpu(void __percpu *ptr)
1273{ 1273{
1274 void *addr; 1274 void *addr;
1275 struct pcpu_chunk *chunk; 1275 struct pcpu_chunk *chunk;
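The percpu.c hunks above mostly add __percpu/__force annotations for sparse, so the runtime behaviour of the address conversion macros is unchanged: a percpu pointer is still the chunk address rebased from pcpu_base_addr onto the start of the static percpu section, and the reverse macro undoes that shift. A standalone sketch of the arithmetic with made-up example addresses:

/*
 * Userspace sketch with assumed addresses, not kernel code: the default
 * __addr_to_pcpu_ptr()/__pcpu_ptr_to_addr() pair simply rebases an address
 * between the first percpu chunk (pcpu_base_addr) and the linker-provided
 * static percpu section (__per_cpu_start), and the two are exact inverses.
 */
#include <assert.h>
#include <stdio.h>

static unsigned long pcpu_base_addr = 0xffff880000100000UL;    /* assumed example value */
static unsigned long per_cpu_start  = 0x14000UL;               /* assumed example value */

static unsigned long addr_to_pcpu_ptr(unsigned long addr)
{
        return addr - pcpu_base_addr + per_cpu_start;
}

static unsigned long pcpu_ptr_to_addr(unsigned long ptr)
{
        return ptr + pcpu_base_addr - per_cpu_start;
}

int main(void)
{
        unsigned long addr = pcpu_base_addr + 0x2340;   /* somewhere inside the first chunk */
        unsigned long ptr = addr_to_pcpu_ptr(addr);

        assert(pcpu_ptr_to_addr(ptr) == addr);          /* round trip restores the address */
        printf("chunk addr %#lx <-> percpu ptr %#lx\n", addr, ptr);
        return 0;
}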
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index d9714bdcb4a3..392b9bb5bc01 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -40,9 +40,11 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node,
40 unsigned long align, 40 unsigned long align,
41 unsigned long goal) 41 unsigned long goal)
42{ 42{
43 return __alloc_bootmem_node(NODE_DATA(node), size, align, goal); 43 return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
44} 44}
45 45
46static void *vmemmap_buf;
47static void *vmemmap_buf_end;
46 48
47void * __meminit vmemmap_alloc_block(unsigned long size, int node) 49void * __meminit vmemmap_alloc_block(unsigned long size, int node)
48{ 50{
@@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
64 __pa(MAX_DMA_ADDRESS)); 66 __pa(MAX_DMA_ADDRESS));
65} 67}
66 68
69/* need to make sure size is all the same during early stage */
70void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
71{
72 void *ptr;
73
74 if (!vmemmap_buf)
75 return vmemmap_alloc_block(size, node);
76
 77	 /* take it from the buf */
78 ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
79 if (ptr + size > vmemmap_buf_end)
80 return vmemmap_alloc_block(size, node);
81
82 vmemmap_buf = ptr + size;
83
84 return ptr;
85}
86
67void __meminit vmemmap_verify(pte_t *pte, int node, 87void __meminit vmemmap_verify(pte_t *pte, int node,
68 unsigned long start, unsigned long end) 88 unsigned long start, unsigned long end)
69{ 89{
@@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
80 pte_t *pte = pte_offset_kernel(pmd, addr); 100 pte_t *pte = pte_offset_kernel(pmd, addr);
81 if (pte_none(*pte)) { 101 if (pte_none(*pte)) {
82 pte_t entry; 102 pte_t entry;
83 void *p = vmemmap_alloc_block(PAGE_SIZE, node); 103 void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
84 if (!p) 104 if (!p)
85 return NULL; 105 return NULL;
86 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); 106 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
163 183
164 return map; 184 return map;
165} 185}
186
187void __init sparse_mem_maps_populate_node(struct page **map_map,
188 unsigned long pnum_begin,
189 unsigned long pnum_end,
190 unsigned long map_count, int nodeid)
191{
192 unsigned long pnum;
193 unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
194 void *vmemmap_buf_start;
195
196 size = ALIGN(size, PMD_SIZE);
197 vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
198 PMD_SIZE, __pa(MAX_DMA_ADDRESS));
199
200 if (vmemmap_buf_start) {
201 vmemmap_buf = vmemmap_buf_start;
202 vmemmap_buf_end = vmemmap_buf_start + size * map_count;
203 }
204
205 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
206 struct mem_section *ms;
207
208 if (!present_section_nr(pnum))
209 continue;
210
211 map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
212 if (map_map[pnum])
213 continue;
214 ms = __nr_to_section(pnum);
215 printk(KERN_ERR "%s: sparsemem memory map backing failed "
216 "some memory will not be available.\n", __func__);
217 ms->section_mem_map = 0;
218 }
219
220 if (vmemmap_buf_start) {
221 /* need to free left buf */
222#ifdef CONFIG_NO_BOOTMEM
223 free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end));
224 if (vmemmap_buf_start < vmemmap_buf) {
225 char name[15];
226
227 snprintf(name, sizeof(name), "MEMMAP %d", nodeid);
228 reserve_early_without_check(__pa(vmemmap_buf_start),
229 __pa(vmemmap_buf), name);
230 }
231#else
232 free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
233#endif
234 vmemmap_buf = NULL;
235 vmemmap_buf_end = NULL;
236 }
237}
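vmemmap_alloc_block_buf() above is a simple bump allocator over the per-node buffer that sparse_mem_maps_populate_node() reserves up front, falling back to vmemmap_alloc_block() when the request no longer fits. A self-contained userspace sketch of the same carve-and-advance logic, where the static backing array and the malloc() fallback stand in for the real early allocations:

/*
 * Self-contained userspace sketch, not kernel code: the same carve-and-advance
 * logic as vmemmap_alloc_block_buf(), with a static array for the early buffer
 * and malloc() standing in for the vmemmap_alloc_block() fallback.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

static char backing[1 << 16];                   /* stands in for the per-node PMD-sized buffer */
static char *buf = backing;
static char *buf_end = backing + sizeof(backing);

static void *alloc_block_buf(size_t size)       /* size assumed to be a power of two */
{
        char *ptr = (char *)ALIGN_UP((uintptr_t)buf, size);

        if (ptr + size > buf_end)               /* buffer exhausted: fall back */
                return malloc(size);
        buf = ptr + size;                       /* advance the cursor past the carved slot */
        return ptr;
}

int main(void)
{
        char *a = alloc_block_buf(4096);
        char *b = alloc_block_buf(4096);

        printf("a=%p b=%p, contiguous: %d\n", (void *)a, (void *)b, b - a == 4096);
        return 0;
}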
diff --git a/mm/sparse.c b/mm/sparse.c
index 6ce4aab69e99..22896d589133 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -271,7 +271,8 @@ static unsigned long *__kmalloc_section_usemap(void)
271 271
272#ifdef CONFIG_MEMORY_HOTREMOVE 272#ifdef CONFIG_MEMORY_HOTREMOVE
273static unsigned long * __init 273static unsigned long * __init
274sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) 274sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
275 unsigned long count)
275{ 276{
276 unsigned long section_nr; 277 unsigned long section_nr;
277 278
@@ -286,7 +287,7 @@ sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
286 * this problem. 287 * this problem.
287 */ 288 */
288 section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); 289 section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
289 return alloc_bootmem_section(usemap_size(), section_nr); 290 return alloc_bootmem_section(usemap_size() * count, section_nr);
290} 291}
291 292
292static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 293static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -329,7 +330,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
329} 330}
330#else 331#else
331static unsigned long * __init 332static unsigned long * __init
332sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) 333sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
334 unsigned long count)
333{ 335{
334 return NULL; 336 return NULL;
335} 337}
@@ -339,27 +341,40 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
339} 341}
340#endif /* CONFIG_MEMORY_HOTREMOVE */ 342#endif /* CONFIG_MEMORY_HOTREMOVE */
341 343
342static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) 344static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
345 unsigned long pnum_begin,
346 unsigned long pnum_end,
347 unsigned long usemap_count, int nodeid)
343{ 348{
344 unsigned long *usemap; 349 void *usemap;
345 struct mem_section *ms = __nr_to_section(pnum); 350 unsigned long pnum;
346 int nid = sparse_early_nid(ms); 351 int size = usemap_size();
347
348 usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid));
349 if (usemap)
350 return usemap;
351 352
352 usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size()); 353 usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
354 usemap_count);
353 if (usemap) { 355 if (usemap) {
354 check_usemap_section_nr(nid, usemap); 356 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
355 return usemap; 357 if (!present_section_nr(pnum))
358 continue;
359 usemap_map[pnum] = usemap;
360 usemap += size;
361 }
362 return;
356 } 363 }
357 364
358 /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */ 365 usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
359 nid = 0; 366 if (usemap) {
367 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
368 if (!present_section_nr(pnum))
369 continue;
370 usemap_map[pnum] = usemap;
371 usemap += size;
372 check_usemap_section_nr(nodeid, usemap_map[pnum]);
373 }
374 return;
375 }
360 376
361 printk(KERN_WARNING "%s: allocation failed\n", __func__); 377 printk(KERN_WARNING "%s: allocation failed\n", __func__);
362 return NULL;
363} 378}
364 379
365#ifndef CONFIG_SPARSEMEM_VMEMMAP 380#ifndef CONFIG_SPARSEMEM_VMEMMAP
@@ -375,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
375 PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); 390 PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));
376 return map; 391 return map;
377} 392}
393void __init sparse_mem_maps_populate_node(struct page **map_map,
394 unsigned long pnum_begin,
395 unsigned long pnum_end,
396 unsigned long map_count, int nodeid)
397{
398 void *map;
399 unsigned long pnum;
400 unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
401
402 map = alloc_remap(nodeid, size * map_count);
403 if (map) {
404 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
405 if (!present_section_nr(pnum))
406 continue;
407 map_map[pnum] = map;
408 map += size;
409 }
410 return;
411 }
412
413 size = PAGE_ALIGN(size);
414 map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count);
415 if (map) {
416 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
417 if (!present_section_nr(pnum))
418 continue;
419 map_map[pnum] = map;
420 map += size;
421 }
422 return;
423 }
424
425 /* fallback */
426 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
427 struct mem_section *ms;
428
429 if (!present_section_nr(pnum))
430 continue;
431 map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
432 if (map_map[pnum])
433 continue;
434 ms = __nr_to_section(pnum);
435 printk(KERN_ERR "%s: sparsemem memory map backing failed "
436 "some memory will not be available.\n", __func__);
437 ms->section_mem_map = 0;
438 }
439}
378#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ 440#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
379 441
442#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
443static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
444 unsigned long pnum_begin,
445 unsigned long pnum_end,
446 unsigned long map_count, int nodeid)
447{
448 sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
449 map_count, nodeid);
450}
451#else
380static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) 452static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
381{ 453{
382 struct page *map; 454 struct page *map;
@@ -392,10 +464,12 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
392 ms->section_mem_map = 0; 464 ms->section_mem_map = 0;
393 return NULL; 465 return NULL;
394} 466}
467#endif
395 468
396void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) 469void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
397{ 470{
398} 471}
472
399/* 473/*
400 * Allocate the accumulated non-linear sections, allocate a mem_map 474 * Allocate the accumulated non-linear sections, allocate a mem_map
401 * for each and record the physical to section mapping. 475 * for each and record the physical to section mapping.
@@ -407,6 +481,14 @@ void __init sparse_init(void)
407 unsigned long *usemap; 481 unsigned long *usemap;
408 unsigned long **usemap_map; 482 unsigned long **usemap_map;
409 int size; 483 int size;
484 int nodeid_begin = 0;
485 unsigned long pnum_begin = 0;
486 unsigned long usemap_count;
487#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
488 unsigned long map_count;
489 int size2;
490 struct page **map_map;
491#endif
410 492
411 /* 493 /*
412 * map is using big page (aka 2M in x86 64 bit) 494 * map is using big page (aka 2M in x86 64 bit)
@@ -425,10 +507,81 @@ void __init sparse_init(void)
425 panic("can not allocate usemap_map\n"); 507 panic("can not allocate usemap_map\n");
426 508
427 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { 509 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
510 struct mem_section *ms;
511
428 if (!present_section_nr(pnum)) 512 if (!present_section_nr(pnum))
429 continue; 513 continue;
430 usemap_map[pnum] = sparse_early_usemap_alloc(pnum); 514 ms = __nr_to_section(pnum);
515 nodeid_begin = sparse_early_nid(ms);
516 pnum_begin = pnum;
517 break;
431 } 518 }
519 usemap_count = 1;
520 for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
521 struct mem_section *ms;
522 int nodeid;
523
524 if (!present_section_nr(pnum))
525 continue;
526 ms = __nr_to_section(pnum);
527 nodeid = sparse_early_nid(ms);
528 if (nodeid == nodeid_begin) {
529 usemap_count++;
530 continue;
531 }
 532		 /* ok, we need to take care of pnum_begin to pnum - 1 */
533 sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
534 usemap_count, nodeid_begin);
535 /* new start, update count etc*/
536 nodeid_begin = nodeid;
537 pnum_begin = pnum;
538 usemap_count = 1;
539 }
540 /* ok, last chunk */
541 sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
542 usemap_count, nodeid_begin);
543
544#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
545 size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
546 map_map = alloc_bootmem(size2);
547 if (!map_map)
548 panic("can not allocate map_map\n");
549
550 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
551 struct mem_section *ms;
552
553 if (!present_section_nr(pnum))
554 continue;
555 ms = __nr_to_section(pnum);
556 nodeid_begin = sparse_early_nid(ms);
557 pnum_begin = pnum;
558 break;
559 }
560 map_count = 1;
561 for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
562 struct mem_section *ms;
563 int nodeid;
564
565 if (!present_section_nr(pnum))
566 continue;
567 ms = __nr_to_section(pnum);
568 nodeid = sparse_early_nid(ms);
569 if (nodeid == nodeid_begin) {
570 map_count++;
571 continue;
572 }
 573		 /* ok, we need to take care of pnum_begin to pnum - 1 */
574 sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
575 map_count, nodeid_begin);
576 /* new start, update count etc*/
577 nodeid_begin = nodeid;
578 pnum_begin = pnum;
579 map_count = 1;
580 }
581 /* ok, last chunk */
582 sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
583 map_count, nodeid_begin);
584#endif
432 585
433 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { 586 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
434 if (!present_section_nr(pnum)) 587 if (!present_section_nr(pnum))
@@ -438,7 +591,11 @@ void __init sparse_init(void)
438 if (!usemap) 591 if (!usemap)
439 continue; 592 continue;
440 593
594#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
595 map = map_map[pnum];
596#else
441 map = sparse_early_mem_map_alloc(pnum); 597 map = sparse_early_mem_map_alloc(pnum);
598#endif
442 if (!map) 599 if (!map)
443 continue; 600 continue;
444 601
@@ -448,6 +605,9 @@ void __init sparse_init(void)
448 605
449 vmemmap_populate_print_last(); 606 vmemmap_populate_print_last();
450 607
608#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
609 free_bootmem(__pa(map_map), size2);
610#endif
451 free_bootmem(__pa(usemap_map), size); 611 free_bootmem(__pa(usemap_map), size);
452} 612}
453 613
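sparse_init() above now runs the same grouping loop twice, once for usemaps and once (under CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER) for mem_maps: walk the present sections in order, count how long the current node's run is, and batch-allocate once per run. A small standalone model of that loop; section_nid[] and alloc_node() are invented stand-ins for the section/node lookup and sparse_early_usemaps_alloc_node():

/*
 * Userspace model, not kernel code: group consecutive present sections by
 * node and hand each run to a per-node batch allocator, exactly the shape of
 * the two loops sparse_init() gains in the patch.
 */
#include <stdio.h>

#define NR_SECTIONS 12

static int section_nid[NR_SECTIONS] = { 0, 0, -1, 0, 1, 1, -1, -1, 1, 2, 2, 2 };  /* -1: not present */

static void alloc_node(int begin, int end, int count, int nid)  /* stand-in batch allocator */
{
        printf("node %d: sections [%d,%d), %d present\n", nid, begin, end, count);
}

int main(void)
{
        int pnum, pnum_begin = 0, nid_begin = -1, count;

        for (pnum = 0; pnum < NR_SECTIONS; pnum++) {            /* find the first present section */
                if (section_nid[pnum] < 0)
                        continue;
                nid_begin = section_nid[pnum];
                pnum_begin = pnum;
                break;
        }
        count = 1;
        for (pnum = pnum_begin + 1; pnum < NR_SECTIONS; pnum++) {
                if (section_nid[pnum] < 0)
                        continue;
                if (section_nid[pnum] == nid_begin) {           /* same node: extend the run */
                        count++;
                        continue;
                }
                alloc_node(pnum_begin, pnum, count, nid_begin); /* flush the finished run */
                nid_begin = section_nid[pnum];
                pnum_begin = pnum;
                count = 1;
        }
        alloc_node(pnum_begin, NR_SECTIONS, count, nid_begin);  /* last run */
        return 0;
}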
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6051fbab67ba..fc5aa183bc45 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void)
139 threshold = calculate_threshold(zone); 139 threshold = calculate_threshold(zone);
140 140
141 for_each_online_cpu(cpu) 141 for_each_online_cpu(cpu)
142 zone_pcp(zone, cpu)->stat_threshold = threshold; 142 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
143 = threshold;
143 } 144 }
144} 145}
145 146
@@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void)
149void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 150void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
150 int delta) 151 int delta)
151{ 152{
152 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 153 struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
154
153 s8 *p = pcp->vm_stat_diff + item; 155 s8 *p = pcp->vm_stat_diff + item;
154 long x; 156 long x;
155 157
@@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
202 */ 204 */
203void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 205void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
204{ 206{
205 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 207 struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
206 s8 *p = pcp->vm_stat_diff + item; 208 s8 *p = pcp->vm_stat_diff + item;
207 209
208 (*p)++; 210 (*p)++;
@@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state);
223 225
224void __dec_zone_state(struct zone *zone, enum zone_stat_item item) 226void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
225{ 227{
226 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 228 struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
227 s8 *p = pcp->vm_stat_diff + item; 229 s8 *p = pcp->vm_stat_diff + item;
228 230
229 (*p)--; 231 (*p)--;
@@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu)
300 for_each_populated_zone(zone) { 302 for_each_populated_zone(zone) {
301 struct per_cpu_pageset *p; 303 struct per_cpu_pageset *p;
302 304
303 p = zone_pcp(zone, cpu); 305 p = per_cpu_ptr(zone->pageset, cpu);
304 306
305 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 307 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
306 if (p->vm_stat_diff[i]) { 308 if (p->vm_stat_diff[i]) {
@@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
741 for_each_online_cpu(i) { 743 for_each_online_cpu(i) {
742 struct per_cpu_pageset *pageset; 744 struct per_cpu_pageset *pageset;
743 745
744 pageset = zone_pcp(zone, i); 746 pageset = per_cpu_ptr(zone->pageset, i);
745 seq_printf(m, 747 seq_printf(m,
746 "\n cpu: %i" 748 "\n cpu: %i"
747 "\n count: %i" 749 "\n count: %i"
@@ -906,6 +908,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
906 case CPU_ONLINE: 908 case CPU_ONLINE:
907 case CPU_ONLINE_FROZEN: 909 case CPU_ONLINE_FROZEN:
908 start_cpu_timer(cpu); 910 start_cpu_timer(cpu);
911 node_set_state(cpu_to_node(cpu), N_CPU);
909 break; 912 break;
910 case CPU_DOWN_PREPARE: 913 case CPU_DOWN_PREPARE:
911 case CPU_DOWN_PREPARE_FROZEN: 914 case CPU_DOWN_PREPARE_FROZEN: