Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/Makefile              |    2
-rw-r--r--  arch/powerpc/mm/fault.c               |    7
-rw-r--r--  arch/powerpc/mm/gup.c                 |  235
-rw-r--r--  arch/powerpc/mm/hash_low_64.S         |   19
-rw-r--r--  arch/powerpc/mm/hash_native_64.c      |   41
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c       |  114
-rw-r--r--  arch/powerpc/mm/hugepage-hash64.c     |   60
-rw-r--r--  arch/powerpc/mm/hugetlbpage-book3e.c  |    6
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c  |    6
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c         |   51
-rw-r--r--  arch/powerpc/mm/init_32.c             |   10
-rw-r--r--  arch/powerpc/mm/init_64.c             |    1
-rw-r--r--  arch/powerpc/mm/mem.c                 |   77
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c  |    8
-rw-r--r--  arch/powerpc/mm/mmu_decl.h            |    2
-rw-r--r--  arch/powerpc/mm/numa.c                |  224
-rw-r--r--  arch/powerpc/mm/pgtable_32.c          |    3
-rw-r--r--  arch/powerpc/mm/pgtable_64.c          |  104
18 files changed, 265 insertions(+), 705 deletions(-)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 325e861616a1..438dcd3fd0d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
 
-obj-y				:= fault.o mem.o pgtable.o gup.o mmap.o \
+obj-y				:= fault.o mem.o pgtable.o mmap.o \
 				   init_$(CONFIG_WORD_SIZE).o \
 				   pgtable_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 08d659a9fcdb..eb79907f34fa 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -43,7 +43,6 @@
 #include <asm/tlbflush.h>
 #include <asm/siginfo.h>
 #include <asm/debug.h>
-#include <mm/mmu_decl.h>
 
 #include "icswx.h"
 
@@ -380,12 +379,6 @@ good_area:
 			goto bad_area;
 #endif /* CONFIG_6xx */
 #if defined(CONFIG_8xx)
-	/* 8xx sometimes need to load a invalid/non-present TLBs.
-	 * These must be invalidated separately as linux mm don't.
-	 */
-	if (error_code & 0x40000000) /* no translation?  */
-		_tlbil_va(address, 0, 0, 0);
-
 	/* The MPC8xx seems to always set 0x80000000, which is
 	 * "undefined". Of those that can be set, this is the only
 	 * one which seems bad.
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
deleted file mode 100644
index d8746684f606..000000000000
--- a/arch/powerpc/mm/gup.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Lockless get_user_pages_fast for powerpc
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#undef DEBUG
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-#ifdef __HAVE_ARCH_PTE_SPECIAL
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
-{
-	unsigned long mask, result;
-	pte_t *ptep;
-
-	result = _PAGE_PRESENT|_PAGE_USER;
-	if (write)
-		result |= _PAGE_RW;
-	mask = result | _PAGE_SPECIAL;
-
-	ptep = pte_offset_kernel(&pmd, addr);
-	do {
-		pte_t pte = ACCESS_ONCE(*ptep);
-		struct page *page;
-		/*
-		 * Similar to the PMD case, NUMA hinting must take slow path
-		 */
-		if (pte_numa(pte))
-			return 0;
-
-		if ((pte_val(pte) & mask) != result)
-			return 0;
-		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-		page = pte_page(pte);
-		if (!page_cache_get_speculative(page))
-			return 0;
-		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-			put_page(page);
-			return 0;
-		}
-		pages[*nr] = page;
-		(*nr)++;
-
-	} while (ptep++, addr += PAGE_SIZE, addr != end);
-
-	return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
-{
-	unsigned long next;
-	pmd_t *pmdp;
-
-	pmdp = pmd_offset(&pud, addr);
-	do {
-		pmd_t pmd = ACCESS_ONCE(*pmdp);
-
-		next = pmd_addr_end(addr, end);
-		/*
-		 * If we find a splitting transparent hugepage we
-		 * return zero. That will result in taking the slow
-		 * path which will call wait_split_huge_page()
-		 * if the pmd is still in splitting state
-		 */
-		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
-			return 0;
-		if (pmd_huge(pmd) || pmd_large(pmd)) {
-			/*
-			 * NUMA hinting faults need to be handled in the GUP
-			 * slowpath for accounting purposes and so that they
-			 * can be serialised against THP migration.
-			 */
-			if (pmd_numa(pmd))
-				return 0;
-
-			if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
-					 write, pages, nr))
-				return 0;
-		} else if (is_hugepd(pmdp)) {
-			if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
-					addr, next, write, pages, nr))
-				return 0;
-		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
-			return 0;
-	} while (pmdp++, addr = next, addr != end);
-
-	return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
-{
-	unsigned long next;
-	pud_t *pudp;
-
-	pudp = pud_offset(&pgd, addr);
-	do {
-		pud_t pud = ACCESS_ONCE(*pudp);
-
-		next = pud_addr_end(addr, end);
-		if (pud_none(pud))
-			return 0;
-		if (pud_huge(pud)) {
-			if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
-					 write, pages, nr))
-				return 0;
-		} else if (is_hugepd(pudp)) {
-			if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
-					addr, next, write, pages, nr))
-				return 0;
-		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
-			return 0;
-	} while (pudp++, addr = next, addr != end);
-
-	return 1;
-}
-
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			  struct page **pages)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr, len, end;
-	unsigned long next;
-	unsigned long flags;
-	pgd_t *pgdp;
-	int nr = 0;
-
-	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
-
-	start &= PAGE_MASK;
-	addr = start;
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-	end = start + len;
-
-	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-					start, len)))
-		return 0;
-
-	pr_devel("  aligned: %lx .. %lx\n", start, end);
-
-	/*
-	 * XXX: batch / limit 'nr', to avoid large irq off latency
-	 * needs some instrumenting to determine the common sizes used by
-	 * important workloads (eg. DB2), and whether limiting the batch size
-	 * will decrease performance.
-	 *
-	 * It seems like we're in the clear for the moment. Direct-IO is
-	 * the main guy that batches up lots of get_user_pages, and even
-	 * they are limited to 64-at-a-time which is not so many.
-	 */
-	/*
-	 * This doesn't prevent pagetable teardown, but does prevent
-	 * the pagetables from being freed on powerpc.
-	 *
-	 * So long as we atomically load page table pointers versus teardown,
-	 * we can follow the address down to the the page and take a ref on it.
-	 */
-	local_irq_save(flags);
-
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = ACCESS_ONCE(*pgdp);
-
-		pr_devel("  %016lx: normal pgd %p\n", addr,
-			 (void *)pgd_val(pgd));
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			break;
-		if (pgd_huge(pgd)) {
-			if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
-					 write, pages, &nr))
-				break;
-		} else if (is_hugepd(pgdp)) {
-			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
-					addr, next, write, pages, &nr))
-				break;
-		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
-			break;
-	} while (pgdp++, addr = next, addr != end);
-
-	local_irq_restore(flags);
-
-	return nr;
-}
-
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
-{
-	struct mm_struct *mm = current->mm;
-	int nr, ret;
-
-	start &= PAGE_MASK;
-	nr = __get_user_pages_fast(start, nr_pages, write, pages);
-	ret = nr;
-
-	if (nr < nr_pages) {
-		pr_devel("  slow path ! nr = %d\n", nr);
-
-		/* Try to get the remaining pages with get_user_pages */
-		start += nr << PAGE_SHIFT;
-		pages += nr;
-
-		down_read(&mm->mmap_sem);
-		ret = get_user_pages(current, mm, start,
-				     nr_pages - nr, write, 0, pages, NULL);
-		up_read(&mm->mmap_sem);
-
-		/* Have to be a bit careful with return values */
-		if (nr > 0) {
-			if (ret < 0)
-				ret = nr;
-			else
-				ret += nr;
-		}
-	}
-
-	return ret;
-}
-
-#endif /* __HAVE_ARCH_PTE_SPECIAL */
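
Note: the deletion above is possible because powerpc now relies on the generic get_user_pages_fast() in mm/gup.c; the entry points keep the same prototypes, so in-kernel callers are unchanged. A minimal caller-side sketch of the API (illustrative only — pin_user_buffer and its error handling are not part of this patch; prototypes match the 3.x-era API shown in the deleted file):

	#include <linux/mm.h>
	#include <linux/pagemap.h>

	static int pin_user_buffer(unsigned long uaddr, int nr_pages,
				   struct page **pages)
	{
		int i, pinned;

		/* Fast path: may pin fewer than nr_pages, never sleeps. */
		pinned = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
		if (pinned < 0)
			return pinned;

		/* ... access the pinned pages here ... */

		for (i = 0; i < pinned; i++)
			put_page(pages[i]);	/* drop each reference taken */
		return pinned;
	}
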
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 057cbbb4c576..463174a4a647 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -46,7 +46,8 @@
 
 /*
  * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- *		 pte_t *ptep, unsigned long trap, int local, int ssize)
+ *		 pte_t *ptep, unsigned long trap, unsigned long flags,
+ *		 int ssize)
  *
  * Adds a 4K page to the hash table in a segment of 4K pages only
  */
@@ -298,7 +299,7 @@ htab_modify_pte:
 	li	r6,MMU_PAGE_4K		/* base page size */
 	li	r7,MMU_PAGE_4K		/* actual page size */
 	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
-	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */
+	ld	r9,STK_PARAM(R8)(r1)	/* get "flags" param */
.globl htab_call_hpte_updatepp
htab_call_hpte_updatepp:
 	bl	.	/* Patched by htab_finish_init() */
@@ -338,8 +339,8 @@ htab_pte_insert_failure:
  *****************************************************************************/
 
 /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- *		 pte_t *ptep, unsigned long trap, int local, int ssize,
- *		 int subpg_prot)
+ *		 pte_t *ptep, unsigned long trap, unsigned local flags,
+ *		 int ssize, int subpg_prot)
  */
 
 /*
@@ -514,7 +515,7 @@ htab_insert_pte:
 	andis.	r0,r31,_PAGE_4K_PFN@h
 	srdi	r5,r31,PTE_RPN_SHIFT
 	bne-	htab_special_pfn
-	sldi	r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+	sldi	r5,r5,PAGE_FACTOR
 	add	r5,r5,r25
htab_special_pfn:
 	sldi	r5,r5,HW_PAGE_SHIFT
@@ -544,7 +545,7 @@ htab_call_hpte_insert1:
 	andis.	r0,r31,_PAGE_4K_PFN@h
 	srdi	r5,r31,PTE_RPN_SHIFT
 	bne-	3f
-	sldi	r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+	sldi	r5,r5,PAGE_FACTOR
 	add	r5,r5,r25
3:	sldi	r5,r5,HW_PAGE_SHIFT
 
@@ -594,7 +595,7 @@ htab_inval_old_hpte:
 	li	r5,0			/* PTE.hidx */
 	li	r6,MMU_PAGE_64K		/* psize */
 	ld	r7,STK_PARAM(R9)(r1)	/* ssize */
-	ld	r8,STK_PARAM(R8)(r1)	/* local */
+	ld	r8,STK_PARAM(R8)(r1)	/* flags */
 	bl	flush_hash_page
 	/* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */
 	lis	r0,_PAGE_HPTE_SUB@h
@@ -666,7 +667,7 @@ htab_modify_pte:
 	li	r6,MMU_PAGE_4K		/* base page size */
 	li	r7,MMU_PAGE_4K		/* actual page size */
 	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
-	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */
+	ld	r9,STK_PARAM(R8)(r1)	/* get "flags" param */
.globl htab_call_hpte_updatepp
htab_call_hpte_updatepp:
 	bl	.	/* patched by htab_finish_init() */
@@ -962,7 +963,7 @@ ht64_modify_pte:
 	li	r6,MMU_PAGE_64K		/* base page size */
 	li	r7,MMU_PAGE_64K		/* actual page size */
 	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
-	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */
+	ld	r9,STK_PARAM(R8)(r1)	/* get "flags" param */
.globl ht64_call_hpte_updatepp
ht64_call_hpte_updatepp:
 	bl	.	/* patched by htab_finish_init() */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index ae4962a06476..9c4880ddecd6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -283,19 +283,17 @@ static long native_hpte_remove(unsigned long hpte_group)
 
 static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 				 unsigned long vpn, int bpsize,
-				 int apsize, int ssize, int local)
+				 int apsize, int ssize, unsigned long flags)
 {
 	struct hash_pte *hptep = htab_address + slot;
 	unsigned long hpte_v, want_v;
-	int ret = 0;
+	int ret = 0, local = 0;
 
 	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
 
 	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
 		vpn, want_v & HPTE_V_AVPN, slot, newpp);
 
-	native_lock_hpte(hptep);
-
 	hpte_v = be64_to_cpu(hptep->v);
 	/*
 	 * We need to invalidate the TLB always because hpte_remove doesn't do
@@ -308,15 +306,30 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 		DBG_LOW(" -> miss\n");
 		ret = -1;
 	} else {
-		DBG_LOW(" -> hit\n");
-		/* Update the HPTE */
-		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
-			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
+		native_lock_hpte(hptep);
+		/* recheck with locks held */
+		hpte_v = be64_to_cpu(hptep->v);
+		if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
+			     !(hpte_v & HPTE_V_VALID))) {
+			ret = -1;
+		} else {
+			DBG_LOW(" -> hit\n");
+			/* Update the HPTE */
+			hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
+						~(HPTE_R_PP | HPTE_R_N)) |
+					       (newpp & (HPTE_R_PP | HPTE_R_N |
+							 HPTE_R_C)));
+		}
+		native_unlock_hpte(hptep);
 	}
-	native_unlock_hpte(hptep);
 
-	/* Ensure it is out of the tlb too. */
-	tlbie(vpn, bpsize, apsize, ssize, local);
+	if (flags & HPTE_LOCAL_UPDATE)
+		local = 1;
+	/*
+	 * Ensure it is out of the tlb too if it is not a nohpte fault
+	 */
+	if (!(flags & HPTE_NOHPTE_UPDATE))
+		tlbie(vpn, bpsize, apsize, ssize, local);
 
 	return ret;
 }
@@ -419,7 +432,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 static void native_hugepage_invalidate(unsigned long vsid,
 				       unsigned long addr,
 				       unsigned char *hpte_slot_array,
-				       int psize, int ssize)
+				       int psize, int ssize, int local)
 {
 	int i;
 	struct hash_pte *hptep;
@@ -465,7 +478,7 @@ static void native_hugepage_invalidate(unsigned long vsid,
 		 * instruction compares entry_VA in tlb with the VA specified
 		 * here
 		 */
-		tlbie(vpn, psize, actual_psize, ssize, 0);
+		tlbie(vpn, psize, actual_psize, ssize, local);
 	}
 	local_irq_restore(flags);
 }
@@ -629,7 +642,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 	unsigned long want_v;
 	unsigned long flags;
 	real_pte_t pte;
-	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
 	unsigned long psize = batch->psize;
 	int ssize = batch->ssize;
 	int i;
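
Note: the native_hpte_updatepp() rework above is a double-checked update: peek at the entry without the lock, and only on an apparent hit take the per-HPTE lock and re-validate before writing, so the common miss path never touches the lock at all. A generic, self-contained sketch of that pattern (the struct and names are illustrative, not kernel API):

	#include <linux/spinlock.h>

	struct cached_entry {
		spinlock_t lock;
		unsigned long tag;	/* identity, like HPTE_V_AVPN + VALID */
		unsigned long data;	/* payload, like the pp bits */
	};

	static int update_if_match(struct cached_entry *e, unsigned long want,
				   unsigned long new_data)
	{
		int ret = -1;

		if (READ_ONCE(e->tag) != want)	/* cheap unlocked miss check */
			return ret;

		spin_lock(&e->lock);
		if (e->tag == want) {		/* recheck under the lock */
			e->data = new_data;
			ret = 0;
		}
		spin_unlock(&e->lock);
		return ret;
	}
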
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d5339a3b9945..e56a307bc676 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
  * -1 - critical hash insertion error
  * -2 - access not permitted by subpage protection mechanism
  */
-int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+		 unsigned long access, unsigned long trap,
+		 unsigned long flags)
 {
 	enum ctx_state prev_state = exception_enter();
 	pgd_t *pgdir;
@@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
 	pte_t *ptep;
 	unsigned hugeshift;
 	const struct cpumask *tmp;
-	int rc, user_region = 0, local = 0;
+	int rc, user_region = 0;
 	int psize, ssize;
 
 	DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
@@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
 	/* Check CPU locality */
 	tmp = cpumask_of(smp_processor_id());
 	if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
-		local = 1;
+		flags |= HPTE_LOCAL_UPDATE;
 
 #ifndef CONFIG_PPC_64K_PAGES
 	/* If we use 4K pages and our psize is not 4K, then we might
@@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
 	if (hugeshift) {
 		if (pmd_trans_huge(*(pmd_t *)ptep))
 			rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
-					     trap, local, ssize, psize);
+					     trap, flags, ssize, psize);
 #ifdef CONFIG_HUGETLB_PAGE
 		else
 			rc = __hash_page_huge(ea, access, vsid, ptep, trap,
-					      local, ssize, hugeshift, psize);
+					      flags, ssize, hugeshift, psize);
 #else
 		else {
 			/*
@@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
 
 #ifdef CONFIG_PPC_HAS_HASH_64K
 	if (psize == MMU_PAGE_64K)
-		rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
+		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+				     flags, ssize);
 	else
 #endif /* CONFIG_PPC_HAS_HASH_64K */
 	{
@@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
 			rc = -2;
 		else
 			rc = __hash_page_4K(ea, access, vsid, ptep, trap,
-					    local, ssize, spp);
+					    flags, ssize, spp);
 	}
 
 	/* Dump some info in case of hash insertion failure, they should
@@ -1181,14 +1184,19 @@ bail:
 }
 EXPORT_SYMBOL_GPL(hash_page_mm);
 
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+	      unsigned long dsisr)
 {
+	unsigned long flags = 0;
 	struct mm_struct *mm = current->mm;
 
 	if (REGION_ID(ea) == VMALLOC_REGION_ID)
 		mm = &init_mm;
 
-	return hash_page_mm(mm, ea, access, trap);
+	if (dsisr & DSISR_NOHPTE)
+		flags |= HPTE_NOHPTE_UPDATE;
+
+	return hash_page_mm(mm, ea, access, trap, flags);
 }
 EXPORT_SYMBOL_GPL(hash_page);
 
@@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	pgd_t *pgdir;
 	pte_t *ptep;
 	unsigned long flags;
-	int rc, ssize, local = 0;
+	int rc, ssize, update_flags = 0;
 
 	BUG_ON(REGION_ID(ea) != USER_REGION_ID);
 
@@ -1251,16 +1259,17 @@
 
 	/* Is that local to this CPU ? */
 	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		local = 1;
+		update_flags |= HPTE_LOCAL_UPDATE;
 
 	/* Hash it in */
 #ifdef CONFIG_PPC_HAS_HASH_64K
 	if (mm->context.user_psize == MMU_PAGE_64K)
-		rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
+		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+				     update_flags, ssize);
 	else
 #endif /* CONFIG_PPC_HAS_HASH_64K */
-		rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
-				    subpage_protection(mm, ea));
+		rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags,
+				    ssize, subpage_protection(mm, ea));
 
 	/* Dump some info in case of hash insertion failure, they should
 	 * never happen so it is really useful to know if/when they do
@@ -1278,9 +1287,10 @@ out_exit:
  * do not forget to update the assembly call site !
  */
 void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
-		     int local)
+		     unsigned long flags)
 {
 	unsigned long hash, index, shift, hidx, slot;
+	int local = flags & HPTE_LOCAL_UPDATE;
 
 	DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
 	pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
@@ -1315,6 +1325,78 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
 #endif
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+			 pmd_t *pmdp, unsigned int psize, int ssize,
+			 unsigned long flags)
+{
+	int i, max_hpte_count, valid;
+	unsigned long s_addr;
+	unsigned char *hpte_slot_array;
+	unsigned long hidx, shift, vpn, hash, slot;
+	int local = flags & HPTE_LOCAL_UPDATE;
+
+	s_addr = addr & HPAGE_PMD_MASK;
+	hpte_slot_array = get_hpte_slot_array(pmdp);
+	/*
+	 * IF we try to do a HUGE PTE update after a withdraw is done.
+	 * we will find the below NULL. This happens when we do
+	 * split_huge_page_pmd
+	 */
+	if (!hpte_slot_array)
+		return;
+
+	if (ppc_md.hugepage_invalidate) {
+		ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+					   psize, ssize, local);
+		goto tm_abort;
+	}
+	/*
+	 * No bluk hpte removal support, invalidate each entry
+	 */
+	shift = mmu_psize_defs[psize].shift;
+	max_hpte_count = HPAGE_PMD_SIZE >> shift;
+	for (i = 0; i < max_hpte_count; i++) {
+		/*
+		 * 8 bits per each hpte entries
+		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+		 */
+		valid = hpte_valid(hpte_slot_array, i);
+		if (!valid)
+			continue;
+		hidx = hpte_hash_index(hpte_slot_array, i);
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+		ppc_md.hpte_invalidate(slot, vpn, psize,
+				       MMU_PAGE_16M, ssize, local);
+	}
+tm_abort:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* Transactions are not aborted by tlbiel, only tlbie.
+	 * Without, syncing a page back to a block device w/ PIO could pick up
+	 * transactional data (bad!) so we force an abort here. Before the
+	 * sync the page will be made read-only, which will flush_hash_page.
+	 * BIG ISSUE here: if the kernel uses a page from userspace without
+	 * unmapping it first, it may see the speculated version.
+	 */
+	if (local && cpu_has_feature(CPU_FTR_TM) &&
+	    current->thread.regs &&
+	    MSR_TM_ACTIVE(current->thread.regs->msr)) {
+		tm_enable();
+		tm_abort(TM_CAUSE_TLBI);
+	}
+#endif
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 void flush_hash_range(unsigned long number, int local)
 {
 	if (ppc_md.flush_hash_range)
@@ -1322,7 +1404,7 @@ void flush_hash_range(unsigned long number, int local)
 	else {
 		int i;
 		struct ppc64_tlb_batch *batch =
-			&__get_cpu_var(ppc64_tlb_batch);
+			this_cpu_ptr(&ppc64_tlb_batch);
 
 		for (i = 0; i < number; i++)
 			flush_hash_page(batch->vpn[i], batch->pte[i],
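
Note: with the changes above, a single unsigned long flags word carries what used to be the int local argument. HPTE_LOCAL_UPDATE means the mm only ever ran on this CPU (a local tlbiel suffices), and HPTE_NOHPTE_UPDATE means the fault was taken with no HPTE present (DSISR_NOHPTE), so no stale translation can exist and the broadcast invalidation can be skipped. A self-contained sketch of how a backend decodes the word (the flag values and function here are illustrative; the real definitions live in the hash MMU headers):

	#define HPTE_LOCAL_UPDATE	0x1	/* values illustrative only */
	#define HPTE_NOHPTE_UPDATE	0x2

	static void flush_tlb_entry(unsigned long vpn, int local) { /* stub */ }

	static void backend_updatepp(unsigned long vpn, unsigned long flags)
	{
		int local = !!(flags & HPTE_LOCAL_UPDATE);

		/* A fault that never had an HPTE cannot leave a stale
		 * translation behind, so skip the (expensive) flush. */
		if (!(flags & HPTE_NOHPTE_UPDATE))
			flush_tlb_entry(vpn, local);
	}
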
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 5f5e6328c21c..86686514ae13 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,60 +18,9 @@
 #include <linux/mm.h>
 #include <asm/machdep.h>
 
-static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
-				pmd_t *pmdp, unsigned int psize, int ssize)
-{
-	int i, max_hpte_count, valid;
-	unsigned long s_addr;
-	unsigned char *hpte_slot_array;
-	unsigned long hidx, shift, vpn, hash, slot;
-
-	s_addr = addr & HPAGE_PMD_MASK;
-	hpte_slot_array = get_hpte_slot_array(pmdp);
-	/*
-	 * IF we try to do a HUGE PTE update after a withdraw is done.
-	 * we will find the below NULL. This happens when we do
-	 * split_huge_page_pmd
-	 */
-	if (!hpte_slot_array)
-		return;
-
-	if (ppc_md.hugepage_invalidate)
-		return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
-						  psize, ssize);
-	/*
-	 * No bluk hpte removal support, invalidate each entry
-	 */
-	shift = mmu_psize_defs[psize].shift;
-	max_hpte_count = HPAGE_PMD_SIZE >> shift;
-	for (i = 0; i < max_hpte_count; i++) {
-		/*
-		 * 8 bits per each hpte entries
-		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
-		 */
-		valid = hpte_valid(hpte_slot_array, i);
-		if (!valid)
-			continue;
-		hidx = hpte_hash_index(hpte_slot_array, i);
-
-		/* get the vpn */
-		addr = s_addr + (i * (1ul << shift));
-		vpn = hpt_vpn(addr, vsid, ssize);
-		hash = hpt_hash(vpn, shift, ssize);
-		if (hidx & _PTEIDX_SECONDARY)
-			hash = ~hash;
-
-		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += hidx & _PTEIDX_GROUP_IX;
-		ppc_md.hpte_invalidate(slot, vpn, psize,
-				       MMU_PAGE_16M, ssize, 0);
-	}
-}
-
-
 int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
-		    pmd_t *pmdp, unsigned long trap, int local, int ssize,
-		    unsigned int psize)
+		    pmd_t *pmdp, unsigned long trap, unsigned long flags,
+		    int ssize, unsigned int psize)
 {
 	unsigned int index, valid;
 	unsigned char *hpte_slot_array;
@@ -145,7 +94,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * hash page table entries.
 		 */
 		if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
-			invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
+			flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
+					    ssize, flags);
 	}
 
 	valid = hpte_valid(hpte_slot_array, index);
@@ -158,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		slot += hidx & _PTEIDX_GROUP_IX;
 
 		ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
-					   psize, lpsize, ssize, local);
+					   psize, lpsize, ssize, flags);
 		/*
 		 * We failed to update, try to insert a new entry.
 		 */
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 5e4ee2573903..ba47aaf33a4b 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -33,13 +33,13 @@ static inline int tlb1_next(void)
 
 	ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
 
-	index = __get_cpu_var(next_tlbcam_idx);
+	index = this_cpu_read(next_tlbcam_idx);
 
 	/* Just round-robin the entries and wrap when we hit the end */
 	if (unlikely(index == ncams - 1))
-		__get_cpu_var(next_tlbcam_idx) = tlbcam_index;
+		__this_cpu_write(next_tlbcam_idx, tlbcam_index);
 	else
-		__get_cpu_var(next_tlbcam_idx)++;
+		__this_cpu_inc(next_tlbcam_idx);
 
 	return index;
 }
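
Note: __get_cpu_var() returned an lvalue for the current CPU's copy of a per-CPU variable; the this_cpu family used above replaces read, write, and increment with single preemption-safe operations. A side-by-side sketch (my_counter is a made-up variable, not from this patch):

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(unsigned long, my_counter);

	static void old_style(void)
	{
		__get_cpu_var(my_counter)++;		/* pre-3.19 idiom, removed */
	}

	static void new_style(void)
	{
		unsigned long v;

		__this_cpu_inc(my_counter);		/* one arch-optimized op */
		v = this_cpu_read(my_counter);		/* tearing-safe read */
		this_cpu_write(my_counter, v + 1);
	}
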
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index a5bcf9301196..d94b1af53a93 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 			     unsigned long vflags, int psize, int ssize);
 
 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
-		     pte_t *ptep, unsigned long trap, int local, int ssize,
-		     unsigned int shift, unsigned int mmu_psize)
+		     pte_t *ptep, unsigned long trap, unsigned long flags,
+		     int ssize, unsigned int shift, unsigned int mmu_psize)
 {
 	unsigned long vpn;
 	unsigned long old_pte, new_pte;
@@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
 		if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
-					 mmu_psize, ssize, local) == -1)
+					 mmu_psize, ssize, flags) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6a4a5fcb9730..5ff4e07d920a 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -62,6 +62,9 @@ static unsigned nr_gpages;
 /*
  * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
  * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+ *
+ * Defined in such a way that we can optimize away code block at build time
+ * if CONFIG_HUGETLB_PAGE=n.
  */
 int pmd_huge(pmd_t pmd)
 {
@@ -230,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
 		return NULL;
 
-	return hugepte_offset(hpdp, addr, pdshift);
+	return hugepte_offset(*hpdp, addr, pdshift);
 }
 
 #else
@@ -270,13 +273,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
 		return NULL;
 
-	return hugepte_offset(hpdp, addr, pdshift);
+	return hugepte_offset(*hpdp, addr, pdshift);
 }
 #endif
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
 /* Build list of addresses of gigantic pages.  This function is used in early
- * boot before the buddy or bootmem allocator is setup.
+ * boot before the buddy allocator is setup.
  */
 void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 {
@@ -312,7 +315,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 	 * If gpages can be in highmem we can't use the trick of storing the
 	 * data structure in the page; allocate space for this
 	 */
-	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
+	m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0);
 	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
 #else
 	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
@@ -352,6 +355,13 @@ static int __init do_gpage_early_setup(char *param, char *val,
 		if (size != 0) {
 			if (sscanf(val, "%lu", &npages) <= 0)
 				npages = 0;
+			if (npages > MAX_NUMBER_GPAGES) {
+				pr_warn("MMU: %lu pages requested for page "
+					"size %llu KB, limiting to "
+					__stringify(MAX_NUMBER_GPAGES) "\n",
+					npages, size / 1024);
+				npages = MAX_NUMBER_GPAGES;
+			}
 			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
 			size = 0;
 		}
@@ -399,7 +409,7 @@ void __init reserve_hugetlb_gpages(void)
 #else /* !PPC_FSL_BOOK3E */
 
 /* Build list of addresses of gigantic pages.  This function is used in early
- * boot before the buddy or bootmem allocator is setup.
+ * boot before the buddy allocator is setup.
  */
 void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 {
@@ -462,7 +472,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
 {
 	struct hugepd_freelist **batchp;
 
-	batchp = &get_cpu_var(hugepd_freelist_cur);
+	batchp = this_cpu_ptr(&hugepd_freelist_cur);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpumask_equal(mm_cpumask(tlb->mm),
@@ -536,7 +546,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	do {
 		pmd = pmd_offset(pud, addr);
 		next = pmd_addr_end(addr, end);
-		if (!is_hugepd(pmd)) {
+		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
 			/*
 			 * if it is not hugepd pointer, we should already find
 			 * it cleared.
@@ -585,7 +595,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	do {
 		pud = pud_offset(pgd, addr);
 		next = pud_addr_end(addr, end);
-		if (!is_hugepd(pud)) {
+		if (!is_hugepd(__hugepd(pud_val(*pud)))) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
 			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
@@ -651,7 +661,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	do {
 		next = pgd_addr_end(addr, end);
 		pgd = pgd_offset(tlb->mm, addr);
-		if (!is_hugepd(pgd)) {
+		if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
 			if (pgd_none_or_clear_bad(pgd))
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
@@ -711,12 +721,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 	return (__boundary - 1 < end - 1) ? __boundary : end;
 }
 
-int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
-	       unsigned long addr, unsigned long end,
-	       int write, struct page **pages, int *nr)
+int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
+		unsigned long end, int write, struct page **pages, int *nr)
 {
 	pte_t *ptep;
-	unsigned long sz = 1UL << hugepd_shift(*hugepd);
+	unsigned long sz = 1UL << hugepd_shift(hugepd);
 	unsigned long next;
 
 	ptep = hugepte_offset(hugepd, addr, pdshift);
@@ -959,7 +968,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 	else if (pgd_huge(pgd)) {
 		ret_pte = (pte_t *) pgdp;
 		goto out;
-	} else if (is_hugepd(&pgd))
+	} else if (is_hugepd(__hugepd(pgd_val(pgd))))
 		hpdp = (hugepd_t *)&pgd;
 	else {
 		/*
@@ -976,7 +985,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 	else if (pud_huge(pud)) {
 		ret_pte = (pte_t *) pudp;
 		goto out;
-	} else if (is_hugepd(&pud))
+	} else if (is_hugepd(__hugepd(pud_val(pud))))
 		hpdp = (hugepd_t *)&pud;
 	else {
 		pdshift = PMD_SHIFT;
@@ -997,7 +1006,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 	if (pmd_huge(pmd) || pmd_large(pmd)) {
 		ret_pte = (pte_t *) pmdp;
 		goto out;
-	} else if (is_hugepd(&pmd))
+	} else if (is_hugepd(__hugepd(pmd_val(pmd))))
 		hpdp = (hugepd_t *)&pmd;
 	else
 		return pte_offset_kernel(&pmd, ea);
@@ -1006,7 +1015,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 	if (!hpdp)
 		return NULL;
 
-	ret_pte = hugepte_offset(hpdp, ea, pdshift);
+	ret_pte = hugepte_offset(*hpdp, ea, pdshift);
 	pdshift = hugepd_shift(*hpdp);
 out:
 	if (shift)
@@ -1036,14 +1045,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 	if ((pte_val(pte) & mask) != mask)
 		return 0;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	/*
-	 * check for splitting here
-	 */
-	if (pmd_trans_splitting(pte_pmd(pte)))
-		return 0;
-#endif
-
 	/* hugepages are never "special" */
 	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 
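
Note: the is_hugepd() conversions above stop handing the helper a live pointer into the page table and instead test a snapshot of the entry value wrapped in hugepd_t, which is type-safe and immune to the entry changing underneath the check. A simplified sketch of the value-based idiom (types reduced to essentials; the real hugepd_t, __hugepd() and is_hugepd() live in the powerpc headers and use a different marker):

	typedef struct { unsigned long pd; } hugepd_t;
	#define __hugepd(x)	((hugepd_t) { (x) })

	/* The marker bit chosen here is illustrative only. */
	static inline int is_hugepd(hugepd_t hpd)
	{
		return (hpd.pd & 0x1) != 0;
	}

	/* Callers snapshot the entry once and test the snapshot:
	 *	if (is_hugepd(__hugepd(pmd_val(*pmd)))) ...
	 * rather than passing is_hugepd() a pointer into the table.
	 */
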
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 415a51b028b9..a10be665b645 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -26,7 +26,6 @@
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
@@ -195,15 +194,6 @@ void __init MMU_init(void)
 	memblock_set_current_limit(lowmem_end_addr);
 }
 
-/* This is only called until mem_init is done. */
-void __init *early_get_page(void)
-{
-	if (init_bootmem_done)
-		return alloc_bootmem_pages(PAGE_SIZE);
-	else
-		return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
-}
-
 #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				phys_addr_t first_memblock_size)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 3481556a1880..10471f9bb63f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -34,7 +34,6 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/idr.h>
 #include <linux/nodemask.h>
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8ebaac75c940..b7285a5870f8 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -35,6 +35,7 @@
 #include <linux/memblock.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@@ -60,7 +61,6 @@
 #define CPU_FTR_NOEXECUTE	0
 #endif
 
-int init_bootmem_done;
 int mem_init_done;
 unsigned long long memory_limit;
 
@@ -144,8 +144,17 @@ int arch_remove_memory(u64 start, u64 size)
 
 	zone = page_zone(pfn_to_page(start_pfn));
 	ret = __remove_pages(zone, start_pfn, nr_pages);
-	if (!ret && (ppc_md.remove_memory))
-		ret = ppc_md.remove_memory(start, size);
+	if (ret)
+		return ret;
+
+	/* Remove htab bolted mappings for this section of memory */
+	start = (unsigned long)__va(start);
+	ret = remove_section_mapping(start, start + size);
+
+	/* Ensure all vmalloc mappings are flushed in case they also
+	 * hit that section of memory
+	 */
+	vm_unmap_aliases();
 
 	return ret;
 }
@@ -180,70 +189,23 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 }
 EXPORT_SYMBOL_GPL(walk_system_ram_range);
 
-/*
- * Initialize the bootmem system and give it all the memory we
- * have available.  If we are using highmem, we only put the
- * lowmem into the bootmem system.
- */
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
 {
-	unsigned long start, bootmap_pages;
-	unsigned long total_pages;
-	struct memblock_region *reg;
-	int boot_mapsize;
-
 	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
-	total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
+	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
 #ifdef CONFIG_HIGHMEM
-	total_pages = total_lowmem >> PAGE_SHIFT;
 	max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
 #endif
 
-	/*
-	 * Find an area to use for the bootmem bitmap.  Calculate the size of
-	 * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
-	 * Add 1 additional page in case the address isn't page-aligned.
-	 */
-	bootmap_pages = bootmem_bootmap_pages(total_pages);
-
-	start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
-
-	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
-	boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
-
 	/* Place all memblock_regions in the same node and merge contiguous
 	 * memblock_regions
 	 */
 	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
 
-	/* Add all physical memory to the bootmem map, mark each area
-	 * present.
-	 */
-#ifdef CONFIG_HIGHMEM
-	free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
-
-	/* reserve the sections we're already using */
-	for_each_memblock(reserved, reg) {
-		unsigned long top = reg->base + reg->size - 1;
-		if (top < lowmem_end_addr)
-			reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
-		else if (reg->base < lowmem_end_addr) {
-			unsigned long trunc_size = lowmem_end_addr - reg->base;
-			reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT);
-		}
-	}
-#else
-	free_bootmem_with_active_regions(0, max_pfn);
-
-	/* reserve the sections we're already using */
-	for_each_memblock(reserved, reg)
-		reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
-#endif
 	/* XXX need to clip this if using highmem? */
 	sparse_memory_present_with_active_regions(0);
-
-	init_bootmem_done = 1;
+	sparse_init();
 }
 
 /* mark pages that don't exist as nosave */
@@ -359,14 +321,6 @@ void __init paging_init(void)
 	mark_nonram_nosave();
 }
 
-static void __init register_page_bootmem_info(void)
-{
-	int i;
-
-	for_each_online_node(i)
-		register_page_bootmem_info_node(NODE_DATA(i));
-}
-
 void __init mem_init(void)
 {
 	/*
@@ -379,7 +333,6 @@ void __init mem_init(void)
 	swiotlb_init(0);
 #endif
 
-	register_page_bootmem_info();
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 	set_max_mapnr(max_pfn);
 	free_all_bootmem();
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 928ebe79668b..9cba6cba2e50 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -421,12 +421,12 @@ void __init mmu_context_init(void)
 	/*
 	 * Allocate the maps used by context management
 	 */
-	context_map = alloc_bootmem(CTX_MAP_SIZE);
-	context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
+	context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0);
+	context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0);
 #ifndef CONFIG_SMP
-	stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
+	stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
 #else
-	stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE);
+	stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
 
 	register_cpu_notifier(&mmu_context_cpu_nb);
 #endif
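
Note: the alloc_bootmem() conversions here and in the other files of this diff all use memblock_virt_alloc(size, align), which returns zeroed memory, treats an align of 0 as the default cache-line alignment, and panics on failure — the same guarantees the bootmem calls provided. A sketch of an early (pre-mm_init) allocation in this style (early_table and its sizing are illustrative, not from this patch):

	#include <linux/bootmem.h>	/* declares memblock_virt_alloc() in this era */

	static u32 *early_table __initdata;

	static void __init early_table_alloc(unsigned long entries)
	{
		/* Zeroed, default alignment, panics rather than returning NULL. */
		early_table = memblock_virt_alloc(entries * sizeof(*early_table), 0);
	}
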
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9615d82919b8..78c45f392f5b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
 {
 	__tlbil_va(address, pid);
 }
-#endif /* CONIFG_8xx */
+#endif /* CONFIG_8xx */
 
 #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x)
 extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fe6002c1d5a..0257a7d659ef 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
134 return 0; 134 return 0;
135} 135}
136 136
137/*
138 * get_node_active_region - Return active region containing pfn
139 * Active range returned is empty if none found.
140 * @pfn: The page to return the region for
141 * @node_ar: Returned set to the active region containing @pfn
142 */
143static void __init get_node_active_region(unsigned long pfn,
144 struct node_active_region *node_ar)
145{
146 unsigned long start_pfn, end_pfn;
147 int i, nid;
148
149 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
150 if (pfn >= start_pfn && pfn < end_pfn) {
151 node_ar->nid = nid;
152 node_ar->start_pfn = start_pfn;
153 node_ar->end_pfn = end_pfn;
154 break;
155 }
156 }
157}
158
159static void reset_numa_cpu_lookup_table(void) 137static void reset_numa_cpu_lookup_table(void)
160{ 138{
161 unsigned int cpu; 139 unsigned int cpu;
@@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void)
928 } 906 }
929} 907}
930 908
931/*
932 * Allocate some memory, satisfying the memblock or bootmem allocator where
933 * required. nid is the preferred node and end is the physical address of
934 * the highest address in the node.
935 *
936 * Returns the virtual address of the memory.
937 */
938static void __init *careful_zallocation(int nid, unsigned long size,
939 unsigned long align,
940 unsigned long end_pfn)
941{
942 void *ret;
943 int new_nid;
944 unsigned long ret_paddr;
945
946 ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
947
948 /* retry over all memory */
949 if (!ret_paddr)
950 ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
951
952 if (!ret_paddr)
953 panic("numa.c: cannot allocate %lu bytes for node %d",
954 size, nid);
955
956 ret = __va(ret_paddr);
957
958 /*
959 * We initialize the nodes in numeric order: 0, 1, 2...
960 * and hand over control from the MEMBLOCK allocator to the
961 * bootmem allocator. If this function is called for
962 * node 5, then we know that all nodes <5 are using the
963 * bootmem allocator instead of the MEMBLOCK allocator.
964 *
965 * So, check the nid from which this allocation came
966 * and double check to see if we need to use bootmem
967 * instead of the MEMBLOCK. We don't free the MEMBLOCK memory
968 * since it would be useless.
969 */
970 new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
971 if (new_nid < nid) {
972 ret = __alloc_bootmem_node(NODE_DATA(new_nid),
973 size, align, 0);
974
975 dbg("alloc_bootmem %p %lx\n", ret, size);
976 }
977
978 memset(ret, 0, size);
979 return ret;
980}
981
982static struct notifier_block ppc64_numa_nb = { 909static struct notifier_block ppc64_numa_nb = {
983 .notifier_call = cpu_numa_callback, 910 .notifier_call = cpu_numa_callback,
984 .priority = 1 /* Must run before sched domains notifier. */ 911 .priority = 1 /* Must run before sched domains notifier. */
985}; 912};
986 913
-static void __init mark_reserved_regions_for_nid(int nid)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 {
-	struct pglist_data *node = NODE_DATA(nid);
-	struct memblock_region *reg;
-
-	for_each_memblock(reserved, reg) {
-		unsigned long physbase = reg->base;
-		unsigned long size = reg->size;
-		unsigned long start_pfn = physbase >> PAGE_SHIFT;
-		unsigned long end_pfn = PFN_UP(physbase + size);
-		struct node_active_region node_ar;
-		unsigned long node_end_pfn = pgdat_end_pfn(node);
-
-		/*
-		 * Check to make sure that this memblock.reserved area is
-		 * within the bounds of the node that we care about.
-		 * Checking the nid of the start and end points is not
-		 * sufficient because the reserved area could span the
-		 * entire node.
-		 */
-		if (end_pfn <= node->node_start_pfn ||
-		    start_pfn >= node_end_pfn)
-			continue;
-
-		get_node_active_region(start_pfn, &node_ar);
-		while (start_pfn < end_pfn &&
-			node_ar.start_pfn < node_ar.end_pfn) {
-			unsigned long reserve_size = size;
-			/*
-			 * if reserved region extends past active region
-			 * then trim size to active region
-			 */
-			if (end_pfn > node_ar.end_pfn)
-				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
-					- physbase;
-			/*
-			 * Only worry about *this* node, others may not
-			 * yet have valid NODE_DATA().
-			 */
-			if (node_ar.nid == nid) {
-				dbg("reserve_bootmem %lx %lx nid=%d\n",
-					physbase, reserve_size, node_ar.nid);
-				reserve_bootmem_node(NODE_DATA(node_ar.nid),
-						physbase, reserve_size,
-						BOOTMEM_DEFAULT);
-			}
-			/*
-			 * if reserved region is contained in the active region
-			 * then done.
-			 */
-			if (end_pfn <= node_ar.end_pfn)
-				break;
-
-			/*
-			 * reserved region extends past the active region
-			 * get next active region that contains this
-			 * reserved region
-			 */
-			start_pfn = node_ar.end_pfn;
-			physbase = start_pfn << PAGE_SHIFT;
-			size = size - reserve_size;
-			get_node_active_region(start_pfn, &node_ar);
-		}
-	}
+	u64 spanned_pages = end_pfn - start_pfn;
+	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+	u64 nd_pa;
+	void *nd;
+	int tnid;
+
+	if (spanned_pages)
+		pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+			nid, start_pfn << PAGE_SHIFT,
+			(end_pfn << PAGE_SHIFT) - 1);
+	else
+		pr_info("Initmem setup node %d\n", nid);
+
+	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd = __va(nd_pa);
+
+	/* report and initialize */
+	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
+		nd_pa, nd_pa + nd_size - 1);
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (tnid != nid)
+		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	node_data[nid] = nd;
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
 }
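
setup_node_data() replaces the bootmem bookkeeping with a single memblock_alloc_try_nid() that prefers node-local memory but tolerates spill-over onto another node, merely reporting it. A userspace model of the same report-and-initialize shape; node_of() is an invented stand-in for early_pfn_to_nid() and always reports node 0.

/* Build: cc node_data.c — userspace model only. */
#include <stdio.h>
#include <stdlib.h>

struct pg_data_model {
	int node_id;
	unsigned long node_start_pfn;
	unsigned long node_spanned_pages;
};

static int node_of(const void *p) { (void)p; return 0; }

static struct pg_data_model *setup_node_data_model(int nid,
		unsigned long start_pfn, unsigned long end_pfn)
{
	struct pg_data_model *nd = calloc(1, sizeof(*nd));	/* zeroed, like the memset() */
	int tnid;

	if (!nd)
		abort();
	tnid = node_of(nd);
	if (tnid != nid)	/* allocation spilled onto another node */
		printf("  NODE_DATA(%d) on node %d\n", nid, tnid);

	nd->node_id = nid;
	nd->node_start_pfn = start_pfn;
	nd->node_spanned_pages = end_pfn - start_pfn;
	return nd;
}

int main(void)
{
	struct pg_data_model *nd = setup_node_data_model(1, 0x1000, 0x2000);

	printf("node %d spans %lu pages\n", nd->node_id, nd->node_spanned_pages);
	free(nd);
	return 0;
}
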
 
-
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
 {
 	int nid, cpu;
 
-	min_low_pfn = 0;
 	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	max_pfn = max_low_pfn;
 
@@ -1064,64 +956,18 @@ void __init do_init_bootmem(void)
 	else
 		dump_numa_memory_topology();
 
+	memblock_dump_all();
+
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
-		void *bootmem_vaddr;
-		unsigned long bootmap_pages;
 
 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-
-		/*
-		 * Allocate the node structure node local if possible
-		 *
-		 * Be careful moving this around, as it relies on all
-		 * previous nodes' bootmem to be initialized and have
-		 * all reserved areas marked.
-		 */
-		NODE_DATA(nid) = careful_zallocation(nid,
-					sizeof(struct pglist_data),
-					SMP_CACHE_BYTES, end_pfn);
-
-		dbg("node %d\n", nid);
-		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
-
-		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
-		NODE_DATA(nid)->node_start_pfn = start_pfn;
-		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-
-		if (NODE_DATA(nid)->node_spanned_pages == 0)
-			continue;
-
-		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
-		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
-
-		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-		bootmem_vaddr = careful_zallocation(nid,
-					bootmap_pages << PAGE_SHIFT,
-					PAGE_SIZE, end_pfn);
-
-		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
-
-		init_bootmem_node(NODE_DATA(nid),
-				__pa(bootmem_vaddr) >> PAGE_SHIFT,
-				start_pfn, end_pfn);
-
-		free_bootmem_with_active_regions(nid, end_pfn);
-		/*
-		 * Be very careful about moving this around.  Future
-		 * calls to careful_zallocation() depend on this getting
-		 * done correctly.
-		 */
-		mark_reserved_regions_for_nid(nid);
+		setup_node_data(nid, start_pfn, end_pfn);
 		sparse_memory_present_with_active_regions(nid);
 	}
 
-	init_bootmem_done = 1;
+	sparse_init();
 
-	/*
-	 * Now bootmem is initialised we can create the node to cpumask
-	 * lookup tables and setup the cpu callback to populate them.
-	 */
 	setup_node_to_cpumask_map();
 
 	reset_numa_cpu_lookup_table();
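
With bootmem gone, initmem_init() reduces to one pass over the online nodes — get the pfn range, set up NODE_DATA, mark the range present — followed by sparse_init(). A compact userspace model of that loop shape, with invented node ranges and online map.

/* Build: cc -std=c99 initmem.c — the node layout below is made up. */
#include <stdio.h>

#define MAX_NODES 4
static const int node_online[MAX_NODES] = { 1, 1, 0, 1 };

static void get_pfn_range(int nid, unsigned long *s, unsigned long *e)
{
	*s = (unsigned long)nid * 0x1000;	/* made-up layout */
	*e = *s + 0x1000;
}

int main(void)
{
	for (int nid = 0; nid < MAX_NODES; nid++) {
		unsigned long start_pfn, end_pfn;

		if (!node_online[nid])
			continue;
		get_pfn_range(nid, &start_pfn, &end_pfn);
		/* setup_node_data(nid, start_pfn, end_pfn) would run here */
		printf("node %d: pfns %#lx-%#lx\n", nid, start_pfn, end_pfn);
	}
	/* followed by sparse_init() and the cpumask table setup */
	return 0;
}
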
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cf11342bf519..d545b1231594 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -100,12 +100,11 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 {
 	pte_t *pte;
 	extern int mem_init_done;
-	extern void *early_get_page(void);
 
 	if (mem_init_done) {
 		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
 	} else {
-		pte = (pte_t *)early_get_page();
+		pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
 		if (pte)
 			clear_page(pte);
 	}
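
The new else-branch keeps the long-standing two-phase convention: before mem_init_done the page must come straight from memblock and be cleared by hand; afterwards the page allocator hands back a zeroed page. A userspace model of that switch, with both allocators stubbed out.

/* Build: cc pte_alloc.c — both allocators are userspace stubs. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

static int mem_init_done;	/* set once the page allocator is live */

static void *get_free_page_zeroed(void) { return calloc(1, PAGE_SIZE); }
static void *memblock_alloc_model(void) { return malloc(PAGE_SIZE); }

static void *pte_page_alloc(void)
{
	void *pte;

	if (mem_init_done)
		return get_free_page_zeroed();	/* __GFP_ZERO path */
	pte = memblock_alloc_model();		/* early path: raw memory */
	if (pte)
		memset(pte, 0, PAGE_SIZE);	/* clear_page() by hand */
	return pte;
}

int main(void)
{
	void *early = pte_page_alloc();

	mem_init_done = 1;
	void *late = pte_page_alloc();
	printf("early=%p late=%p\n", early, late);
	free(early);
	free(late);
	return 0;
}
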
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c8d709ab489d..4fe5f64cc179 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -33,9 +33,9 @@
 #include <linux/swap.h>
 #include <linux/stddef.h>
 #include <linux/vmalloc.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/slab.h>
+#include <linux/hugetlb.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
@@ -51,6 +51,7 @@
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/firmware.h>
+#include <asm/dma.h>
 
 #include "mmu_decl.h"
 
@@ -75,11 +76,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
 {
 	void *pt;
 
-	if (init_bootmem_done)
-		pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
-	else
-		pt = __va(memblock_alloc_base(size, size,
-				__pa(MAX_DMA_ADDRESS)));
+	pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
 	memset(pt, 0, size);
 
 	return pt;
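
early_alloc_pgtable() now has a single path: a naturally aligned (align == size), zeroed allocation capped below __pa(MAX_DMA_ADDRESS). The sketch below models only the alignment contract, using C11 aligned_alloc(); the DMA cap itself is not modeled.

/* Build: cc -std=c11 early_pt.c — models align == size allocation only. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void *early_alloc_pgtable_model(size_t size)
{
	/* memblock_alloc_base(size, size, limit) analogue: naturally
	 * aligned and zeroed; the address limit is left out here. */
	void *pt = aligned_alloc(size, size);

	if (!pt)
		abort();
	memset(pt, 0, size);
	return pt;
}

int main(void)
{
	void *pt = early_alloc_pgtable_model(4096);

	printf("page table at %p, 4KiB aligned: %d\n",
	       pt, ((uintptr_t)pt & 4095) == 0);
	free(pt);
	return 0;
}
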
@@ -113,10 +110,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
 					      __pgprot(flags)));
 	} else {
 #ifdef CONFIG_PPC_MMU_NOHASH
-		/* Warning ! This will blow up if bootmem is not initialized
-		 * which our ppc64 code is keen to do that, we'll need to
-		 * fix it and/or be more careful
-		 */
 		pgdp = pgd_offset_k(ea);
 #ifdef PUD_TABLE_SIZE
 		if (pgd_none(*pgdp)) {
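
map_kernel_page()'s NOHASH branch walks pgd -> pud -> pmd, allocating any level that is still empty, before setting the leaf pte. A toy model of that walk-or-allocate descent; the fan-out and types are invented, and the toy deliberately never frees its tables.

/* Build: cc pt_walk.c — a toy model, not the kernel's page tables. */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 8			/* invented fan-out per level */

struct level { struct level *next[ENTRIES]; };

/* pgd_none()/pud_none()/pmd_none() + early_alloc_pgtable() analogue:
 * allocate a zeroed lower-level table the first time it is needed. */
static struct level *walk_or_alloc(struct level *t, unsigned int idx)
{
	if (!t->next[idx]) {
		t->next[idx] = calloc(1, sizeof(struct level));
		if (!t->next[idx])
			abort();
	}
	return t->next[idx];
}

int main(void)
{
	struct level pgd = {0};
	struct level *pud = walk_or_alloc(&pgd, 1);
	struct level *pmd = walk_or_alloc(pud, 2);
	struct level *pte = walk_or_alloc(pmd, 3);

	printf("leaf table at %p\n", (void *)pte);
	return 0;
}
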
@@ -352,16 +345,31 @@ EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(__iounmap);
 EXPORT_SYMBOL(__iounmap_at);
 
+#ifndef __PAGETABLE_PUD_FOLDED
+/* 4 level page table */
+struct page *pgd_page(pgd_t pgd)
+{
+	if (pgd_huge(pgd))
+		return pte_page(pgd_pte(pgd));
+	return virt_to_page(pgd_page_vaddr(pgd));
+}
+#endif
+
+struct page *pud_page(pud_t pud)
+{
+	if (pud_huge(pud))
+		return pte_page(pud_pte(pud));
+	return virt_to_page(pud_page_vaddr(pud));
+}
+
 /*
  * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
  * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
  */
 struct page *pmd_page(pmd_t pmd)
 {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	if (pmd_trans_huge(pmd))
+	if (pmd_trans_huge(pmd) || pmd_huge(pmd))
 		return pfn_to_page(pmd_pfn(pmd));
-#endif
 	return virt_to_page(pmd_page_vaddr(pmd));
 }
 
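
The new pgd_page() and pud_page(), like the reworked pmd_page(), share one idea: a directory entry is either a pointer to a lower-level table or, for a huge mapping, itself a leaf that names the page. A small model of that two-way lookup, with invented entry and page types.

/* Build: cc leaf.c — entry/page types are invented for the model. */
#include <stdbool.h>
#include <stdio.h>

struct page_model { int id; };

struct entry {
	bool huge;			/* pgd_huge()/pud_huge()/pmd_huge() analogue */
	struct page_model *leaf;	/* the page itself, when huge */
	struct page_model *table;	/* the lower-level table, otherwise */
};

static struct page_model *entry_page(const struct entry *e)
{
	if (e->huge)
		return e->leaf;		/* pte_page(..._pte(entry)) path */
	return e->table;		/* virt_to_page(..._page_vaddr(entry)) path */
}

int main(void)
{
	struct page_model huge = { 1 }, tbl = { 2 };
	struct entry a = { true, &huge, NULL };
	struct entry b = { false, NULL, &tbl };

	printf("huge -> page %d, table -> page %d\n",
	       entry_page(&a)->id, entry_page(&b)->id);
	return 0;
}
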
@@ -731,29 +739,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 			    pmd_t *pmdp, unsigned long old_pmd)
 {
-	int ssize, i;
-	unsigned long s_addr;
-	int max_hpte_count;
-	unsigned int psize, valid;
-	unsigned char *hpte_slot_array;
-	unsigned long hidx, vpn, vsid, hash, shift, slot;
-
-	/*
-	 * Flush all the hptes mapping this hugepage
-	 */
-	s_addr = addr & HPAGE_PMD_MASK;
-	hpte_slot_array = get_hpte_slot_array(pmdp);
-	/*
-	 * IF we try to do a HUGE PTE update after a withdraw is done.
-	 * we will find the below NULL. This happens when we do
-	 * split_huge_page_pmd
-	 */
-	if (!hpte_slot_array)
-		return;
+	int ssize;
+	unsigned int psize;
+	unsigned long vsid;
+	unsigned long flags = 0;
+	const struct cpumask *tmp;
 
 	/* get the base page size,vsid and segment size */
 #ifdef CONFIG_DEBUG_VM
-	psize = get_slice_psize(mm, s_addr);
+	psize = get_slice_psize(mm, addr);
 	BUG_ON(psize == MMU_PAGE_16M);
 #endif
 	if (old_pmd & _PAGE_COMBO)
@@ -761,46 +755,20 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 	else
 		psize = MMU_PAGE_64K;
 
-	if (!is_kernel_addr(s_addr)) {
-		ssize = user_segment_size(s_addr);
-		vsid = get_vsid(mm->context.id, s_addr, ssize);
+	if (!is_kernel_addr(addr)) {
+		ssize = user_segment_size(addr);
+		vsid = get_vsid(mm->context.id, addr, ssize);
 		WARN_ON(vsid == 0);
 	} else {
-		vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
+		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
 	}
 
-	if (ppc_md.hugepage_invalidate)
-		return ppc_md.hugepage_invalidate(vsid, s_addr,
-						  hpte_slot_array,
-						  psize, ssize);
-	/*
-	 * No bluk hpte removal support, invalidate each entry
-	 */
-	shift = mmu_psize_defs[psize].shift;
-	max_hpte_count = HPAGE_PMD_SIZE >> shift;
-	for (i = 0; i < max_hpte_count; i++) {
-		/*
-		 * 8 bits per each hpte entries
-		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
-		 */
-		valid = hpte_valid(hpte_slot_array, i);
-		if (!valid)
-			continue;
-		hidx = hpte_hash_index(hpte_slot_array, i);
-
-		/* get the vpn */
-		addr = s_addr + (i * (1ul << shift));
-		vpn = hpt_vpn(addr, vsid, ssize);
-		hash = hpt_hash(vpn, shift, ssize);
-		if (hidx & _PTEIDX_SECONDARY)
-			hash = ~hash;
-
-		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += hidx & _PTEIDX_GROUP_IX;
-		ppc_md.hpte_invalidate(slot, vpn, psize,
-				       MMU_PAGE_16M, ssize, 0);
-	}
+	tmp = cpumask_of(smp_processor_id());
+	if (cpumask_equal(mm_cpumask(mm), tmp))
+		flags |= HPTE_LOCAL_UPDATE;
+
+	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
 }
 
 static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
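
The rewritten flush defers the per-slot HPTE work to flush_hash_hugepage() and only decides locality here: if the mm has run on no CPU other than the current one, HPTE_LOCAL_UPDATE lets the flush skip cross-CPU invalidation. A userspace model of that cpumask test, with a plain bitmask standing in for struct cpumask.

/* Build: cc flush.c — a plain bitmask stands in for struct cpumask. */
#include <stdio.h>

typedef unsigned int cpumask_model;	/* one bit per CPU */

/* cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())) test */
static int hugepage_flush_flags(cpumask_model mm_cpus, int this_cpu)
{
	cpumask_model me = 1u << this_cpu;

	return (mm_cpus == me) ? 1 /* HPTE_LOCAL_UPDATE */ : 0;
}

int main(void)
{
	printf("single-cpu mm: flags=%d\n",
	       hugepage_flush_flags(1u << 2, 2));
	printf("multi-cpu mm:  flags=%d\n",
	       hugepage_flush_flags((1u << 2) | 1u, 2));
	return 0;
}
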