aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2005-11-06 19:06:55 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-06 19:56:47 -0500
commit3c726f8dee6f55e96475574e9f645327e461884c (patch)
treef67c381e8f57959aa4a94bda4c68e24253cd8171 /arch/powerpc/mm
parentf912696ab330bf539231d1f8032320f2a08b850f (diff)
[PATCH] ppc64: support 64k pages
Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/hash_low_64.S613
-rw-r--r--arch/powerpc/mm/hash_native_64.c377
-rw-r--r--arch/powerpc/mm/hash_utils_64.c532
-rw-r--r--arch/powerpc/mm/hugetlbpage.c134
-rw-r--r--arch/powerpc/mm/init_64.c18
-rw-r--r--arch/powerpc/mm/mem.c56
-rw-r--r--arch/powerpc/mm/pgtable_64.c22
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c15
-rw-r--r--arch/powerpc/mm/slb.c102
-rw-r--r--arch/powerpc/mm/slb_low.S220
-rw-r--r--arch/powerpc/mm/stab.c30
-rw-r--r--arch/powerpc/mm/tlb_64.c32
12 files changed, 1637 insertions, 514 deletions
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eeea..e0d02c4a2615 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
1/* 1/*
2 * ppc64 MMU hashtable management routines 2 * ppc64 MMU hashtable management routines
3 * 3 *
4 * (c) Copyright IBM Corp. 2003 4 * (c) Copyright IBM Corp. 2003, 2005
5 * 5 *
6 * Maintained by: Benjamin Herrenschmidt 6 * Maintained by: Benjamin Herrenschmidt
7 * <benh@kernel.crashing.org> 7 * <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
10 * described in the kernel's COPYING file. 10 * described in the kernel's COPYING file.
11 */ 11 */
12 12
13#include <linux/config.h>
13#include <asm/reg.h> 14#include <asm/reg.h>
14#include <asm/pgtable.h> 15#include <asm/pgtable.h>
15#include <asm/mmu.h> 16#include <asm/mmu.h>
@@ -42,14 +43,24 @@
42/* Save non-volatile offsets */ 43/* Save non-volatile offsets */
43#define STK_REG(i) (112 + ((i)-14)*8) 44#define STK_REG(i) (112 + ((i)-14)*8)
44 45
46
47#ifndef CONFIG_PPC_64K_PAGES
48
49/*****************************************************************************
50 * *
51 * 4K SW & 4K HW pages implementation *
52 * *
53 *****************************************************************************/
54
55
45/* 56/*
46 * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, 57 * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
47 * pte_t *ptep, unsigned long trap, int local) 58 * pte_t *ptep, unsigned long trap, int local)
48 * 59 *
49 * Adds a page to the hash table. This is the non-LPAR version for now 60 * Adds a 4K page to the hash table in a segment of 4K pages only
50 */ 61 */
51 62
52_GLOBAL(__hash_page) 63_GLOBAL(__hash_page_4K)
53 mflr r0 64 mflr r0
54 std r0,16(r1) 65 std r0,16(r1)
55 stdu r1,-STACKFRAMESIZE(r1) 66 stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
88 /* If so, just bail out and refault if needed. Someone else 99 /* If so, just bail out and refault if needed. Someone else
89 * is changing this PTE anyway and might hash it. 100 * is changing this PTE anyway and might hash it.
90 */ 101 */
91 bne- bail_ok 102 bne- htab_bail_ok
103
92 /* Prepare new PTE value (turn access RW into DIRTY, then 104 /* Prepare new PTE value (turn access RW into DIRTY, then
93 * add BUSY,HASHPTE and ACCESSED) 105 * add BUSY,HASHPTE and ACCESSED)
94 */ 106 */
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
118 130
119 /* Convert linux PTE bits into HW equivalents */ 131 /* Convert linux PTE bits into HW equivalents */
120 andi. r3,r30,0x1fe /* Get basic set of flags */ 132 andi. r3,r30,0x1fe /* Get basic set of flags */
121 xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ 133 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
122 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ 134 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
123 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ 135 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
124 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ 136 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
125 andc r0,r30,r0 /* r0 = pte & ~r0 */ 137 andc r0,r30,r0 /* r0 = pte & ~r0 */
126 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ 138 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
127 139
@@ -158,19 +170,21 @@ htab_insert_pte:
158 andc r30,r30,r0 170 andc r30,r30,r0
159 ori r30,r30,_PAGE_HASHPTE 171 ori r30,r30,_PAGE_HASHPTE
160 172
161 /* page number in r5 */ 173 /* physical address r5 */
162 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 174 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
175 sldi r5,r5,PAGE_SHIFT
163 176
164 /* Calculate primary group hash */ 177 /* Calculate primary group hash */
165 and r0,r28,r27 178 and r0,r28,r27
166 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 179 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
167 180
168 /* Call ppc_md.hpte_insert */ 181 /* Call ppc_md.hpte_insert */
169 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 182 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
170 mr r4,r29 /* Retreive va */ 183 mr r4,r29 /* Retreive va */
171 li r6,0 /* no vflags */ 184 li r7,0 /* !bolted, !secondary */
185 li r8,MMU_PAGE_4K /* page size */
172_GLOBAL(htab_call_hpte_insert1) 186_GLOBAL(htab_call_hpte_insert1)
173 bl . /* Will be patched by htab_finish_init() */ 187 bl . /* Patched by htab_finish_init() */
174 cmpdi 0,r3,0 188 cmpdi 0,r3,0
175 bge htab_pte_insert_ok /* Insertion successful */ 189 bge htab_pte_insert_ok /* Insertion successful */
176 cmpdi 0,r3,-2 /* Critical failure */ 190 cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
178 192
179 /* Now try secondary slot */ 193 /* Now try secondary slot */
180 194
181 /* page number in r5 */ 195 /* physical address r5 */
182 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 196 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
197 sldi r5,r5,PAGE_SHIFT
183 198
184 /* Calculate secondary group hash */ 199 /* Calculate secondary group hash */
185 andc r0,r27,r28 200 andc r0,r27,r28
186 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ 201 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
187 202
188 /* Call ppc_md.hpte_insert */ 203 /* Call ppc_md.hpte_insert */
189 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 204 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
190 mr r4,r29 /* Retreive va */ 205 mr r4,r29 /* Retreive va */
191 li r6,HPTE_V_SECONDARY@l /* secondary slot */ 206 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
207 li r8,MMU_PAGE_4K /* page size */
192_GLOBAL(htab_call_hpte_insert2) 208_GLOBAL(htab_call_hpte_insert2)
193 bl . /* Will be patched by htab_finish_init() */ 209 bl . /* Patched by htab_finish_init() */
194 cmpdi 0,r3,0 210 cmpdi 0,r3,0
195 bge+ htab_pte_insert_ok /* Insertion successful */ 211 bge+ htab_pte_insert_ok /* Insertion successful */
196 cmpdi 0,r3,-2 /* Critical failure */ 212 cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
207 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 223 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
208 /* Call ppc_md.hpte_remove */ 224 /* Call ppc_md.hpte_remove */
209_GLOBAL(htab_call_hpte_remove) 225_GLOBAL(htab_call_hpte_remove)
210 bl . /* Will be patched by htab_finish_init() */ 226 bl . /* Patched by htab_finish_init() */
211 227
212 /* Try all again */ 228 /* Try all again */
213 b htab_insert_pte 229 b htab_insert_pte
214 230
215bail_ok: 231htab_bail_ok:
216 li r3,0 232 li r3,0
217 b bail 233 b htab_bail
218 234
219htab_pte_insert_ok: 235htab_pte_insert_ok:
220 /* Insert slot number & secondary bit in PTE */ 236 /* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
227 ld r6,STK_PARM(r6)(r1) 243 ld r6,STK_PARM(r6)(r1)
228 std r30,0(r6) 244 std r30,0(r6)
229 li r3, 0 245 li r3, 0
230bail: 246htab_bail:
231 ld r27,STK_REG(r27)(r1) 247 ld r27,STK_REG(r27)(r1)
232 ld r28,STK_REG(r28)(r1) 248 ld r28,STK_REG(r28)(r1)
233 ld r29,STK_REG(r29)(r1) 249 ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
256 272
257 /* Call ppc_md.hpte_updatepp */ 273 /* Call ppc_md.hpte_updatepp */
258 mr r5,r29 /* va */ 274 mr r5,r29 /* va */
259 li r6,0 /* large is 0 */ 275 li r6,MMU_PAGE_4K /* page size */
260 ld r7,STK_PARM(r8)(r1) /* get "local" param */ 276 ld r7,STK_PARM(r8)(r1) /* get "local" param */
261_GLOBAL(htab_call_hpte_updatepp) 277_GLOBAL(htab_call_hpte_updatepp)
262 bl . /* Will be patched by htab_finish_init() */ 278 bl . /* Patched by htab_finish_init() */
263 279
264 /* if we failed because typically the HPTE wasn't really here 280 /* if we failed because typically the HPTE wasn't really here
265 * we try an insertion. 281 * we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
276 /* Bail out clearing reservation */ 292 /* Bail out clearing reservation */
277 stdcx. r31,0,r6 293 stdcx. r31,0,r6
278 li r3,1 294 li r3,1
279 b bail 295 b htab_bail
296
297htab_pte_insert_failure:
298 /* Bail out restoring old PTE */
299 ld r6,STK_PARM(r6)(r1)
300 std r31,0(r6)
301 li r3,-1
302 b htab_bail
303
304
305#else /* CONFIG_PPC_64K_PAGES */
306
307
308/*****************************************************************************
309 * *
310 * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
311 * *
312 *****************************************************************************/
313
314/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
315 * pte_t *ptep, unsigned long trap, int local)
316 */
317
318/*
319 * For now, we do NOT implement Admixed pages
320 */
321_GLOBAL(__hash_page_4K)
322 mflr r0
323 std r0,16(r1)
324 stdu r1,-STACKFRAMESIZE(r1)
325 /* Save all params that we need after a function call */
326 std r6,STK_PARM(r6)(r1)
327 std r8,STK_PARM(r8)(r1)
328
329 /* Add _PAGE_PRESENT to access */
330 ori r4,r4,_PAGE_PRESENT
331
332 /* Save non-volatile registers.
333 * r31 will hold "old PTE"
334 * r30 is "new PTE"
335 * r29 is "va"
336 * r28 is a hash value
337 * r27 is hashtab mask (maybe dynamic patched instead ?)
338 * r26 is the hidx mask
339 * r25 is the index in combo page
340 */
341 std r25,STK_REG(r25)(r1)
342 std r26,STK_REG(r26)(r1)
343 std r27,STK_REG(r27)(r1)
344 std r28,STK_REG(r28)(r1)
345 std r29,STK_REG(r29)(r1)
346 std r30,STK_REG(r30)(r1)
347 std r31,STK_REG(r31)(r1)
348
349 /* Step 1:
350 *
351 * Check permissions, atomically mark the linux PTE busy
352 * and hashed.
353 */
3541:
355 ldarx r31,0,r6
356 /* Check access rights (access & ~(pte_val(*ptep))) */
357 andc. r0,r4,r31
358 bne- htab_wrong_access
359 /* Check if PTE is busy */
360 andi. r0,r31,_PAGE_BUSY
361 /* If so, just bail out and refault if needed. Someone else
362 * is changing this PTE anyway and might hash it.
363 */
364 bne- htab_bail_ok
365 /* Prepare new PTE value (turn access RW into DIRTY, then
366 * add BUSY and ACCESSED)
367 */
368 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
369 or r30,r30,r31
370 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
371 /* Write the linux PTE atomically (setting busy) */
372 stdcx. r30,0,r6
373 bne- 1b
374 isync
375
376 /* Step 2:
377 *
378 * Insert/Update the HPTE in the hash table. At this point,
379 * r4 (access) is re-useable, we use it for the new HPTE flags
380 */
381
382 /* Load the hidx index */
383 rldicl r25,r3,64-12,60
384
385 /* Calc va and put it in r29 */
386 rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
387 rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
388 or r29,r3,r29 /* r29 = va
389
390 /* Calculate hash value for primary slot and store it in r28 */
391 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
392 rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
393 xor r28,r5,r0
394
395 /* Convert linux PTE bits into HW equivalents */
396 andi. r3,r30,0x1fe /* Get basic set of flags */
397 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
398 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
399 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
400 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
401 andc r0,r30,r0 /* r0 = pte & ~r0 */
402 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
403
404 /* We eventually do the icache sync here (maybe inline that
405 * code rather than call a C function...)
406 */
407BEGIN_FTR_SECTION
408 mr r4,r30
409 mr r5,r7
410 bl .hash_page_do_lazy_icache
411END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
412
413 /* At this point, r3 contains new PP bits, save them in
414 * place of "access" in the param area (sic)
415 */
416 std r3,STK_PARM(r4)(r1)
417
418 /* Get htab_hash_mask */
419 ld r4,htab_hash_mask@got(2)
420 ld r27,0(r4) /* htab_hash_mask -> r27 */
421
422 /* Check if we may already be in the hashtable, in this case, we
423 * go to out-of-line code to try to modify the HPTE. We look for
424 * the bit at (1 >> (index + 32))
425 */
426 andi. r0,r31,_PAGE_HASHPTE
427 li r26,0 /* Default hidx */
428 beq htab_insert_pte
429 ld r6,STK_PARM(r6)(r1)
430 ori r26,r6,0x8000 /* Load the hidx mask */
431 ld r26,0(r26)
432 addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
433 rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
434 bne htab_modify_pte
435
436htab_insert_pte:
437 /* real page number in r5, PTE RPN value + index */
438 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
439 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
440 add r5,r5,r25
441 sldi r5,r5,HW_PAGE_SHIFT
442
443 /* Calculate primary group hash */
444 and r0,r28,r27
445 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
446
447 /* Call ppc_md.hpte_insert */
448 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
449 mr r4,r29 /* Retreive va */
450 li r7,0 /* !bolted, !secondary */
451 li r8,MMU_PAGE_4K /* page size */
452_GLOBAL(htab_call_hpte_insert1)
453 bl . /* patched by htab_finish_init() */
454 cmpdi 0,r3,0
455 bge htab_pte_insert_ok /* Insertion successful */
456 cmpdi 0,r3,-2 /* Critical failure */
457 beq- htab_pte_insert_failure
458
459 /* Now try secondary slot */
460
461 /* real page number in r5, PTE RPN value + index */
462 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
463 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
464 add r5,r5,r25
465 sldi r5,r5,HW_PAGE_SHIFT
466
467 /* Calculate secondary group hash */
468 andc r0,r27,r28
469 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
470
471 /* Call ppc_md.hpte_insert */
472 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
473 mr r4,r29 /* Retreive va */
474 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
475 li r8,MMU_PAGE_4K /* page size */
476_GLOBAL(htab_call_hpte_insert2)
477 bl . /* patched by htab_finish_init() */
478 cmpdi 0,r3,0
479 bge+ htab_pte_insert_ok /* Insertion successful */
480 cmpdi 0,r3,-2 /* Critical failure */
481 beq- htab_pte_insert_failure
482
483 /* Both are full, we need to evict something */
484 mftb r0
485 /* Pick a random group based on TB */
486 andi. r0,r0,1
487 mr r5,r28
488 bne 2f
489 not r5,r5
4902: and r0,r5,r27
491 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
492 /* Call ppc_md.hpte_remove */
493_GLOBAL(htab_call_hpte_remove)
494 bl . /* patched by htab_finish_init() */
495
496 /* Try all again */
497 b htab_insert_pte
498
499htab_bail_ok:
500 li r3,0
501 b htab_bail
502
503htab_pte_insert_ok:
504 /* Insert slot number & secondary bit in PTE second half,
505 * clear _PAGE_BUSY and set approriate HPTE slot bit
506 */
507 ld r6,STK_PARM(r6)(r1)
508 li r0,_PAGE_BUSY
509 andc r30,r30,r0
510 /* HPTE SUB bit */
511 li r0,1
512 subfic r5,r25,27 /* Must match bit position in */
513 sld r0,r0,r5 /* pgtable.h */
514 or r30,r30,r0
515 /* hindx */
516 sldi r5,r25,2
517 sld r3,r3,r5
518 li r4,0xf
519 sld r4,r4,r5
520 andc r26,r26,r4
521 or r26,r26,r3
522 ori r5,r6,0x8000
523 std r26,0(r5)
524 lwsync
525 std r30,0(r6)
526 li r3, 0
527htab_bail:
528 ld r25,STK_REG(r25)(r1)
529 ld r26,STK_REG(r26)(r1)
530 ld r27,STK_REG(r27)(r1)
531 ld r28,STK_REG(r28)(r1)
532 ld r29,STK_REG(r29)(r1)
533 ld r30,STK_REG(r30)(r1)
534 ld r31,STK_REG(r31)(r1)
535 addi r1,r1,STACKFRAMESIZE
536 ld r0,16(r1)
537 mtlr r0
538 blr
539
540htab_modify_pte:
541 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
542 mr r4,r3
543 sldi r5,r25,2
544 srd r3,r26,r5
545
546 /* Secondary group ? if yes, get a inverted hash value */
547 mr r5,r28
548 andi. r0,r3,0x8 /* page secondary ? */
549 beq 1f
550 not r5,r5
5511: andi. r3,r3,0x7 /* extract idx alone */
552
553 /* Calculate proper slot value for ppc_md.hpte_updatepp */
554 and r0,r5,r27
555 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
556 add r3,r0,r3 /* add slot idx */
557
558 /* Call ppc_md.hpte_updatepp */
559 mr r5,r29 /* va */
560 li r6,MMU_PAGE_4K /* page size */
561 ld r7,STK_PARM(r8)(r1) /* get "local" param */
562_GLOBAL(htab_call_hpte_updatepp)
563 bl . /* patched by htab_finish_init() */
564
565 /* if we failed because typically the HPTE wasn't really here
566 * we try an insertion.
567 */
568 cmpdi 0,r3,-1
569 beq- htab_insert_pte
570
571 /* Clear the BUSY bit and Write out the PTE */
572 li r0,_PAGE_BUSY
573 andc r30,r30,r0
574 ld r6,STK_PARM(r6)(r1)
575 std r30,0(r6)
576 li r3,0
577 b htab_bail
578
579htab_wrong_access:
580 /* Bail out clearing reservation */
581 stdcx. r31,0,r6
582 li r3,1
583 b htab_bail
280 584
281htab_pte_insert_failure: 585htab_pte_insert_failure:
282 /* Bail out restoring old PTE */ 586 /* Bail out restoring old PTE */
283 ld r6,STK_PARM(r6)(r1) 587 ld r6,STK_PARM(r6)(r1)
284 std r31,0(r6) 588 std r31,0(r6)
285 li r3,-1 589 li r3,-1
286 b bail 590 b htab_bail
591
592
593/*****************************************************************************
594 * *
595 * 64K SW & 64K HW in a 64K segment pages implementation *
596 * *
597 *****************************************************************************/
598
599_GLOBAL(__hash_page_64K)
600 mflr r0
601 std r0,16(r1)
602 stdu r1,-STACKFRAMESIZE(r1)
603 /* Save all params that we need after a function call */
604 std r6,STK_PARM(r6)(r1)
605 std r8,STK_PARM(r8)(r1)
606
607 /* Add _PAGE_PRESENT to access */
608 ori r4,r4,_PAGE_PRESENT
609
610 /* Save non-volatile registers.
611 * r31 will hold "old PTE"
612 * r30 is "new PTE"
613 * r29 is "va"
614 * r28 is a hash value
615 * r27 is hashtab mask (maybe dynamic patched instead ?)
616 */
617 std r27,STK_REG(r27)(r1)
618 std r28,STK_REG(r28)(r1)
619 std r29,STK_REG(r29)(r1)
620 std r30,STK_REG(r30)(r1)
621 std r31,STK_REG(r31)(r1)
622
623 /* Step 1:
624 *
625 * Check permissions, atomically mark the linux PTE busy
626 * and hashed.
627 */
6281:
629 ldarx r31,0,r6
630 /* Check access rights (access & ~(pte_val(*ptep))) */
631 andc. r0,r4,r31
632 bne- ht64_wrong_access
633 /* Check if PTE is busy */
634 andi. r0,r31,_PAGE_BUSY
635 /* If so, just bail out and refault if needed. Someone else
636 * is changing this PTE anyway and might hash it.
637 */
638 bne- ht64_bail_ok
639 /* Prepare new PTE value (turn access RW into DIRTY, then
640 * add BUSY,HASHPTE and ACCESSED)
641 */
642 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
643 or r30,r30,r31
644 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
645 /* Write the linux PTE atomically (setting busy) */
646 stdcx. r30,0,r6
647 bne- 1b
648 isync
649
650 /* Step 2:
651 *
652 * Insert/Update the HPTE in the hash table. At this point,
653 * r4 (access) is re-useable, we use it for the new HPTE flags
654 */
655
656 /* Calc va and put it in r29 */
657 rldicr r29,r5,28,63-28
658 rldicl r3,r3,0,36
659 or r29,r3,r29
660
661 /* Calculate hash value for primary slot and store it in r28 */
662 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
663 rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
664 xor r28,r5,r0
665
666 /* Convert linux PTE bits into HW equivalents */
667 andi. r3,r30,0x1fe /* Get basic set of flags */
668 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
669 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
670 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
671 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
672 andc r0,r30,r0 /* r0 = pte & ~r0 */
673 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
674
675 /* We eventually do the icache sync here (maybe inline that
676 * code rather than call a C function...)
677 */
678BEGIN_FTR_SECTION
679 mr r4,r30
680 mr r5,r7
681 bl .hash_page_do_lazy_icache
682END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
683
684 /* At this point, r3 contains new PP bits, save them in
685 * place of "access" in the param area (sic)
686 */
687 std r3,STK_PARM(r4)(r1)
688
689 /* Get htab_hash_mask */
690 ld r4,htab_hash_mask@got(2)
691 ld r27,0(r4) /* htab_hash_mask -> r27 */
692
693 /* Check if we may already be in the hashtable, in this case, we
694 * go to out-of-line code to try to modify the HPTE
695 */
696 andi. r0,r31,_PAGE_HASHPTE
697 bne ht64_modify_pte
698
699ht64_insert_pte:
700 /* Clear hpte bits in new pte (we also clear BUSY btw) and
701 * add _PAGE_HASHPTE
702 */
703 lis r0,_PAGE_HPTEFLAGS@h
704 ori r0,r0,_PAGE_HPTEFLAGS@l
705 andc r30,r30,r0
706 ori r30,r30,_PAGE_HASHPTE
707
708 /* Phyical address in r5 */
709 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
710 sldi r5,r5,PAGE_SHIFT
711
712 /* Calculate primary group hash */
713 and r0,r28,r27
714 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
715
716 /* Call ppc_md.hpte_insert */
717 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
718 mr r4,r29 /* Retreive va */
719 li r7,0 /* !bolted, !secondary */
720 li r8,MMU_PAGE_64K
721_GLOBAL(ht64_call_hpte_insert1)
722 bl . /* patched by htab_finish_init() */
723 cmpdi 0,r3,0
724 bge ht64_pte_insert_ok /* Insertion successful */
725 cmpdi 0,r3,-2 /* Critical failure */
726 beq- ht64_pte_insert_failure
727
728 /* Now try secondary slot */
729
730 /* Phyical address in r5 */
731 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
732 sldi r5,r5,PAGE_SHIFT
733
734 /* Calculate secondary group hash */
735 andc r0,r27,r28
736 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
737
738 /* Call ppc_md.hpte_insert */
739 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
740 mr r4,r29 /* Retreive va */
741 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
742 li r8,MMU_PAGE_64K
743_GLOBAL(ht64_call_hpte_insert2)
744 bl . /* patched by htab_finish_init() */
745 cmpdi 0,r3,0
746 bge+ ht64_pte_insert_ok /* Insertion successful */
747 cmpdi 0,r3,-2 /* Critical failure */
748 beq- ht64_pte_insert_failure
749
750 /* Both are full, we need to evict something */
751 mftb r0
752 /* Pick a random group based on TB */
753 andi. r0,r0,1
754 mr r5,r28
755 bne 2f
756 not r5,r5
7572: and r0,r5,r27
758 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
759 /* Call ppc_md.hpte_remove */
760_GLOBAL(ht64_call_hpte_remove)
761 bl . /* patched by htab_finish_init() */
762
763 /* Try all again */
764 b ht64_insert_pte
765
766ht64_bail_ok:
767 li r3,0
768 b ht64_bail
769
770ht64_pte_insert_ok:
771 /* Insert slot number & secondary bit in PTE */
772 rldimi r30,r3,12,63-15
773
774 /* Write out the PTE with a normal write
775 * (maybe add eieio may be good still ?)
776 */
777ht64_write_out_pte:
778 ld r6,STK_PARM(r6)(r1)
779 std r30,0(r6)
780 li r3, 0
781ht64_bail:
782 ld r27,STK_REG(r27)(r1)
783 ld r28,STK_REG(r28)(r1)
784 ld r29,STK_REG(r29)(r1)
785 ld r30,STK_REG(r30)(r1)
786 ld r31,STK_REG(r31)(r1)
787 addi r1,r1,STACKFRAMESIZE
788 ld r0,16(r1)
789 mtlr r0
790 blr
791
792ht64_modify_pte:
793 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
794 mr r4,r3
795 rlwinm r3,r31,32-12,29,31
796
797 /* Secondary group ? if yes, get a inverted hash value */
798 mr r5,r28
799 andi. r0,r31,_PAGE_F_SECOND
800 beq 1f
801 not r5,r5
8021:
803 /* Calculate proper slot value for ppc_md.hpte_updatepp */
804 and r0,r5,r27
805 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
806 add r3,r0,r3 /* add slot idx */
807
808 /* Call ppc_md.hpte_updatepp */
809 mr r5,r29 /* va */
810 li r6,MMU_PAGE_64K
811 ld r7,STK_PARM(r8)(r1) /* get "local" param */
812_GLOBAL(ht64_call_hpte_updatepp)
813 bl . /* patched by htab_finish_init() */
814
815 /* if we failed because typically the HPTE wasn't really here
816 * we try an insertion.
817 */
818 cmpdi 0,r3,-1
819 beq- ht64_insert_pte
820
821 /* Clear the BUSY bit and Write out the PTE */
822 li r0,_PAGE_BUSY
823 andc r30,r30,r0
824 b ht64_write_out_pte
825
826ht64_wrong_access:
827 /* Bail out clearing reservation */
828 stdcx. r31,0,r6
829 li r3,1
830 b ht64_bail
831
832ht64_pte_insert_failure:
833 /* Bail out restoring old PTE */
834 ld r6,STK_PARM(r6)(r1)
835 std r31,0(r6)
836 li r3,-1
837 b ht64_bail
838
839
840#endif /* CONFIG_PPC_64K_PAGES */
287 841
288 842
843/*****************************************************************************
844 * *
845 * Huge pages implementation is in hugetlbpage.c *
846 * *
847 *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c28..d96bcfe4c6f6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 */ 11 */
12
13#undef DEBUG_LOW
14
12#include <linux/spinlock.h> 15#include <linux/spinlock.h>
13#include <linux/bitops.h> 16#include <linux/bitops.h>
14#include <linux/threads.h> 17#include <linux/threads.h>
@@ -22,11 +25,84 @@
22#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
23#include <asm/tlb.h> 26#include <asm/tlb.h>
24#include <asm/cputable.h> 27#include <asm/cputable.h>
28#include <asm/udbg.h>
29
30#ifdef DEBUG_LOW
31#define DBG_LOW(fmt...) udbg_printf(fmt)
32#else
33#define DBG_LOW(fmt...)
34#endif
25 35
26#define HPTE_LOCK_BIT 3 36#define HPTE_LOCK_BIT 3
27 37
28static DEFINE_SPINLOCK(native_tlbie_lock); 38static DEFINE_SPINLOCK(native_tlbie_lock);
29 39
40static inline void __tlbie(unsigned long va, unsigned int psize)
41{
42 unsigned int penc;
43
44 /* clear top 16 bits, non SLS segment */
45 va &= ~(0xffffULL << 48);
46
47 switch (psize) {
48 case MMU_PAGE_4K:
49 va &= ~0xffful;
50 asm volatile("tlbie %0,0" : : "r" (va) : "memory");
51 break;
52 default:
53 penc = mmu_psize_defs[psize].penc;
54 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
55 va |= (0x7f >> (8 - penc)) << 12;
56 asm volatile("tlbie %0,1" : : "r" (va) : "memory");
57 break;
58 }
59}
60
61static inline void __tlbiel(unsigned long va, unsigned int psize)
62{
63 unsigned int penc;
64
65 /* clear top 16 bits, non SLS segment */
66 va &= ~(0xffffULL << 48);
67
68 switch (psize) {
69 case MMU_PAGE_4K:
70 va &= ~0xffful;
71 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
72 : : "r"(va) : "memory");
73 break;
74 default:
75 penc = mmu_psize_defs[psize].penc;
76 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
77 va |= (0x7f >> (8 - penc)) << 12;
78 asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
79 : : "r"(va) : "memory");
80 break;
81 }
82
83}
84
85static inline void tlbie(unsigned long va, int psize, int local)
86{
87 unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
88 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
89
90 if (use_local)
91 use_local = mmu_psize_defs[psize].tlbiel;
92 if (lock_tlbie && !use_local)
93 spin_lock(&native_tlbie_lock);
94 asm volatile("ptesync": : :"memory");
95 if (use_local) {
96 __tlbiel(va, psize);
97 asm volatile("ptesync": : :"memory");
98 } else {
99 __tlbie(va, psize);
100 asm volatile("eieio; tlbsync; ptesync": : :"memory");
101 }
102 if (lock_tlbie && !use_local)
103 spin_unlock(&native_tlbie_lock);
104}
105
30static inline void native_lock_hpte(hpte_t *hptep) 106static inline void native_lock_hpte(hpte_t *hptep)
31{ 107{
32 unsigned long *word = &hptep->v; 108 unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
48} 124}
49 125
50long native_hpte_insert(unsigned long hpte_group, unsigned long va, 126long native_hpte_insert(unsigned long hpte_group, unsigned long va,
51 unsigned long prpn, unsigned long vflags, 127 unsigned long pa, unsigned long rflags,
52 unsigned long rflags) 128 unsigned long vflags, int psize)
53{ 129{
54 hpte_t *hptep = htab_address + hpte_group; 130 hpte_t *hptep = htab_address + hpte_group;
55 unsigned long hpte_v, hpte_r; 131 unsigned long hpte_v, hpte_r;
56 int i; 132 int i;
57 133
134 if (!(vflags & HPTE_V_BOLTED)) {
135 DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
136 " rflags=%lx, vflags=%lx, psize=%d)\n",
137 hpte_group, va, pa, rflags, vflags, psize);
138 }
139
58 for (i = 0; i < HPTES_PER_GROUP; i++) { 140 for (i = 0; i < HPTES_PER_GROUP; i++) {
59 if (! (hptep->v & HPTE_V_VALID)) { 141 if (! (hptep->v & HPTE_V_VALID)) {
60 /* retry with lock held */ 142 /* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
70 if (i == HPTES_PER_GROUP) 152 if (i == HPTES_PER_GROUP)
71 return -1; 153 return -1;
72 154
73 hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; 155 hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
74 if (vflags & HPTE_V_LARGE) 156 hpte_r = hpte_encode_r(pa, psize) | rflags;
75 va &= ~(1UL << HPTE_V_AVPN_SHIFT); 157
76 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; 158 if (!(vflags & HPTE_V_BOLTED)) {
159 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
160 i, hpte_v, hpte_r);
161 }
77 162
78 hptep->r = hpte_r; 163 hptep->r = hpte_r;
79 /* Guarantee the second dword is visible before the valid bit */ 164 /* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
96 int slot_offset; 181 int slot_offset;
97 unsigned long hpte_v; 182 unsigned long hpte_v;
98 183
184 DBG_LOW(" remove(group=%lx)\n", hpte_group);
185
99 /* pick a random entry to start at */ 186 /* pick a random entry to start at */
100 slot_offset = mftb() & 0x7; 187 slot_offset = mftb() & 0x7;
101 188
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
126 return i; 213 return i;
127} 214}
128 215
129static inline void set_pp_bit(unsigned long pp, hpte_t *addr) 216static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
217 unsigned long va, int psize, int local)
130{ 218{
131 unsigned long old; 219 hpte_t *hptep = htab_address + slot;
132 unsigned long *p = &addr->r; 220 unsigned long hpte_v, want_v;
133 221 int ret = 0;
134 __asm__ __volatile__( 222
135 "1: ldarx %0,0,%3\n\ 223 want_v = hpte_encode_v(va, psize);
136 rldimi %0,%2,0,61\n\ 224
137 stdcx. %0,0,%3\n\ 225 DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
138 bne 1b" 226 va, want_v & HPTE_V_AVPN, slot, newpp);
139 : "=&r" (old), "=m" (*p) 227
140 : "r" (pp), "r" (p), "m" (*p) 228 native_lock_hpte(hptep);
141 : "cc"); 229
230 hpte_v = hptep->v;
231
232 /* Even if we miss, we need to invalidate the TLB */
233 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
234 DBG_LOW(" -> miss\n");
235 native_unlock_hpte(hptep);
236 ret = -1;
237 } else {
238 DBG_LOW(" -> hit\n");
239 /* Update the HPTE */
240 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
241 (newpp & (HPTE_R_PP | HPTE_R_N));
242 native_unlock_hpte(hptep);
243 }
244
245 /* Ensure it is out of the tlb too. */
246 tlbie(va, psize, local);
247
248 return ret;
142} 249}
143 250
144/* 251static long native_hpte_find(unsigned long va, int psize)
145 * Only works on small pages. Yes its ugly to have to check each slot in
146 * the group but we only use this during bootup.
147 */
148static long native_hpte_find(unsigned long vpn)
149{ 252{
150 hpte_t *hptep; 253 hpte_t *hptep;
151 unsigned long hash; 254 unsigned long hash;
152 unsigned long i, j; 255 unsigned long i, j;
153 long slot; 256 long slot;
154 unsigned long hpte_v; 257 unsigned long want_v, hpte_v;
155 258
156 hash = hpt_hash(vpn, 0); 259 hash = hpt_hash(va, mmu_psize_defs[psize].shift);
260 want_v = hpte_encode_v(va, psize);
157 261
158 for (j = 0; j < 2; j++) { 262 for (j = 0; j < 2; j++) {
159 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 263 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
161 hptep = htab_address + slot; 265 hptep = htab_address + slot;
162 hpte_v = hptep->v; 266 hpte_v = hptep->v;
163 267
164 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) 268 if (HPTE_V_COMPARE(hpte_v, want_v)
165 && (hpte_v & HPTE_V_VALID) 269 && (hpte_v & HPTE_V_VALID)
166 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { 270 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
167 /* HPTE matches */ 271 /* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
177 return -1; 281 return -1;
178} 282}
179 283
180static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
181 unsigned long va, int large, int local)
182{
183 hpte_t *hptep = htab_address + slot;
184 unsigned long hpte_v;
185 unsigned long avpn = va >> 23;
186 int ret = 0;
187
188 if (large)
189 avpn &= ~1;
190
191 native_lock_hpte(hptep);
192
193 hpte_v = hptep->v;
194
195 /* Even if we miss, we need to invalidate the TLB */
196 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
197 || !(hpte_v & HPTE_V_VALID)) {
198 native_unlock_hpte(hptep);
199 ret = -1;
200 } else {
201 set_pp_bit(newpp, hptep);
202 native_unlock_hpte(hptep);
203 }
204
205 /* Ensure it is out of the tlb too */
206 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
207 tlbiel(va);
208 } else {
209 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
210
211 if (lock_tlbie)
212 spin_lock(&native_tlbie_lock);
213 tlbie(va, large);
214 if (lock_tlbie)
215 spin_unlock(&native_tlbie_lock);
216 }
217
218 return ret;
219}
220
221/* 284/*
222 * Update the page protection bits. Intended to be used to create 285 * Update the page protection bits. Intended to be used to create
223 * guard pages for kernel data structures on pages which are bolted 286 * guard pages for kernel data structures on pages which are bolted
224 * in the HPT. Assumes pages being operated on will not be stolen. 287 * in the HPT. Assumes pages being operated on will not be stolen.
225 * Does not work on large pages.
226 * 288 *
227 * No need to lock here because we should be the only user. 289 * No need to lock here because we should be the only user.
228 */ 290 */
229static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) 291static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
292 int psize)
230{ 293{
231 unsigned long vsid, va, vpn, flags = 0; 294 unsigned long vsid, va;
232 long slot; 295 long slot;
233 hpte_t *hptep; 296 hpte_t *hptep;
234 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
235 297
236 vsid = get_kernel_vsid(ea); 298 vsid = get_kernel_vsid(ea);
237 va = (vsid << 28) | (ea & 0x0fffffff); 299 va = (vsid << 28) | (ea & 0x0fffffff);
238 vpn = va >> PAGE_SHIFT;
239 300
240 slot = native_hpte_find(vpn); 301 slot = native_hpte_find(va, psize);
241 if (slot == -1) 302 if (slot == -1)
242 panic("could not find page to bolt\n"); 303 panic("could not find page to bolt\n");
243 hptep = htab_address + slot; 304 hptep = htab_address + slot;
244 305
245 set_pp_bit(newpp, hptep); 306 /* Update the HPTE */
307 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
308 (newpp & (HPTE_R_PP | HPTE_R_N));
246 309
247 /* Ensure it is out of the tlb too */ 310 /* Ensure it is out of the tlb too. */
248 if (lock_tlbie) 311 tlbie(va, psize, 0);
249 spin_lock_irqsave(&native_tlbie_lock, flags);
250 tlbie(va, 0);
251 if (lock_tlbie)
252 spin_unlock_irqrestore(&native_tlbie_lock, flags);
253} 312}
254 313
255static void native_hpte_invalidate(unsigned long slot, unsigned long va, 314static void native_hpte_invalidate(unsigned long slot, unsigned long va,
256 int large, int local) 315 int psize, int local)
257{ 316{
258 hpte_t *hptep = htab_address + slot; 317 hpte_t *hptep = htab_address + slot;
259 unsigned long hpte_v; 318 unsigned long hpte_v;
260 unsigned long avpn = va >> 23; 319 unsigned long want_v;
261 unsigned long flags; 320 unsigned long flags;
262 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
263
264 if (large)
265 avpn &= ~1;
266 321
267 local_irq_save(flags); 322 local_irq_save(flags);
268 native_lock_hpte(hptep);
269 323
324 DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
325
326 want_v = hpte_encode_v(va, psize);
327 native_lock_hpte(hptep);
270 hpte_v = hptep->v; 328 hpte_v = hptep->v;
271 329
272 /* Even if we miss, we need to invalidate the TLB */ 330 /* Even if we miss, we need to invalidate the TLB */
273 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) 331 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
274 || !(hpte_v & HPTE_V_VALID)) {
275 native_unlock_hpte(hptep); 332 native_unlock_hpte(hptep);
276 } else { 333 else
277 /* Invalidate the hpte. NOTE: this also unlocks it */ 334 /* Invalidate the hpte. NOTE: this also unlocks it */
278 hptep->v = 0; 335 hptep->v = 0;
279 }
280 336
281 /* Invalidate the tlb */ 337 /* Invalidate the TLB */
282 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 338 tlbie(va, psize, local);
283 tlbiel(va); 339
284 } else {
285 if (lock_tlbie)
286 spin_lock(&native_tlbie_lock);
287 tlbie(va, large);
288 if (lock_tlbie)
289 spin_unlock(&native_tlbie_lock);
290 }
291 local_irq_restore(flags); 340 local_irq_restore(flags);
292} 341}
293 342
294/* 343/*
344 * XXX This need fixing based on page size. It's only used by
345 * native_hpte_clear() for now which needs fixing too so they
346 * make a good pair...
347 */
348static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
349{
350 unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
351 unsigned long va;
352
353 va = avpn << 23;
354
355 if (! (hpte_v & HPTE_V_LARGE)) {
356 unsigned long vpi, pteg;
357
358 pteg = slot / HPTES_PER_GROUP;
359 if (hpte_v & HPTE_V_SECONDARY)
360 pteg = ~pteg;
361
362 vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
363
364 va |= vpi << PAGE_SHIFT;
365 }
366
367 return va;
368}
369
370/*
295 * clear all mappings on kexec. All cpus are in real mode (or they will 371 * clear all mappings on kexec. All cpus are in real mode (or they will
296 * be when they isi), and we are the only one left. We rely on our kernel 372 * be when they isi), and we are the only one left. We rely on our kernel
297 * mapping being 0xC0's and the hardware ignoring those two real bits. 373 * mapping being 0xC0's and the hardware ignoring those two real bits.
298 * 374 *
299 * TODO: add batching support when enabled. remember, no dynamic memory here, 375 * TODO: add batching support when enabled. remember, no dynamic memory here,
300 * athough there is the control page available... 376 * athough there is the control page available...
377 *
378 * XXX FIXME: 4k only for now !
301 */ 379 */
302static void native_hpte_clear(void) 380static void native_hpte_clear(void)
303{ 381{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
327 405
328 if (hpte_v & HPTE_V_VALID) { 406 if (hpte_v & HPTE_V_VALID) {
329 hptep->v = 0; 407 hptep->v = 0;
330 tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); 408 tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
331 } 409 }
332 } 410 }
333 411
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
335 local_irq_restore(flags); 413 local_irq_restore(flags);
336} 414}
337 415
416/*
417 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
418 * the lock all the time
419 */
338static void native_flush_hash_range(unsigned long number, int local) 420static void native_flush_hash_range(unsigned long number, int local)
339{ 421{
340 unsigned long va, vpn, hash, secondary, slot, flags, avpn; 422 unsigned long va, hash, index, hidx, shift, slot;
341 int i, j;
342 hpte_t *hptep; 423 hpte_t *hptep;
343 unsigned long hpte_v; 424 unsigned long hpte_v;
425 unsigned long want_v;
426 unsigned long flags;
427 real_pte_t pte;
344 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 428 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
345 unsigned long large = batch->large; 429 unsigned long psize = batch->psize;
430 int i;
346 431
347 local_irq_save(flags); 432 local_irq_save(flags);
348 433
349 j = 0;
350 for (i = 0; i < number; i++) { 434 for (i = 0; i < number; i++) {
351 va = batch->vaddr[j]; 435 va = batch->vaddr[i];
352 if (large) 436 pte = batch->pte[i];
353 vpn = va >> HPAGE_SHIFT; 437
354 else 438 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
355 vpn = va >> PAGE_SHIFT; 439 hash = hpt_hash(va, shift);
356 hash = hpt_hash(vpn, large); 440 hidx = __rpte_to_hidx(pte, index);
357 secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; 441 if (hidx & _PTEIDX_SECONDARY)
358 if (secondary) 442 hash = ~hash;
359 hash = ~hash; 443 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
360 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 444 slot += hidx & _PTEIDX_GROUP_IX;
361 slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; 445 hptep = htab_address + slot;
362 446 want_v = hpte_encode_v(va, psize);
363 hptep = htab_address + slot; 447 native_lock_hpte(hptep);
364 448 hpte_v = hptep->v;
365 avpn = va >> 23; 449 if (!HPTE_V_COMPARE(hpte_v, want_v) ||
366 if (large) 450 !(hpte_v & HPTE_V_VALID))
367 avpn &= ~0x1UL; 451 native_unlock_hpte(hptep);
368 452 else
369 native_lock_hpte(hptep); 453 hptep->v = 0;
370 454 } pte_iterate_hashed_end();
371 hpte_v = hptep->v;
372
373 /* Even if we miss, we need to invalidate the TLB */
374 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
375 || !(hpte_v & HPTE_V_VALID)) {
376 native_unlock_hpte(hptep);
377 } else {
378 /* Invalidate the hpte. NOTE: this also unlocks it */
379 hptep->v = 0;
380 }
381
382 j++;
383 } 455 }
384 456
385 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 457 if (cpu_has_feature(CPU_FTR_TLBIEL) &&
458 mmu_psize_defs[psize].tlbiel && local) {
386 asm volatile("ptesync":::"memory"); 459 asm volatile("ptesync":::"memory");
387 460 for (i = 0; i < number; i++) {
388 for (i = 0; i < j; i++) 461 va = batch->vaddr[i];
389 __tlbiel(batch->vaddr[i]); 462 pte = batch->pte[i];
390 463
464 pte_iterate_hashed_subpages(pte, psize, va, index,
465 shift) {
466 __tlbiel(va, psize);
467 } pte_iterate_hashed_end();
468 }
391 asm volatile("ptesync":::"memory"); 469 asm volatile("ptesync":::"memory");
392 } else { 470 } else {
393 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); 471 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
396 spin_lock(&native_tlbie_lock); 474 spin_lock(&native_tlbie_lock);
397 475
398 asm volatile("ptesync":::"memory"); 476 asm volatile("ptesync":::"memory");
399 477 for (i = 0; i < number; i++) {
400 for (i = 0; i < j; i++) 478 va = batch->vaddr[i];
401 __tlbie(batch->vaddr[i], large); 479 pte = batch->pte[i];
402 480
481 pte_iterate_hashed_subpages(pte, psize, va, index,
482 shift) {
483 __tlbie(va, psize);
484 } pte_iterate_hashed_end();
485 }
403 asm volatile("eieio; tlbsync; ptesync":::"memory"); 486 asm volatile("eieio; tlbsync; ptesync":::"memory");
404 487
405 if (lock_tlbie) 488 if (lock_tlbie)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6e9e05cce02c..b2f3dbca6952 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#undef DEBUG 21#undef DEBUG
22#undef DEBUG_LOW
22 23
23#include <linux/config.h> 24#include <linux/config.h>
24#include <linux/spinlock.h> 25#include <linux/spinlock.h>
@@ -59,6 +60,15 @@
59#define DBG(fmt...) 60#define DBG(fmt...)
60#endif 61#endif
61 62
63#ifdef DEBUG_LOW
64#define DBG_LOW(fmt...) udbg_printf(fmt)
65#else
66#define DBG_LOW(fmt...)
67#endif
68
69#define KB (1024)
70#define MB (1024*KB)
71
62/* 72/*
63 * Note: pte --> Linux PTE 73 * Note: pte --> Linux PTE
64 * HPTE --> PowerPC Hashed Page Table Entry 74 * HPTE --> PowerPC Hashed Page Table Entry
@@ -77,91 +87,290 @@ extern unsigned long dart_tablebase;
77 87
78hpte_t *htab_address; 88hpte_t *htab_address;
79unsigned long htab_hash_mask; 89unsigned long htab_hash_mask;
80
81unsigned long _SDR1; 90unsigned long _SDR1;
91struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
92int mmu_linear_psize = MMU_PAGE_4K;
93int mmu_virtual_psize = MMU_PAGE_4K;
94#ifdef CONFIG_HUGETLB_PAGE
95int mmu_huge_psize = MMU_PAGE_16M;
96unsigned int HPAGE_SHIFT;
97#endif
82 98
83#define KB (1024) 99/* There are definitions of page sizes arrays to be used when none
84#define MB (1024*KB) 100 * is provided by the firmware.
85 101 */
86static inline void loop_forever(void)
87{
88 volatile unsigned long x = 1;
89 for(;x;x|=1)
90 ;
91}
92 102
93static inline void create_pte_mapping(unsigned long start, unsigned long end, 103/* Pre-POWER4 CPUs (4k pages only)
94 unsigned long mode, int large) 104 */
105struct mmu_psize_def mmu_psize_defaults_old[] = {
106 [MMU_PAGE_4K] = {
107 .shift = 12,
108 .sllp = 0,
109 .penc = 0,
110 .avpnm = 0,
111 .tlbiel = 0,
112 },
113};
114
115/* POWER4, GPUL, POWER5
116 *
117 * Support for 16Mb large pages
118 */
119struct mmu_psize_def mmu_psize_defaults_gp[] = {
120 [MMU_PAGE_4K] = {
121 .shift = 12,
122 .sllp = 0,
123 .penc = 0,
124 .avpnm = 0,
125 .tlbiel = 1,
126 },
127 [MMU_PAGE_16M] = {
128 .shift = 24,
129 .sllp = SLB_VSID_L,
130 .penc = 0,
131 .avpnm = 0x1UL,
132 .tlbiel = 0,
133 },
134};
135
136
137int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
138 unsigned long pstart, unsigned long mode, int psize)
95{ 139{
96 unsigned long addr; 140 unsigned long vaddr, paddr;
97 unsigned int step; 141 unsigned int step, shift;
98 unsigned long tmp_mode; 142 unsigned long tmp_mode;
99 unsigned long vflags; 143 int ret = 0;
100 144
101 if (large) { 145 shift = mmu_psize_defs[psize].shift;
102 step = 16*MB; 146 step = 1 << shift;
103 vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
104 } else {
105 step = 4*KB;
106 vflags = HPTE_V_BOLTED;
107 }
108 147
109 for (addr = start; addr < end; addr += step) { 148 for (vaddr = vstart, paddr = pstart; vaddr < vend;
149 vaddr += step, paddr += step) {
110 unsigned long vpn, hash, hpteg; 150 unsigned long vpn, hash, hpteg;
111 unsigned long vsid = get_kernel_vsid(addr); 151 unsigned long vsid = get_kernel_vsid(vaddr);
112 unsigned long va = (vsid << 28) | (addr & 0xfffffff); 152 unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
113 int ret = -1;
114
115 if (large)
116 vpn = va >> HPAGE_SHIFT;
117 else
118 vpn = va >> PAGE_SHIFT;
119
120 153
154 vpn = va >> shift;
121 tmp_mode = mode; 155 tmp_mode = mode;
122 156
123 /* Make non-kernel text non-executable */ 157 /* Make non-kernel text non-executable */
124 if (!in_kernel_text(addr)) 158 if (!in_kernel_text(vaddr))
125 tmp_mode = mode | HW_NO_EXEC; 159 tmp_mode = mode | HPTE_R_N;
126
127 hash = hpt_hash(vpn, large);
128 160
161 hash = hpt_hash(va, shift);
129 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 162 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
130 163
164 /* The crap below can be cleaned once ppd_md.probe() can
165 * set up the hash callbacks, thus we can just used the
166 * normal insert callback here.
167 */
131#ifdef CONFIG_PPC_ISERIES 168#ifdef CONFIG_PPC_ISERIES
132 if (systemcfg->platform & PLATFORM_ISERIES_LPAR) 169 if (systemcfg->platform == PLATFORM_ISERIES_LPAR)
133 ret = iSeries_hpte_bolt_or_insert(hpteg, va, 170 ret = iSeries_hpte_insert(hpteg, va,
134 virt_to_abs(addr) >> PAGE_SHIFT, 171 virt_to_abs(paddr),
135 vflags, tmp_mode); 172 tmp_mode,
173 HPTE_V_BOLTED,
174 psize);
136 else 175 else
137#endif 176#endif
138#ifdef CONFIG_PPC_PSERIES 177#ifdef CONFIG_PPC_PSERIES
139 if (systemcfg->platform & PLATFORM_LPAR) 178 if (systemcfg->platform & PLATFORM_LPAR)
140 ret = pSeries_lpar_hpte_insert(hpteg, va, 179 ret = pSeries_lpar_hpte_insert(hpteg, va,
141 virt_to_abs(addr) >> PAGE_SHIFT, 180 virt_to_abs(paddr),
142 vflags, tmp_mode); 181 tmp_mode,
182 HPTE_V_BOLTED,
183 psize);
143 else 184 else
144#endif 185#endif
145#ifdef CONFIG_PPC_MULTIPLATFORM 186#ifdef CONFIG_PPC_MULTIPLATFORM
146 ret = native_hpte_insert(hpteg, va, 187 ret = native_hpte_insert(hpteg, va,
147 virt_to_abs(addr) >> PAGE_SHIFT, 188 virt_to_abs(paddr),
148 vflags, tmp_mode); 189 tmp_mode, HPTE_V_BOLTED,
190 psize);
149#endif 191#endif
192 if (ret < 0)
193 break;
194 }
195 return ret < 0 ? ret : 0;
196}
150 197
151 if (ret == -1) { 198static int __init htab_dt_scan_page_sizes(unsigned long node,
152 ppc64_terminate_msg(0x20, "create_pte_mapping"); 199 const char *uname, int depth,
153 loop_forever(); 200 void *data)
201{
202 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
203 u32 *prop;
204 unsigned long size = 0;
205
206 /* We are scanning "cpu" nodes only */
207 if (type == NULL || strcmp(type, "cpu") != 0)
208 return 0;
209
210 prop = (u32 *)of_get_flat_dt_prop(node,
211 "ibm,segment-page-sizes", &size);
212 if (prop != NULL) {
213 DBG("Page sizes from device-tree:\n");
214 size /= 4;
215 cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
216 while(size > 0) {
217 unsigned int shift = prop[0];
218 unsigned int slbenc = prop[1];
219 unsigned int lpnum = prop[2];
220 unsigned int lpenc = 0;
221 struct mmu_psize_def *def;
222 int idx = -1;
223
224 size -= 3; prop += 3;
225 while(size > 0 && lpnum) {
226 if (prop[0] == shift)
227 lpenc = prop[1];
228 prop += 2; size -= 2;
229 lpnum--;
230 }
231 switch(shift) {
232 case 0xc:
233 idx = MMU_PAGE_4K;
234 break;
235 case 0x10:
236 idx = MMU_PAGE_64K;
237 break;
238 case 0x14:
239 idx = MMU_PAGE_1M;
240 break;
241 case 0x18:
242 idx = MMU_PAGE_16M;
243 cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
244 break;
245 case 0x22:
246 idx = MMU_PAGE_16G;
247 break;
248 }
249 if (idx < 0)
250 continue;
251 def = &mmu_psize_defs[idx];
252 def->shift = shift;
253 if (shift <= 23)
254 def->avpnm = 0;
255 else
256 def->avpnm = (1 << (shift - 23)) - 1;
257 def->sllp = slbenc;
258 def->penc = lpenc;
259 /* We don't know for sure what's up with tlbiel, so
260 * for now we only set it for 4K and 64K pages
261 */
262 if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
263 def->tlbiel = 1;
264 else
265 def->tlbiel = 0;
266
267 DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
268 "tlbiel=%d, penc=%d\n",
269 idx, shift, def->sllp, def->avpnm, def->tlbiel,
270 def->penc);
154 } 271 }
272 return 1;
273 }
274 return 0;
275}
276
277
278static void __init htab_init_page_sizes(void)
279{
280 int rc;
281
282 /* Default to 4K pages only */
283 memcpy(mmu_psize_defs, mmu_psize_defaults_old,
284 sizeof(mmu_psize_defaults_old));
285
286 /*
287 * Try to find the available page sizes in the device-tree
288 */
289 rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
290 if (rc != 0) /* Found */
291 goto found;
292
293 /*
294 * Not in the device-tree, let's fallback on known size
295 * list for 16M capable GP & GR
296 */
297 if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) &&
298 cpu_has_feature(CPU_FTR_16M_PAGE))
299 memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
300 sizeof(mmu_psize_defaults_gp));
301 found:
302 /*
303 * Pick a size for the linear mapping. Currently, we only support
304 * 16M, 1M and 4K which is the default
305 */
306 if (mmu_psize_defs[MMU_PAGE_16M].shift)
307 mmu_linear_psize = MMU_PAGE_16M;
308 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
309 mmu_linear_psize = MMU_PAGE_1M;
310
311 /*
312 * Pick a size for the ordinary pages. Default is 4K, we support
313 * 64K if cache inhibited large pages are supported by the
314 * processor
315 */
316#ifdef CONFIG_PPC_64K_PAGES
317 if (mmu_psize_defs[MMU_PAGE_64K].shift &&
318 cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
319 mmu_virtual_psize = MMU_PAGE_64K;
320#endif
321
322 printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
323 mmu_psize_defs[mmu_linear_psize].shift,
324 mmu_psize_defs[mmu_virtual_psize].shift);
325
326#ifdef CONFIG_HUGETLB_PAGE
327 /* Init large page size. Currently, we pick 16M or 1M depending
328 * on what is available
329 */
330 if (mmu_psize_defs[MMU_PAGE_16M].shift)
331 mmu_huge_psize = MMU_PAGE_16M;
332 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
333 mmu_huge_psize = MMU_PAGE_1M;
334
335 /* Calculate HPAGE_SHIFT and sanity check it */
336 if (mmu_psize_defs[mmu_huge_psize].shift > 16 &&
337 mmu_psize_defs[mmu_huge_psize].shift < 28)
338 HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
339 else
340 HPAGE_SHIFT = 0; /* No huge pages dude ! */
341#endif /* CONFIG_HUGETLB_PAGE */
342}
343
344static int __init htab_dt_scan_pftsize(unsigned long node,
345 const char *uname, int depth,
346 void *data)
347{
348 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
349 u32 *prop;
350
351 /* We are scanning "cpu" nodes only */
352 if (type == NULL || strcmp(type, "cpu") != 0)
353 return 0;
354
355 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
356 if (prop != NULL) {
357 /* pft_size[0] is the NUMA CEC cookie */
358 ppc64_pft_size = prop[1];
359 return 1;
155 } 360 }
361 return 0;
156} 362}
157 363
158static unsigned long get_hashtable_size(void) 364static unsigned long __init htab_get_table_size(void)
159{ 365{
160 unsigned long rnd_mem_size, pteg_count; 366 unsigned long rnd_mem_size, pteg_count;
161 367
162 /* If hash size wasn't obtained in prom.c, we calculate it now based on 368 /* If hash size isn't already provided by the platform, we try to
163 * the total RAM size 369 * retreive it from the device-tree. If it's not there neither, we
370 * calculate it now based on the total RAM size
164 */ 371 */
372 if (ppc64_pft_size == 0)
373 of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
165 if (ppc64_pft_size) 374 if (ppc64_pft_size)
166 return 1UL << ppc64_pft_size; 375 return 1UL << ppc64_pft_size;
167 376
@@ -181,17 +390,21 @@ void __init htab_initialize(void)
181 unsigned long table, htab_size_bytes; 390 unsigned long table, htab_size_bytes;
182 unsigned long pteg_count; 391 unsigned long pteg_count;
183 unsigned long mode_rw; 392 unsigned long mode_rw;
184 int i, use_largepages = 0;
185 unsigned long base = 0, size = 0; 393 unsigned long base = 0, size = 0;
394 int i;
395
186 extern unsigned long tce_alloc_start, tce_alloc_end; 396 extern unsigned long tce_alloc_start, tce_alloc_end;
187 397
188 DBG(" -> htab_initialize()\n"); 398 DBG(" -> htab_initialize()\n");
189 399
400 /* Initialize page sizes */
401 htab_init_page_sizes();
402
190 /* 403 /*
191 * Calculate the required size of the htab. We want the number of 404 * Calculate the required size of the htab. We want the number of
192 * PTEGs to equal one half the number of real pages. 405 * PTEGs to equal one half the number of real pages.
193 */ 406 */
194 htab_size_bytes = get_hashtable_size(); 407 htab_size_bytes = htab_get_table_size();
195 pteg_count = htab_size_bytes >> 7; 408 pteg_count = htab_size_bytes >> 7;
196 409
197 /* For debug, make the HTAB 1/8 as big as it normally would be. */ 410 /* For debug, make the HTAB 1/8 as big as it normally would be. */
@@ -211,14 +424,11 @@ void __init htab_initialize(void)
211 * the absolute address space. 424 * the absolute address space.
212 */ 425 */
213 table = lmb_alloc(htab_size_bytes, htab_size_bytes); 426 table = lmb_alloc(htab_size_bytes, htab_size_bytes);
427 BUG_ON(table == 0);
214 428
215 DBG("Hash table allocated at %lx, size: %lx\n", table, 429 DBG("Hash table allocated at %lx, size: %lx\n", table,
216 htab_size_bytes); 430 htab_size_bytes);
217 431
218 if ( !table ) {
219 ppc64_terminate_msg(0x20, "hpt space");
220 loop_forever();
221 }
222 htab_address = abs_to_virt(table); 432 htab_address = abs_to_virt(table);
223 433
224 /* htab absolute addr + encoded htabsize */ 434 /* htab absolute addr + encoded htabsize */
@@ -234,8 +444,6 @@ void __init htab_initialize(void)
234 * _NOT_ map it to avoid cache paradoxes as it's remapped non 444 * _NOT_ map it to avoid cache paradoxes as it's remapped non
235 * cacheable later on 445 * cacheable later on
236 */ 446 */
237 if (cpu_has_feature(CPU_FTR_16M_PAGE))
238 use_largepages = 1;
239 447
240 /* create bolted the linear mapping in the hash table */ 448 /* create bolted the linear mapping in the hash table */
241 for (i=0; i < lmb.memory.cnt; i++) { 449 for (i=0; i < lmb.memory.cnt; i++) {
@@ -246,27 +454,32 @@ void __init htab_initialize(void)
246 454
247#ifdef CONFIG_U3_DART 455#ifdef CONFIG_U3_DART
248 /* Do not map the DART space. Fortunately, it will be aligned 456 /* Do not map the DART space. Fortunately, it will be aligned
249 * in such a way that it will not cross two lmb regions and will 457 * in such a way that it will not cross two lmb regions and
250 * fit within a single 16Mb page. 458 * will fit within a single 16Mb page.
251 * The DART space is assumed to be a full 16Mb region even if we 459 * The DART space is assumed to be a full 16Mb region even if
252 * only use 2Mb of that space. We will use more of it later for 460 * we only use 2Mb of that space. We will use more of it later
253 * AGP GART. We have to use a full 16Mb large page. 461 * for AGP GART. We have to use a full 16Mb large page.
254 */ 462 */
255 DBG("DART base: %lx\n", dart_tablebase); 463 DBG("DART base: %lx\n", dart_tablebase);
256 464
257 if (dart_tablebase != 0 && dart_tablebase >= base 465 if (dart_tablebase != 0 && dart_tablebase >= base
258 && dart_tablebase < (base + size)) { 466 && dart_tablebase < (base + size)) {
259 if (base != dart_tablebase) 467 if (base != dart_tablebase)
260 create_pte_mapping(base, dart_tablebase, mode_rw, 468 BUG_ON(htab_bolt_mapping(base, dart_tablebase,
261 use_largepages); 469 base, mode_rw,
470 mmu_linear_psize));
262 if ((base + size) > (dart_tablebase + 16*MB)) 471 if ((base + size) > (dart_tablebase + 16*MB))
263 create_pte_mapping(dart_tablebase + 16*MB, base + size, 472 BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
264 mode_rw, use_largepages); 473 base + size,
474 dart_tablebase+16*MB,
475 mode_rw,
476 mmu_linear_psize));
265 continue; 477 continue;
266 } 478 }
267#endif /* CONFIG_U3_DART */ 479#endif /* CONFIG_U3_DART */
268 create_pte_mapping(base, base + size, mode_rw, use_largepages); 480 BUG_ON(htab_bolt_mapping(base, base + size, base,
269 } 481 mode_rw, mmu_linear_psize));
482 }
270 483
271 /* 484 /*
272 * If we have a memory_limit and we've allocated TCEs then we need to 485 * If we have a memory_limit and we've allocated TCEs then we need to
@@ -282,8 +495,9 @@ void __init htab_initialize(void)
282 if (base + size >= tce_alloc_start) 495 if (base + size >= tce_alloc_start)
283 tce_alloc_start = base + size + 1; 496 tce_alloc_start = base + size + 1;
284 497
285 create_pte_mapping(tce_alloc_start, tce_alloc_end, 498 BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
286 mode_rw, use_largepages); 499 tce_alloc_start, mode_rw,
500 mmu_linear_psize));
287 } 501 }
288 502
289 DBG(" <- htab_initialize()\n"); 503 DBG(" <- htab_initialize()\n");
@@ -298,9 +512,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
298{ 512{
299 struct page *page; 513 struct page *page;
300 514
301 if (!pfn_valid(pte_pfn(pte)))
302 return pp;
303
304 page = pte_page(pte); 515 page = pte_page(pte);
305 516
306 /* page is dirty */ 517 /* page is dirty */
@@ -309,7 +520,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
309 __flush_dcache_icache(page_address(page)); 520 __flush_dcache_icache(page_address(page));
310 set_bit(PG_arch_1, &page->flags); 521 set_bit(PG_arch_1, &page->flags);
311 } else 522 } else
312 pp |= HW_NO_EXEC; 523 pp |= HPTE_R_N;
313 } 524 }
314 return pp; 525 return pp;
315} 526}
@@ -325,94 +536,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
325 unsigned long vsid; 536 unsigned long vsid;
326 struct mm_struct *mm; 537 struct mm_struct *mm;
327 pte_t *ptep; 538 pte_t *ptep;
328 int ret;
329 int user_region = 0;
330 int local = 0;
331 cpumask_t tmp; 539 cpumask_t tmp;
540 int rc, user_region = 0, local = 0;
332 541
333 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) 542 DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
334 return 1; 543 ea, access, trap);
335 544
545 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
546 DBG_LOW(" out of pgtable range !\n");
547 return 1;
548 }
549
550 /* Get region & vsid */
336 switch (REGION_ID(ea)) { 551 switch (REGION_ID(ea)) {
337 case USER_REGION_ID: 552 case USER_REGION_ID:
338 user_region = 1; 553 user_region = 1;
339 mm = current->mm; 554 mm = current->mm;
340 if (! mm) 555 if (! mm) {
556 DBG_LOW(" user region with no mm !\n");
341 return 1; 557 return 1;
342 558 }
343 vsid = get_vsid(mm->context.id, ea); 559 vsid = get_vsid(mm->context.id, ea);
344 break; 560 break;
345 case VMALLOC_REGION_ID: 561 case VMALLOC_REGION_ID:
346 mm = &init_mm; 562 mm = &init_mm;
347 vsid = get_kernel_vsid(ea); 563 vsid = get_kernel_vsid(ea);
348 break; 564 break;
349#if 0
350 case KERNEL_REGION_ID:
351 /*
352 * Should never get here - entire 0xC0... region is bolted.
353 * Send the problem up to do_page_fault
354 */
355#endif
356 default: 565 default:
357 /* Not a valid range 566 /* Not a valid range
358 * Send the problem up to do_page_fault 567 * Send the problem up to do_page_fault
359 */ 568 */
360 return 1; 569 return 1;
361 break;
362 } 570 }
571 DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
363 572
573 /* Get pgdir */
364 pgdir = mm->pgd; 574 pgdir = mm->pgd;
365
366 if (pgdir == NULL) 575 if (pgdir == NULL)
367 return 1; 576 return 1;
368 577
578 /* Check CPU locality */
369 tmp = cpumask_of_cpu(smp_processor_id()); 579 tmp = cpumask_of_cpu(smp_processor_id());
370 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) 580 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
371 local = 1; 581 local = 1;
372 582
373 /* Is this a huge page ? */ 583 /* Handle hugepage regions */
374 if (unlikely(in_hugepage_area(mm->context, ea))) 584 if (unlikely(in_hugepage_area(mm->context, ea))) {
375 ret = hash_huge_page(mm, access, ea, vsid, local); 585 DBG_LOW(" -> huge page !\n");
376 else { 586 return hash_huge_page(mm, access, ea, vsid, local);
377 ptep = find_linux_pte(pgdir, ea); 587 }
378 if (ptep == NULL) 588
379 return 1; 589 /* Get PTE and page size from page tables */
380 ret = __hash_page(ea, access, vsid, ptep, trap, local); 590 ptep = find_linux_pte(pgdir, ea);
591 if (ptep == NULL || !pte_present(*ptep)) {
592 DBG_LOW(" no PTE !\n");
593 return 1;
594 }
595
596#ifndef CONFIG_PPC_64K_PAGES
597 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
598#else
599 DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
600 pte_val(*(ptep + PTRS_PER_PTE)));
601#endif
602 /* Pre-check access permissions (will be re-checked atomically
603 * in __hash_page_XX but this pre-check is a fast path
604 */
605 if (access & ~pte_val(*ptep)) {
606 DBG_LOW(" no access !\n");
607 return 1;
381 } 608 }
382 609
383 return ret; 610 /* Do actual hashing */
611#ifndef CONFIG_PPC_64K_PAGES
612 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
613#else
614 if (mmu_virtual_psize == MMU_PAGE_64K)
615 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
616 else
617 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
618#endif /* CONFIG_PPC_64K_PAGES */
619
620#ifndef CONFIG_PPC_64K_PAGES
621 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
622#else
623 DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
624 pte_val(*(ptep + PTRS_PER_PTE)));
625#endif
626 DBG_LOW(" -> rc=%d\n", rc);
627 return rc;
384} 628}
385 629
386void flush_hash_page(unsigned long va, pte_t pte, int local) 630void hash_preload(struct mm_struct *mm, unsigned long ea,
631 unsigned long access, unsigned long trap)
387{ 632{
388 unsigned long vpn, hash, secondary, slot; 633 unsigned long vsid;
389 unsigned long huge = pte_huge(pte); 634 void *pgdir;
635 pte_t *ptep;
636 cpumask_t mask;
637 unsigned long flags;
638 int local = 0;
639
640 /* We don't want huge pages prefaulted for now
641 */
642 if (unlikely(in_hugepage_area(mm->context, ea)))
643 return;
644
645 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
646 " trap=%lx\n", mm, mm->pgd, ea, access, trap);
390 647
391 if (huge) 648 /* Get PTE, VSID, access mask */
392 vpn = va >> HPAGE_SHIFT; 649 pgdir = mm->pgd;
650 if (pgdir == NULL)
651 return;
652 ptep = find_linux_pte(pgdir, ea);
653 if (!ptep)
654 return;
655 vsid = get_vsid(mm->context.id, ea);
656
657 /* Hash it in */
658 local_irq_save(flags);
659 mask = cpumask_of_cpu(smp_processor_id());
660 if (cpus_equal(mm->cpu_vm_mask, mask))
661 local = 1;
662#ifndef CONFIG_PPC_64K_PAGES
663 __hash_page_4K(ea, access, vsid, ptep, trap, local);
664#else
665 if (mmu_virtual_psize == MMU_PAGE_64K)
666 __hash_page_64K(ea, access, vsid, ptep, trap, local);
393 else 667 else
394 vpn = va >> PAGE_SHIFT; 668 __hash_page_4K(ea, access, vsid, ptep, trap, local);
395 hash = hpt_hash(vpn, huge); 669#endif /* CONFIG_PPC_64K_PAGES */
396 secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; 670 local_irq_restore(flags);
397 if (secondary) 671}
398 hash = ~hash; 672
399 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 673void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
400 slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; 674{
401 675 unsigned long hash, index, shift, hidx, slot;
402 ppc_md.hpte_invalidate(slot, va, huge, local); 676
677 DBG_LOW("flush_hash_page(va=%016x)\n", va);
678 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
679 hash = hpt_hash(va, shift);
680 hidx = __rpte_to_hidx(pte, index);
681 if (hidx & _PTEIDX_SECONDARY)
682 hash = ~hash;
683 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
684 slot += hidx & _PTEIDX_GROUP_IX;
685 DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
686 ppc_md.hpte_invalidate(slot, va, psize, local);
687 } pte_iterate_hashed_end();
403} 688}
404 689
405void flush_hash_range(unsigned long number, int local) 690void flush_hash_range(unsigned long number, int local)
406{ 691{
407 if (ppc_md.flush_hash_range) { 692 if (ppc_md.flush_hash_range)
408 ppc_md.flush_hash_range(number, local); 693 ppc_md.flush_hash_range(number, local);
409 } else { 694 else {
410 int i; 695 int i;
411 struct ppc64_tlb_batch *batch = 696 struct ppc64_tlb_batch *batch =
412 &__get_cpu_var(ppc64_tlb_batch); 697 &__get_cpu_var(ppc64_tlb_batch);
413 698
414 for (i = 0; i < number; i++) 699 for (i = 0; i < number; i++)
415 flush_hash_page(batch->vaddr[i], batch->pte[i], local); 700 flush_hash_page(batch->vaddr[i], batch->pte[i],
701 batch->psize, local);
416 } 702 }
417} 703}
418 704
@@ -452,6 +738,18 @@ void __init htab_finish_init(void)
452 extern unsigned int *htab_call_hpte_remove; 738 extern unsigned int *htab_call_hpte_remove;
453 extern unsigned int *htab_call_hpte_updatepp; 739 extern unsigned int *htab_call_hpte_updatepp;
454 740
741#ifdef CONFIG_PPC_64K_PAGES
742 extern unsigned int *ht64_call_hpte_insert1;
743 extern unsigned int *ht64_call_hpte_insert2;
744 extern unsigned int *ht64_call_hpte_remove;
745 extern unsigned int *ht64_call_hpte_updatepp;
746
747 make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
748 make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
749 make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
750 make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
751#endif /* CONFIG_PPC_64K_PAGES */
752
455 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); 753 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
456 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); 754 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
457 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); 755 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..0073a04047e4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
47 pu = pud_offset(pg, addr); 47 pu = pud_offset(pg, addr);
48 if (!pud_none(*pu)) { 48 if (!pud_none(*pu)) {
49 pm = pmd_offset(pu, addr); 49 pm = pmd_offset(pu, addr);
50#ifdef CONFIG_PPC_64K_PAGES
51 /* Currently, we use the normal PTE offset within full
52 * size PTE pages, thus our huge PTEs are scattered in
53 * the PTE page and we do waste some. We may change
54 * that in the future, but the current mecanism keeps
55 * things much simpler
56 */
57 if (!pmd_none(*pm)) {
58 /* Note: pte_offset_* are all equivalent on
59 * ppc64 as we don't have HIGHMEM
60 */
61 pt = pte_offset_kernel(pm, addr);
62 return pt;
63 }
64#else /* CONFIG_PPC_64K_PAGES */
65 /* On 4k pages, we put huge PTEs in the PMD page */
50 pt = (pte_t *)pm; 66 pt = (pte_t *)pm;
51 BUG_ON(!pmd_none(*pm)
52 && !(pte_present(*pt) && pte_huge(*pt)));
53 return pt; 67 return pt;
68#endif /* CONFIG_PPC_64K_PAGES */
54 } 69 }
55 } 70 }
56 71
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
74 if (pu) { 89 if (pu) {
75 pm = pmd_alloc(mm, pu, addr); 90 pm = pmd_alloc(mm, pu, addr);
76 if (pm) { 91 if (pm) {
92#ifdef CONFIG_PPC_64K_PAGES
93 /* See comment in huge_pte_offset. Note that if we ever
94 * want to put the page size in the PMD, we would have
95 * to open code our own pte_alloc* function in order
96 * to populate and set the size atomically
97 */
98 pt = pte_alloc_map(mm, pm, addr);
99#else /* CONFIG_PPC_64K_PAGES */
77 pt = (pte_t *)pm; 100 pt = (pte_t *)pm;
78 BUG_ON(!pmd_none(*pm) 101#endif /* CONFIG_PPC_64K_PAGES */
79 && !(pte_present(*pt) && pte_huge(*pt)));
80 return pt; 102 return pt;
81 } 103 }
82 } 104 }
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
84 return NULL; 106 return NULL;
85} 107}
86 108
87#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
88
89void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 109void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
90 pte_t *ptep, pte_t pte) 110 pte_t *ptep, pte_t pte)
91{ 111{
92 int i;
93
94 if (pte_present(*ptep)) { 112 if (pte_present(*ptep)) {
95 pte_clear(mm, addr, ptep); 113 /* We open-code pte_clear because we need to pass the right
114 * argument to hpte_update (huge / !huge)
115 */
116 unsigned long old = pte_update(ptep, ~0UL);
117 if (old & _PAGE_HASHPTE)
118 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
96 flush_tlb_pending(); 119 flush_tlb_pending();
97 } 120 }
98 121 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
99 for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
100 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
101 ptep++;
102 }
103} 122}
104 123
105pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 124pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
106 pte_t *ptep) 125 pte_t *ptep)
107{ 126{
108 unsigned long old = pte_update(ptep, ~0UL); 127 unsigned long old = pte_update(ptep, ~0UL);
109 int i;
110 128
111 if (old & _PAGE_HASHPTE) 129 if (old & _PAGE_HASHPTE)
112 hpte_update(mm, addr, old, 0); 130 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
113 131 *ptep = __pte(0);
114 for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
115 ptep[i] = __pte(0);
116 132
117 return __pte(old); 133 return __pte(old);
118} 134}
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
563 int lastshift; 579 int lastshift;
564 u16 areamask, curareas; 580 u16 areamask, curareas;
565 581
582 if (HPAGE_SHIFT == 0)
583 return -EINVAL;
566 if (len & ~HPAGE_MASK) 584 if (len & ~HPAGE_MASK)
567 return -EINVAL; 585 return -EINVAL;
568 586
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
619 unsigned long ea, unsigned long vsid, int local) 637 unsigned long ea, unsigned long vsid, int local)
620{ 638{
621 pte_t *ptep; 639 pte_t *ptep;
622 unsigned long va, vpn; 640 unsigned long old_pte, new_pte;
623 pte_t old_pte, new_pte; 641 unsigned long va, rflags, pa;
624 unsigned long rflags, prpn;
625 long slot; 642 long slot;
626 int err = 1; 643 int err = 1;
627 644
628 spin_lock(&mm->page_table_lock);
629
630 ptep = huge_pte_offset(mm, ea); 645 ptep = huge_pte_offset(mm, ea);
631 646
632 /* Search the Linux page table for a match with va */ 647 /* Search the Linux page table for a match with va */
633 va = (vsid << 28) | (ea & 0x0fffffff); 648 va = (vsid << 28) | (ea & 0x0fffffff);
634 vpn = va >> HPAGE_SHIFT;
635 649
636 /* 650 /*
637 * If no pte found or not present, send the problem up to 651 * If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
640 if (unlikely(!ptep || pte_none(*ptep))) 654 if (unlikely(!ptep || pte_none(*ptep)))
641 goto out; 655 goto out;
642 656
643/* BUG_ON(pte_bad(*ptep)); */
644
645 /* 657 /*
646 * Check the user's access rights to the page. If access should be 658 * Check the user's access rights to the page. If access should be
647 * prevented then send the problem up to do_page_fault. 659 * prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
661 */ 673 */
662 674
663 675
664 old_pte = *ptep; 676 do {
665 new_pte = old_pte; 677 old_pte = pte_val(*ptep);
666 678 if (old_pte & _PAGE_BUSY)
667 rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); 679 goto out;
680 new_pte = old_pte | _PAGE_BUSY |
681 _PAGE_ACCESSED | _PAGE_HASHPTE;
682 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
683 old_pte, new_pte));
684
685 rflags = 0x2 | (!(new_pte & _PAGE_RW));
668 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 686 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
669 rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); 687 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
670 688
671 /* Check if pte already has an hpte (case 2) */ 689 /* Check if pte already has an hpte (case 2) */
672 if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { 690 if (unlikely(old_pte & _PAGE_HASHPTE)) {
673 /* There MIGHT be an HPTE for this pte */ 691 /* There MIGHT be an HPTE for this pte */
674 unsigned long hash, slot; 692 unsigned long hash, slot;
675 693
676 hash = hpt_hash(vpn, 1); 694 hash = hpt_hash(va, HPAGE_SHIFT);
677 if (pte_val(old_pte) & _PAGE_SECONDARY) 695 if (old_pte & _PAGE_F_SECOND)
678 hash = ~hash; 696 hash = ~hash;
679 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 697 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
680 slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; 698 slot += (old_pte & _PAGE_F_GIX) >> 12;
681 699
682 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) 700 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
683 pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; 701 old_pte &= ~_PAGE_HPTEFLAGS;
684 } 702 }
685 703
686 if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { 704 if (likely(!(old_pte & _PAGE_HASHPTE))) {
687 unsigned long hash = hpt_hash(vpn, 1); 705 unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
688 unsigned long hpte_group; 706 unsigned long hpte_group;
689 707
690 prpn = pte_pfn(old_pte); 708 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
691 709
692repeat: 710repeat:
693 hpte_group = ((hash & htab_hash_mask) * 711 hpte_group = ((hash & htab_hash_mask) *
694 HPTES_PER_GROUP) & ~0x7UL; 712 HPTES_PER_GROUP) & ~0x7UL;
695 713
696 /* Update the linux pte with the HPTE slot */ 714 /* clear HPTE slot informations in new PTE */
697 pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; 715 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
698 pte_val(new_pte) |= _PAGE_HASHPTE;
699 716
700 /* Add in WIMG bits */ 717 /* Add in WIMG bits */
701 /* XXX We should store these in the pte */ 718 /* XXX We should store these in the pte */
719 /* --BenH: I think they are ... */
702 rflags |= _PAGE_COHERENT; 720 rflags |= _PAGE_COHERENT;
703 721
704 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 722 /* Insert into the hash table, primary slot */
705 HPTE_V_LARGE, rflags); 723 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
724 mmu_huge_psize);
706 725
707 /* Primary is full, try the secondary */ 726 /* Primary is full, try the secondary */
708 if (unlikely(slot == -1)) { 727 if (unlikely(slot == -1)) {
709 pte_val(new_pte) |= _PAGE_SECONDARY; 728 new_pte |= _PAGE_F_SECOND;
710 hpte_group = ((~hash & htab_hash_mask) * 729 hpte_group = ((~hash & htab_hash_mask) *
711 HPTES_PER_GROUP) & ~0x7UL; 730 HPTES_PER_GROUP) & ~0x7UL;
712 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 731 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
713 HPTE_V_LARGE |
714 HPTE_V_SECONDARY, 732 HPTE_V_SECONDARY,
715 rflags); 733 mmu_huge_psize);
716 if (slot == -1) { 734 if (slot == -1) {
717 if (mftb() & 0x1) 735 if (mftb() & 0x1)
718 hpte_group = ((hash & htab_hash_mask) * 736 hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
726 if (unlikely(slot == -2)) 744 if (unlikely(slot == -2))
727 panic("hash_huge_page: pte_insert failed\n"); 745 panic("hash_huge_page: pte_insert failed\n");
728 746
729 pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; 747 new_pte |= (slot << 12) & _PAGE_F_GIX;
730
731 /*
732 * No need to use ldarx/stdcx here because all who
733 * might be updating the pte will hold the
734 * page_table_lock
735 */
736 *ptep = new_pte;
737 } 748 }
738 749
750 /*
751 * No need to use ldarx/stdcx here because all who
752 * might be updating the pte will hold the
753 * page_table_lock
754 */
755 *ptep = __pte(new_pte & ~_PAGE_BUSY);
756
739 err = 0; 757 err = 0;
740 758
741 out: 759 out:
742 spin_unlock(&mm->page_table_lock);
743
744 return err; 760 return err;
745} 761}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b0fc822ec29f..dfe7fa37b41a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -188,12 +188,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
188 memset(addr, 0, kmem_cache_size(cache)); 188 memset(addr, 0, kmem_cache_size(cache));
189} 189}
190 190
191#ifdef CONFIG_PPC_64K_PAGES
192static const int pgtable_cache_size[2] = {
193 PTE_TABLE_SIZE, PGD_TABLE_SIZE
194};
195static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
196 "pte_pmd_cache", "pgd_cache",
197};
198#else
191static const int pgtable_cache_size[2] = { 199static const int pgtable_cache_size[2] = {
192 PTE_TABLE_SIZE, PMD_TABLE_SIZE 200 PTE_TABLE_SIZE, PMD_TABLE_SIZE
193}; 201};
194static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 202static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
195 "pgd_pte_cache", "pud_pmd_cache", 203 "pgd_pte_cache", "pud_pmd_cache",
196}; 204};
205#endif /* CONFIG_PPC_64K_PAGES */
197 206
198kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; 207kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
199 208
@@ -201,19 +210,14 @@ void pgtable_cache_init(void)
201{ 210{
202 int i; 211 int i;
203 212
204 BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
205 BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
206 BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
207 BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
208
209 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { 213 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
210 int size = pgtable_cache_size[i]; 214 int size = pgtable_cache_size[i];
211 const char *name = pgtable_cache_name[i]; 215 const char *name = pgtable_cache_name[i];
212 216
213 pgtable_cache[i] = kmem_cache_create(name, 217 pgtable_cache[i] = kmem_cache_create(name,
214 size, size, 218 size, size,
215 SLAB_HWCACHE_ALIGN 219 SLAB_HWCACHE_ALIGN |
216 | SLAB_MUST_HWCACHE_ALIGN, 220 SLAB_MUST_HWCACHE_ALIGN,
217 zero_ctor, 221 zero_ctor,
218 NULL); 222 NULL);
219 if (! pgtable_cache[i]) 223 if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e14..7faa46b71f21 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
61int mem_init_done; 61int mem_init_done;
62unsigned long memory_limit; 62unsigned long memory_limit;
63 63
64extern void hash_preload(struct mm_struct *mm, unsigned long ea,
65 unsigned long access, unsigned long trap);
66
64/* 67/*
65 * This is called by /dev/mem to know if a given address has to 68 * This is called by /dev/mem to know if a given address has to
66 * be mapped non-cacheable or not 69 * be mapped non-cacheable or not
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
493void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 496void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
494 pte_t pte) 497 pte_t pte)
495{ 498{
496 /* handle i-cache coherency */ 499#ifdef CONFIG_PPC_STD_MMU
497 unsigned long pfn = pte_pfn(pte); 500 unsigned long access = 0, trap;
498#ifdef CONFIG_PPC32
499 pmd_t *pmd;
500#else
501 unsigned long vsid;
502 void *pgdir;
503 pte_t *ptep;
504 int local = 0;
505 cpumask_t tmp;
506 unsigned long flags;
507#endif 501#endif
502 unsigned long pfn = pte_pfn(pte);
508 503
509 /* handle i-cache coherency */ 504 /* handle i-cache coherency */
510 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && 505 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
535 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ 530 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
536 if (!pte_young(pte) || address >= TASK_SIZE) 531 if (!pte_young(pte) || address >= TASK_SIZE)
537 return; 532 return;
538#ifdef CONFIG_PPC32
539 if (Hash == 0)
540 return;
541 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
542 if (!pmd_none(*pmd))
543 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
544#else
545 pgdir = vma->vm_mm->pgd;
546 if (pgdir == NULL)
547 return;
548 533
549 ptep = find_linux_pte(pgdir, address); 534 /* We try to figure out if we are coming from an instruction
550 if (!ptep) 535 * access fault and pass that down to __hash_page so we avoid
536 * double-faulting on execution of fresh text. We have to test
537 * for regs NULL since init will get here first thing at boot
538 *
539 * We also avoid filling the hash if not coming from a fault
540 */
541 if (current->thread.regs == NULL)
551 return; 542 return;
552 543 trap = TRAP(current->thread.regs);
553 vsid = get_vsid(vma->vm_mm->context.id, address); 544 if (trap == 0x400)
554 545 access |= _PAGE_EXEC;
555 local_irq_save(flags); 546 else if (trap != 0x300)
556 tmp = cpumask_of_cpu(smp_processor_id()); 547 return;
557 if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) 548 hash_preload(vma->vm_mm, address, access, trap);
558 local = 1; 549#endif /* CONFIG_PPC_STD_MMU */
559
560 __hash_page(address, 0, vsid, ptep, 0x300, local);
561 local_irq_restore(flags);
562#endif
563#endif
564} 550}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b79a78206135..51b786940971 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -101,7 +101,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
101 pud_t *pudp; 101 pud_t *pudp;
102 pmd_t *pmdp; 102 pmd_t *pmdp;
103 pte_t *ptep; 103 pte_t *ptep;
104 unsigned long vsid;
105 104
106 if (mem_init_done) { 105 if (mem_init_done) {
107 pgdp = pgd_offset_k(ea); 106 pgdp = pgd_offset_k(ea);
@@ -117,28 +116,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
117 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 116 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
118 __pgprot(flags))); 117 __pgprot(flags)));
119 } else { 118 } else {
120 unsigned long va, vpn, hash, hpteg;
121
122 /* 119 /*
123 * If the mm subsystem is not fully up, we cannot create a 120 * If the mm subsystem is not fully up, we cannot create a
124 * linux page table entry for this mapping. Simply bolt an 121 * linux page table entry for this mapping. Simply bolt an
125 * entry in the hardware page table. 122 * entry in the hardware page table.
123 *
126 */ 124 */
127 vsid = get_kernel_vsid(ea); 125 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
128 va = (vsid << 28) | (ea & 0xFFFFFFF); 126 mmu_virtual_psize))
129 vpn = va >> PAGE_SHIFT; 127 panic("Can't map bolted IO mapping");
130
131 hash = hpt_hash(vpn, 0);
132
133 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
134
135 /* Panic if a pte grpup is full */
136 if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
137 HPTE_V_BOLTED,
138 _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
139 == -1) {
140 panic("map_io_page: could not insert mapping");
141 }
142 } 128 }
143 return 0; 129 return 0;
144} 130}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e9..d137abd241ff 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
179} 179}
180 180
181/* 181/*
182 * Preload a translation in the hash table
183 */
184void hash_preload(struct mm_struct *mm, unsigned long ea,
185 unsigned long access, unsigned long trap)
186{
187 pmd_t *pmd;
188
189 if (Hash == 0)
190 return;
191 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
192 if (!pmd_none(*pmd))
193 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
194}
195
196/*
182 * Initialize the hash table and patch the instructions in hashtable.S. 197 * Initialize the hash table and patch the instructions in hashtable.S.
183 */ 198 */
184void __init MMU_init_hw(void) 199void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a37..60e852f2f8e5 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 */ 15 */
16 16
17#undef DEBUG
18
17#include <linux/config.h> 19#include <linux/config.h>
18#include <asm/pgtable.h> 20#include <asm/pgtable.h>
19#include <asm/mmu.h> 21#include <asm/mmu.h>
20#include <asm/mmu_context.h> 22#include <asm/mmu_context.h>
21#include <asm/paca.h> 23#include <asm/paca.h>
22#include <asm/cputable.h> 24#include <asm/cputable.h>
25#include <asm/cacheflush.h>
26
27#ifdef DEBUG
28#define DBG(fmt...) udbg_printf(fmt)
29#else
30#define DBG(fmt...)
31#endif
23 32
24extern void slb_allocate(unsigned long ea); 33extern void slb_allocate_realmode(unsigned long ea);
34extern void slb_allocate_user(unsigned long ea);
35
36static void slb_allocate(unsigned long ea)
37{
38 /* Currently, we do real mode for all SLBs including user, but
39 * that will change if we bring back dynamic VSIDs
40 */
41 slb_allocate_realmode(ea);
42}
25 43
26static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) 44static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
27{ 45{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
46{ 64{
47 /* If you change this make sure you change SLB_NUM_BOLTED 65 /* If you change this make sure you change SLB_NUM_BOLTED
48 * appropriately too. */ 66 * appropriately too. */
49 unsigned long ksp_flags = SLB_VSID_KERNEL; 67 unsigned long linear_llp, virtual_llp, lflags, vflags;
50 unsigned long ksp_esid_data; 68 unsigned long ksp_esid_data;
51 69
52 WARN_ON(!irqs_disabled()); 70 WARN_ON(!irqs_disabled());
53 71
54 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 72 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
55 ksp_flags |= SLB_VSID_L; 73 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
74 lflags = SLB_VSID_KERNEL | linear_llp;
75 vflags = SLB_VSID_KERNEL | virtual_llp;
56 76
57 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); 77 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
58 if ((ksp_esid_data & ESID_MASK) == KERNELBASE) 78 if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
67 /* Slot 2 - kernel stack */ 87 /* Slot 2 - kernel stack */
68 "slbmte %2,%3\n" 88 "slbmte %2,%3\n"
69 "isync" 89 "isync"
70 :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), 90 :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
71 "r"(mk_esid_data(VMALLOCBASE, 1)), 91 "r"(mk_esid_data(VMALLOCBASE, 1)),
72 "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), 92 "r"(mk_vsid_data(ksp_esid_data, lflags)),
73 "r"(ksp_esid_data) 93 "r"(ksp_esid_data)
74 : "memory"); 94 : "memory");
75} 95}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
102 122
103 get_paca()->slb_cache_ptr = 0; 123 get_paca()->slb_cache_ptr = 0;
104 get_paca()->context = mm->context; 124 get_paca()->context = mm->context;
125#ifdef CONFIG_PPC_64K_PAGES
126 get_paca()->pgdir = mm->pgd;
127#endif /* CONFIG_PPC_64K_PAGES */
105 128
106 /* 129 /*
107 * preload some userspace segments into the SLB. 130 * preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
131 slb_allocate(unmapped_base); 154 slb_allocate(unmapped_base);
132} 155}
133 156
157static inline void patch_slb_encoding(unsigned int *insn_addr,
158 unsigned int immed)
159{
160 /* Assume the instruction had a "0" immediate value, just
161 * "or" in the new value
162 */
163 *insn_addr |= immed;
164 flush_icache_range((unsigned long)insn_addr, 4+
165 (unsigned long)insn_addr);
166}
167
134void slb_initialize(void) 168void slb_initialize(void)
135{ 169{
170 unsigned long linear_llp, virtual_llp;
171 static int slb_encoding_inited;
172 extern unsigned int *slb_miss_kernel_load_linear;
173 extern unsigned int *slb_miss_kernel_load_virtual;
174 extern unsigned int *slb_miss_user_load_normal;
175#ifdef CONFIG_HUGETLB_PAGE
176 extern unsigned int *slb_miss_user_load_huge;
177 unsigned long huge_llp;
178
179 huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
180#endif
181
182 /* Prepare our SLB miss handler based on our page size */
183 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
184 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
185 if (!slb_encoding_inited) {
186 slb_encoding_inited = 1;
187 patch_slb_encoding(slb_miss_kernel_load_linear,
188 SLB_VSID_KERNEL | linear_llp);
189 patch_slb_encoding(slb_miss_kernel_load_virtual,
190 SLB_VSID_KERNEL | virtual_llp);
191 patch_slb_encoding(slb_miss_user_load_normal,
192 SLB_VSID_USER | virtual_llp);
193
194 DBG("SLB: linear LLP = %04x\n", linear_llp);
195 DBG("SLB: virtual LLP = %04x\n", virtual_llp);
196#ifdef CONFIG_HUGETLB_PAGE
197 patch_slb_encoding(slb_miss_user_load_huge,
198 SLB_VSID_USER | huge_llp);
199 DBG("SLB: huge LLP = %04x\n", huge_llp);
200#endif
201 }
202
136 /* On iSeries the bolted entries have already been set up by 203 /* On iSeries the bolted entries have already been set up by
137 * the hypervisor from the lparMap data in head.S */ 204 * the hypervisor from the lparMap data in head.S */
138#ifndef CONFIG_PPC_ISERIES 205#ifndef CONFIG_PPC_ISERIES
139 unsigned long flags = SLB_VSID_KERNEL; 206 {
207 unsigned long lflags, vflags;
140 208
141 /* Invalidate the entire SLB (even slot 0) & all the ERATS */ 209 lflags = SLB_VSID_KERNEL | linear_llp;
142 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 210 vflags = SLB_VSID_KERNEL | virtual_llp;
143 flags |= SLB_VSID_L;
144 211
145 asm volatile("isync":::"memory"); 212 /* Invalidate the entire SLB (even slot 0) & all the ERATS */
146 asm volatile("slbmte %0,%0"::"r" (0) : "memory"); 213 asm volatile("isync":::"memory");
214 asm volatile("slbmte %0,%0"::"r" (0) : "memory");
147 asm volatile("isync; slbia; isync":::"memory"); 215 asm volatile("isync; slbia; isync":::"memory");
148 create_slbe(KERNELBASE, flags, 0); 216 create_slbe(KERNELBASE, lflags, 0);
149 create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); 217
218 /* VMALLOC space has 4K pages always for now */
219 create_slbe(VMALLOCBASE, vflags, 1);
220
150 /* We don't bolt the stack for the time being - we're in boot, 221 /* We don't bolt the stack for the time being - we're in boot,
151 * so the stack is in the bolted segment. By the time it goes 222 * so the stack is in the bolted segment. By the time it goes
152 * elsewhere, we'll call _switch() which will bolt in the new 223 * elsewhere, we'll call _switch() which will bolt in the new
153 * one. */ 224 * one. */
154 asm volatile("isync":::"memory"); 225 asm volatile("isync":::"memory");
155#endif 226 }
227#endif /* CONFIG_PPC_ISERIES */
156 228
157 get_paca()->stab_rr = SLB_NUM_BOLTED; 229 get_paca()->stab_rr = SLB_NUM_BOLTED;
158} 230}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503bc..3e18241b6f35 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
18 18
19#include <linux/config.h> 19#include <linux/config.h>
20#include <asm/processor.h> 20#include <asm/processor.h>
21#include <asm/page.h>
22#include <asm/mmu.h>
23#include <asm/ppc_asm.h> 21#include <asm/ppc_asm.h>
24#include <asm/asm-offsets.h> 22#include <asm/asm-offsets.h>
25#include <asm/cputable.h> 23#include <asm/cputable.h>
24#include <asm/page.h>
25#include <asm/mmu.h>
26#include <asm/pgtable.h>
26 27
27/* void slb_allocate(unsigned long ea); 28/* void slb_allocate_realmode(unsigned long ea);
28 * 29 *
29 * Create an SLB entry for the given EA (user or kernel). 30 * Create an SLB entry for the given EA (user or kernel).
30 * r3 = faulting address, r13 = PACA 31 * r3 = faulting address, r13 = PACA
31 * r9, r10, r11 are clobbered by this function 32 * r9, r10, r11 are clobbered by this function
32 * No other registers are examined or changed. 33 * No other registers are examined or changed.
33 */ 34 */
34_GLOBAL(slb_allocate) 35_GLOBAL(slb_allocate_realmode)
35 /* 36 /* r3 = faulting address */
36 * First find a slot, round robin. Previously we tried to find
37 * a free slot first but that took too long. Unfortunately we
38 * dont have any LRU information to help us choose a slot.
39 */
40#ifdef CONFIG_PPC_ISERIES
41 /*
42 * On iSeries, the "bolted" stack segment can be cast out on
43 * shared processor switch so we need to check for a miss on
44 * it and restore it to the right slot.
45 */
46 ld r9,PACAKSAVE(r13)
47 clrrdi r9,r9,28
48 clrrdi r11,r3,28
49 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
50 cmpld r9,r11
51 beq 3f
52#endif /* CONFIG_PPC_ISERIES */
53
54 ld r10,PACASTABRR(r13)
55 addi r10,r10,1
56 /* use a cpu feature mask if we ever change our slb size */
57 cmpldi r10,SLB_NUM_ENTRIES
58
59 blt+ 4f
60 li r10,SLB_NUM_BOLTED
61
624:
63 std r10,PACASTABRR(r13)
643:
65 /* r3 = faulting address, r10 = entry */
66 37
67 srdi r9,r3,60 /* get region */ 38 srdi r9,r3,60 /* get region */
68 srdi r3,r3,28 /* get esid */ 39 srdi r10,r3,28 /* get esid */
69 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ 40 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
70 41
71 rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ 42 /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
72 oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
73
74 /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
75
76 blt cr7,0f /* user or kernel? */ 43 blt cr7,0f /* user or kernel? */
77 44
78 /* kernel address: proto-VSID = ESID */ 45 /* kernel address: proto-VSID = ESID */
@@ -81,43 +48,161 @@ _GLOBAL(slb_allocate)
81 * top segment. That's ok, the scramble below will translate 48 * top segment. That's ok, the scramble below will translate
82 * it to VSID 0, which is reserved as a bad VSID - one which 49 * it to VSID 0, which is reserved as a bad VSID - one which
83 * will never have any pages in it. */ 50 * will never have any pages in it. */
84 li r11,SLB_VSID_KERNEL
85BEGIN_FTR_SECTION
86 bne cr7,9f
87 li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
88END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
89 b 9f
90 51
910: /* user address: proto-VSID = context<<15 | ESID */ 52 /* Check if hitting the linear mapping of the vmalloc/ioremap
92 srdi. r9,r3,USER_ESID_BITS 53 * kernel space
54 */
55 bne cr7,1f
56
57 /* Linear mapping encoding bits, the "li" instruction below will
58 * be patched by the kernel at boot
59 */
60_GLOBAL(slb_miss_kernel_load_linear)
61 li r11,0
62 b slb_finish_load
63
641: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
65 * will be patched by the kernel at boot
66 */
67_GLOBAL(slb_miss_kernel_load_virtual)
68 li r11,0
69 b slb_finish_load
70
71
720: /* user address: proto-VSID = context << 15 | ESID. First check
73 * if the address is within the boundaries of the user region
74 */
75 srdi. r9,r10,USER_ESID_BITS
93 bne- 8f /* invalid ea bits set */ 76 bne- 8f /* invalid ea bits set */
94 77
78 /* Figure out if the segment contains huge pages */
95#ifdef CONFIG_HUGETLB_PAGE 79#ifdef CONFIG_HUGETLB_PAGE
96BEGIN_FTR_SECTION 80BEGIN_FTR_SECTION
81 b 1f
82END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
97 lhz r9,PACAHIGHHTLBAREAS(r13) 83 lhz r9,PACAHIGHHTLBAREAS(r13)
98 srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) 84 srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
99 srd r9,r9,r11 85 srd r9,r9,r11
100 lhz r11,PACALOWHTLBAREAS(r13) 86 lhz r11,PACALOWHTLBAREAS(r13)
101 srd r11,r11,r3 87 srd r11,r11,r10
102 or r9,r9,r11 88 or. r9,r9,r11
103END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 89 beq 1f
90_GLOBAL(slb_miss_user_load_huge)
91 li r11,0
92 b 2f
931:
104#endif /* CONFIG_HUGETLB_PAGE */ 94#endif /* CONFIG_HUGETLB_PAGE */
105 95
106 li r11,SLB_VSID_USER 96_GLOBAL(slb_miss_user_load_normal)
97 li r11,0
107 98
108#ifdef CONFIG_HUGETLB_PAGE 992:
109BEGIN_FTR_SECTION 100 ld r9,PACACONTEXTID(r13)
110 rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ 101 rldimi r10,r9,USER_ESID_BITS,0
111END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 102 b slb_finish_load
112#endif /* CONFIG_HUGETLB_PAGE */ 103
1048: /* invalid EA */
105 li r10,0 /* BAD_VSID */
106 li r11,SLB_VSID_USER /* flags don't much matter */
107 b slb_finish_load
108
109#ifdef __DISABLED__
110
111/* void slb_allocate_user(unsigned long ea);
112 *
113 * Create an SLB entry for the given EA (user or kernel).
114 * r3 = faulting address, r13 = PACA
115 * r9, r10, r11 are clobbered by this function
116 * No other registers are examined or changed.
117 *
118 * It is called with translation enabled in order to be able to walk the
119 * page tables. This is not currently used.
120 */
121_GLOBAL(slb_allocate_user)
122 /* r3 = faulting address */
123 srdi r10,r3,28 /* get esid */
124
125 crset 4*cr7+lt /* set "user" flag for later */
126
127 /* check if we fit in the range covered by the pagetables*/
128 srdi. r9,r3,PGTABLE_EADDR_SIZE
129 crnot 4*cr0+eq,4*cr0+eq
130 beqlr
113 131
132 /* now we need to get to the page tables in order to get the page
133 * size encoding from the PMD. In the future, we'll be able to deal
134 * with 1T segments too by getting the encoding from the PGD instead
135 */
136 ld r9,PACAPGDIR(r13)
137 cmpldi cr0,r9,0
138 beqlr
139 rlwinm r11,r10,8,25,28
140 ldx r9,r9,r11 /* get pgd_t */
141 cmpldi cr0,r9,0
142 beqlr
143 rlwinm r11,r10,3,17,28
144 ldx r9,r9,r11 /* get pmd_t */
145 cmpldi cr0,r9,0
146 beqlr
147
148 /* build vsid flags */
149 andi. r11,r9,SLB_VSID_LLP
150 ori r11,r11,SLB_VSID_USER
151
152 /* get context to calculate proto-VSID */
114 ld r9,PACACONTEXTID(r13) 153 ld r9,PACACONTEXTID(r13)
115 rldimi r3,r9,USER_ESID_BITS,0 154 rldimi r10,r9,USER_ESID_BITS,0
155
156 /* fall through slb_finish_load */
157
158#endif /* __DISABLED__ */
116 159
1179: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
118 ASM_VSID_SCRAMBLE(r3,r9)
119 160
120 rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ 161/*
162 * Finish loading of an SLB entry and return
163 *
164 * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
165 */
166slb_finish_load:
167 ASM_VSID_SCRAMBLE(r10,r9)
168 rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
169
170 /* r3 = EA, r11 = VSID data */
171 /*
172 * Find a slot, round robin. Previously we tried to find a
173 * free slot first but that took too long. Unfortunately we
174 * dont have any LRU information to help us choose a slot.
175 */
176#ifdef CONFIG_PPC_ISERIES
177 /*
178 * On iSeries, the "bolted" stack segment can be cast out on
179 * shared processor switch so we need to check for a miss on
180 * it and restore it to the right slot.
181 */
182 ld r9,PACAKSAVE(r13)
183 clrrdi r9,r9,28
184 clrrdi r3,r3,28
185 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
186 cmpld r9,r3
187 beq 3f
188#endif /* CONFIG_PPC_ISERIES */
189
190 ld r10,PACASTABRR(r13)
191 addi r10,r10,1
192 /* use a cpu feature mask if we ever change our slb size */
193 cmpldi r10,SLB_NUM_ENTRIES
194
195 blt+ 4f
196 li r10,SLB_NUM_BOLTED
197
1984:
199 std r10,PACASTABRR(r13)
200
2013:
202 rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
203 oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
204
205 /* r3 = ESID data, r11 = VSID data */
121 206
122 /* 207 /*
123 * No need for an isync before or after this slbmte. The exception 208 * No need for an isync before or after this slbmte. The exception
@@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
125 */ 210 */
126 slbmte r11,r10 211 slbmte r11,r10
127 212
128 bgelr cr7 /* we're done for kernel addresses */ 213 /* we're done for kernel addresses */
214 crclr 4*cr0+eq /* set result to "success" */
215 bgelr cr7
129 216
130 /* Update the slb cache */ 217 /* Update the slb cache */
131 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ 218 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
143 li r3,SLB_CACHE_ENTRIES+1 230 li r3,SLB_CACHE_ENTRIES+1
1442: 2312:
145 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ 232 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
233 crclr 4*cr0+eq /* set result to "success" */
146 blr 234 blr
147 235
1488: /* invalid EA */
149 li r3,0 /* BAD_VSID */
150 li r11,SLB_VSID_USER /* flags don't much matter */
151 b 9b
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf27..fa325dbf98fc 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -26,7 +26,6 @@ struct stab_entry {
26 unsigned long vsid_data; 26 unsigned long vsid_data;
27}; 27};
28 28
29/* Both the segment table and SLB code uses the following cache */
30#define NR_STAB_CACHE_ENTRIES 8 29#define NR_STAB_CACHE_ENTRIES 8
31DEFINE_PER_CPU(long, stab_cache_ptr); 30DEFINE_PER_CPU(long, stab_cache_ptr);
32DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); 31DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
186 /* Never flush the first entry. */ 185 /* Never flush the first entry. */
187 ste += 1; 186 ste += 1;
188 for (entry = 1; 187 for (entry = 1;
189 entry < (PAGE_SIZE / sizeof(struct stab_entry)); 188 entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
190 entry++, ste++) { 189 entry++, ste++) {
191 unsigned long ea; 190 unsigned long ea;
192 ea = ste->esid_data & ESID_MASK; 191 ea = ste->esid_data & ESID_MASK;
@@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
200 199
201 __get_cpu_var(stab_cache_ptr) = 0; 200 __get_cpu_var(stab_cache_ptr) = 0;
202 201
202#ifdef CONFIG_PPC_64K_PAGES
203 get_paca()->pgdir = mm->pgd;
204#endif /* CONFIG_PPC_64K_PAGES */
205
203 /* Now preload some entries for the new task */ 206 /* Now preload some entries for the new task */
204 if (test_tsk_thread_flag(tsk, TIF_32BIT)) 207 if (test_tsk_thread_flag(tsk, TIF_32BIT))
205 unmapped_base = TASK_UNMAPPED_BASE_USER32; 208 unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
223 asm volatile("sync" : : : "memory"); 226 asm volatile("sync" : : : "memory");
224} 227}
225 228
226extern void slb_initialize(void);
227
228/* 229/*
229 * Allocate segment tables for secondary CPUs. These must all go in 230 * Allocate segment tables for secondary CPUs. These must all go in
230 * the first (bolted) segment, so that do_stab_bolted won't get a 231 * the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +244,21 @@ void stabs_alloc(void)
243 if (cpu == 0) 244 if (cpu == 0)
244 continue; /* stab for CPU 0 is statically allocated */ 245 continue; /* stab for CPU 0 is statically allocated */
245 246
246 newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT); 247 newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
248 1<<SID_SHIFT);
247 if (! newstab) 249 if (! newstab)
248 panic("Unable to allocate segment table for CPU %d.\n", 250 panic("Unable to allocate segment table for CPU %d.\n",
249 cpu); 251 cpu);
250 252
251 newstab += KERNELBASE; 253 newstab += KERNELBASE;
252 254
253 memset((void *)newstab, 0, PAGE_SIZE); 255 memset((void *)newstab, 0, HW_PAGE_SIZE);
254 256
255 paca[cpu].stab_addr = newstab; 257 paca[cpu].stab_addr = newstab;
256 paca[cpu].stab_real = virt_to_abs(newstab); 258 paca[cpu].stab_real = virt_to_abs(newstab);
257 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); 259 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx "
260 "virtual, 0x%lx absolute\n",
261 cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
258 } 262 }
259} 263}
260 264
@@ -267,13 +271,9 @@ void stab_initialize(unsigned long stab)
267{ 271{
268 unsigned long vsid = get_kernel_vsid(KERNELBASE); 272 unsigned long vsid = get_kernel_vsid(KERNELBASE);
269 273
270 if (cpu_has_feature(CPU_FTR_SLB)) { 274 asm volatile("isync; slbia; isync":::"memory");
271 slb_initialize(); 275 make_ste(stab, GET_ESID(KERNELBASE), vsid);
272 } else {
273 asm volatile("isync; slbia; isync":::"memory");
274 make_ste(stab, GET_ESID(KERNELBASE), vsid);
275 276
276 /* Order update */ 277 /* Order update */
277 asm volatile("sync":::"memory"); 278 asm volatile("sync":::"memory");
278 }
279} 279}
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4f..53e31b834ace 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
21 * as published by the Free Software Foundation; either version 21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version. 22 * 2 of the License, or (at your option) any later version.
23 */ 23 */
24
24#include <linux/config.h> 25#include <linux/config.h>
25#include <linux/kernel.h> 26#include <linux/kernel.h>
26#include <linux/mm.h> 27#include <linux/mm.h>
@@ -30,7 +31,7 @@
30#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
31#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
32#include <asm/tlb.h> 33#include <asm/tlb.h>
33#include <linux/highmem.h> 34#include <asm/bug.h>
34 35
35DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); 36DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
36 37
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
126 * (if we remove it we should clear the _PTE_HPTEFLAGS bits). 127 * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
127 */ 128 */
128void hpte_update(struct mm_struct *mm, unsigned long addr, 129void hpte_update(struct mm_struct *mm, unsigned long addr,
129 unsigned long pte, int wrprot) 130 pte_t *ptep, unsigned long pte, int huge)
130{ 131{
131 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 132 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
132 unsigned long vsid; 133 unsigned long vsid;
134 unsigned int psize = mmu_virtual_psize;
133 int i; 135 int i;
134 136
135 i = batch->index; 137 i = batch->index;
136 138
139 /* We mask the address for the base page size. Huge pages will
140 * have applied their own masking already
141 */
142 addr &= PAGE_MASK;
143
144 /* Get page size (maybe move back to caller) */
145 if (huge) {
146#ifdef CONFIG_HUGETLB_PAGE
147 psize = mmu_huge_psize;
148#else
149 BUG();
150#endif
151 }
152
137 /* 153 /*
138 * This can happen when we are in the middle of a TLB batch and 154 * This can happen when we are in the middle of a TLB batch and
139 * we encounter memory pressure (eg copy_page_range when it tries 155 * we encounter memory pressure (eg copy_page_range when it tries
140 * to allocate a new pte). If we have to reclaim memory and end 156 * to allocate a new pte). If we have to reclaim memory and end
141 * up scanning and resetting referenced bits then our batch context 157 * up scanning and resetting referenced bits then our batch context
142 * will change mid stream. 158 * will change mid stream.
159 *
160 * We also need to ensure only one page size is present in a given
161 * batch
143 */ 162 */
144 if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { 163 if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
145 flush_tlb_pending(); 164 flush_tlb_pending();
146 i = 0; 165 i = 0;
147 } 166 }
148 if (i == 0) { 167 if (i == 0) {
149 batch->mm = mm; 168 batch->mm = mm;
150 batch->large = pte_huge(pte); 169 batch->psize = psize;
151 } 170 }
152 if (addr < KERNELBASE) { 171 if (addr < KERNELBASE) {
153 vsid = get_vsid(mm->context.id, addr); 172 vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
155 } else 174 } else
156 vsid = get_kernel_vsid(addr); 175 vsid = get_kernel_vsid(addr);
157 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); 176 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
158 batch->pte[i] = __pte(pte); 177 batch->pte[i] = __real_pte(__pte(pte), ptep);
159 batch->index = ++i; 178 batch->index = ++i;
160 if (i >= PPC64_TLB_BATCH_NR) 179 if (i >= PPC64_TLB_BATCH_NR)
161 flush_tlb_pending(); 180 flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
177 local = 1; 196 local = 1;
178 197
179 if (i == 1) 198 if (i == 1)
180 flush_hash_page(batch->vaddr[0], batch->pte[0], local); 199 flush_hash_page(batch->vaddr[0], batch->pte[0],
200 batch->psize, local);
181 else 201 else
182 flush_hash_range(i, local); 202 flush_hash_range(i, local);
183 batch->index = 0; 203 batch->index = 0;