aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2005-11-06 22:42:09 -0500
committerPaul Mackerras <paulus@samba.org>2005-11-06 22:42:09 -0500
commitc6135234550ed89a6fd0e8cb229633967e41d649 (patch)
tree22cef33e314839c4fb30d6fc888c0caa2a0f6602 /arch/powerpc/mm
parent76032de898f34db55b5048349db56557828a1390 (diff)
parent0b154bb7d0cce80e9c0bcf11d4f9e71b59409d26 (diff)
Merge ../linux-2.6
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/hash_low_64.S613
-rw-r--r--arch/powerpc/mm/hash_native_64.c377
-rw-r--r--arch/powerpc/mm/hash_utils_64.c532
-rw-r--r--arch/powerpc/mm/hugetlbpage.c134
-rw-r--r--arch/powerpc/mm/init_64.c18
-rw-r--r--arch/powerpc/mm/mem.c56
-rw-r--r--arch/powerpc/mm/pgtable_64.c22
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c15
-rw-r--r--arch/powerpc/mm/slb.c102
-rw-r--r--arch/powerpc/mm/slb_low.S220
-rw-r--r--arch/powerpc/mm/stab.c30
-rw-r--r--arch/powerpc/mm/tlb_64.c32
12 files changed, 1637 insertions, 514 deletions
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eeea..e0d02c4a2615 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
1/* 1/*
2 * ppc64 MMU hashtable management routines 2 * ppc64 MMU hashtable management routines
3 * 3 *
4 * (c) Copyright IBM Corp. 2003 4 * (c) Copyright IBM Corp. 2003, 2005
5 * 5 *
6 * Maintained by: Benjamin Herrenschmidt 6 * Maintained by: Benjamin Herrenschmidt
7 * <benh@kernel.crashing.org> 7 * <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
10 * described in the kernel's COPYING file. 10 * described in the kernel's COPYING file.
11 */ 11 */
12 12
13#include <linux/config.h>
13#include <asm/reg.h> 14#include <asm/reg.h>
14#include <asm/pgtable.h> 15#include <asm/pgtable.h>
15#include <asm/mmu.h> 16#include <asm/mmu.h>
@@ -42,14 +43,24 @@
42/* Save non-volatile offsets */ 43/* Save non-volatile offsets */
43#define STK_REG(i) (112 + ((i)-14)*8) 44#define STK_REG(i) (112 + ((i)-14)*8)
44 45
46
47#ifndef CONFIG_PPC_64K_PAGES
48
49/*****************************************************************************
50 * *
51 * 4K SW & 4K HW pages implementation *
52 * *
53 *****************************************************************************/
54
55
45/* 56/*
46 * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, 57 * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
47 * pte_t *ptep, unsigned long trap, int local) 58 * pte_t *ptep, unsigned long trap, int local)
48 * 59 *
49 * Adds a page to the hash table. This is the non-LPAR version for now 60 * Adds a 4K page to the hash table in a segment of 4K pages only
50 */ 61 */
51 62
52_GLOBAL(__hash_page) 63_GLOBAL(__hash_page_4K)
53 mflr r0 64 mflr r0
54 std r0,16(r1) 65 std r0,16(r1)
55 stdu r1,-STACKFRAMESIZE(r1) 66 stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
88 /* If so, just bail out and refault if needed. Someone else 99 /* If so, just bail out and refault if needed. Someone else
89 * is changing this PTE anyway and might hash it. 100 * is changing this PTE anyway and might hash it.
90 */ 101 */
91 bne- bail_ok 102 bne- htab_bail_ok
103
92 /* Prepare new PTE value (turn access RW into DIRTY, then 104 /* Prepare new PTE value (turn access RW into DIRTY, then
93 * add BUSY,HASHPTE and ACCESSED) 105 * add BUSY,HASHPTE and ACCESSED)
94 */ 106 */
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
118 130
119 /* Convert linux PTE bits into HW equivalents */ 131 /* Convert linux PTE bits into HW equivalents */
120 andi. r3,r30,0x1fe /* Get basic set of flags */ 132 andi. r3,r30,0x1fe /* Get basic set of flags */
121 xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ 133 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
122 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ 134 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
123 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ 135 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
124 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ 136 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
125 andc r0,r30,r0 /* r0 = pte & ~r0 */ 137 andc r0,r30,r0 /* r0 = pte & ~r0 */
126 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ 138 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
127 139
@@ -158,19 +170,21 @@ htab_insert_pte:
158 andc r30,r30,r0 170 andc r30,r30,r0
159 ori r30,r30,_PAGE_HASHPTE 171 ori r30,r30,_PAGE_HASHPTE
160 172
161 /* page number in r5 */ 173 /* physical address r5 */
162 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 174 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
175 sldi r5,r5,PAGE_SHIFT
163 176
164 /* Calculate primary group hash */ 177 /* Calculate primary group hash */
165 and r0,r28,r27 178 and r0,r28,r27
166 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 179 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
167 180
168 /* Call ppc_md.hpte_insert */ 181 /* Call ppc_md.hpte_insert */
169 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 182 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
170 mr r4,r29 /* Retreive va */ 183 mr r4,r29 /* Retreive va */
171 li r6,0 /* no vflags */ 184 li r7,0 /* !bolted, !secondary */
185 li r8,MMU_PAGE_4K /* page size */
172_GLOBAL(htab_call_hpte_insert1) 186_GLOBAL(htab_call_hpte_insert1)
173 bl . /* Will be patched by htab_finish_init() */ 187 bl . /* Patched by htab_finish_init() */
174 cmpdi 0,r3,0 188 cmpdi 0,r3,0
175 bge htab_pte_insert_ok /* Insertion successful */ 189 bge htab_pte_insert_ok /* Insertion successful */
176 cmpdi 0,r3,-2 /* Critical failure */ 190 cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
178 192
179 /* Now try secondary slot */ 193 /* Now try secondary slot */
180 194
181 /* page number in r5 */ 195 /* physical address r5 */
182 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 196 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
197 sldi r5,r5,PAGE_SHIFT
183 198
184 /* Calculate secondary group hash */ 199 /* Calculate secondary group hash */
185 andc r0,r27,r28 200 andc r0,r27,r28
186 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ 201 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
187 202
188 /* Call ppc_md.hpte_insert */ 203 /* Call ppc_md.hpte_insert */
189 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 204 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
190 mr r4,r29 /* Retreive va */ 205 mr r4,r29 /* Retreive va */
191 li r6,HPTE_V_SECONDARY@l /* secondary slot */ 206 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
207 li r8,MMU_PAGE_4K /* page size */
192_GLOBAL(htab_call_hpte_insert2) 208_GLOBAL(htab_call_hpte_insert2)
193 bl . /* Will be patched by htab_finish_init() */ 209 bl . /* Patched by htab_finish_init() */
194 cmpdi 0,r3,0 210 cmpdi 0,r3,0
195 bge+ htab_pte_insert_ok /* Insertion successful */ 211 bge+ htab_pte_insert_ok /* Insertion successful */
196 cmpdi 0,r3,-2 /* Critical failure */ 212 cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
207 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 223 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
208 /* Call ppc_md.hpte_remove */ 224 /* Call ppc_md.hpte_remove */
209_GLOBAL(htab_call_hpte_remove) 225_GLOBAL(htab_call_hpte_remove)
210 bl . /* Will be patched by htab_finish_init() */ 226 bl . /* Patched by htab_finish_init() */
211 227
212 /* Try all again */ 228 /* Try all again */
213 b htab_insert_pte 229 b htab_insert_pte
214 230
215bail_ok: 231htab_bail_ok:
216 li r3,0 232 li r3,0
217 b bail 233 b htab_bail
218 234
219htab_pte_insert_ok: 235htab_pte_insert_ok:
220 /* Insert slot number & secondary bit in PTE */ 236 /* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
227 ld r6,STK_PARM(r6)(r1) 243 ld r6,STK_PARM(r6)(r1)
228 std r30,0(r6) 244 std r30,0(r6)
229 li r3, 0 245 li r3, 0
230bail: 246htab_bail:
231 ld r27,STK_REG(r27)(r1) 247 ld r27,STK_REG(r27)(r1)
232 ld r28,STK_REG(r28)(r1) 248 ld r28,STK_REG(r28)(r1)
233 ld r29,STK_REG(r29)(r1) 249 ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
256 272
257 /* Call ppc_md.hpte_updatepp */ 273 /* Call ppc_md.hpte_updatepp */
258 mr r5,r29 /* va */ 274 mr r5,r29 /* va */
259 li r6,0 /* large is 0 */ 275 li r6,MMU_PAGE_4K /* page size */
260 ld r7,STK_PARM(r8)(r1) /* get "local" param */ 276 ld r7,STK_PARM(r8)(r1) /* get "local" param */
261_GLOBAL(htab_call_hpte_updatepp) 277_GLOBAL(htab_call_hpte_updatepp)
262 bl . /* Will be patched by htab_finish_init() */ 278 bl . /* Patched by htab_finish_init() */
263 279
264 /* if we failed because typically the HPTE wasn't really here 280 /* if we failed because typically the HPTE wasn't really here
265 * we try an insertion. 281 * we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
276 /* Bail out clearing reservation */ 292 /* Bail out clearing reservation */
277 stdcx. r31,0,r6 293 stdcx. r31,0,r6
278 li r3,1 294 li r3,1
279 b bail 295 b htab_bail
296
297htab_pte_insert_failure:
298 /* Bail out restoring old PTE */
299 ld r6,STK_PARM(r6)(r1)
300 std r31,0(r6)
301 li r3,-1
302 b htab_bail
303
304
305#else /* CONFIG_PPC_64K_PAGES */
306
307
308/*****************************************************************************
309 * *
310 * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
311 * *
312 *****************************************************************************/
313
314/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
315 * pte_t *ptep, unsigned long trap, int local)
316 */
317
318/*
319 * For now, we do NOT implement Admixed pages
320 */
321_GLOBAL(__hash_page_4K)
322 mflr r0
323 std r0,16(r1)
324 stdu r1,-STACKFRAMESIZE(r1)
325 /* Save all params that we need after a function call */
326 std r6,STK_PARM(r6)(r1)
327 std r8,STK_PARM(r8)(r1)
328
329 /* Add _PAGE_PRESENT to access */
330 ori r4,r4,_PAGE_PRESENT
331
332 /* Save non-volatile registers.
333 * r31 will hold "old PTE"
334 * r30 is "new PTE"
335 * r29 is "va"
336 * r28 is a hash value
337 * r27 is hashtab mask (maybe dynamic patched instead ?)
338 * r26 is the hidx mask
339 * r25 is the index in combo page
340 */
341 std r25,STK_REG(r25)(r1)
342 std r26,STK_REG(r26)(r1)
343 std r27,STK_REG(r27)(r1)
344 std r28,STK_REG(r28)(r1)
345 std r29,STK_REG(r29)(r1)
346 std r30,STK_REG(r30)(r1)
347 std r31,STK_REG(r31)(r1)
348
349 /* Step 1:
350 *
351 * Check permissions, atomically mark the linux PTE busy
352 * and hashed.
353 */
3541:
355 ldarx r31,0,r6
356 /* Check access rights (access & ~(pte_val(*ptep))) */
357 andc. r0,r4,r31
358 bne- htab_wrong_access
359 /* Check if PTE is busy */
360 andi. r0,r31,_PAGE_BUSY
361 /* If so, just bail out and refault if needed. Someone else
362 * is changing this PTE anyway and might hash it.
363 */
364 bne- htab_bail_ok
365 /* Prepare new PTE value (turn access RW into DIRTY, then
366 * add BUSY, HASHPTE and ACCESSED)
367 */
368 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
369 or r30,r30,r31
370 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
371 /* Write the linux PTE atomically (setting busy) */
372 stdcx. r30,0,r6
373 bne- 1b
374 isync
375
376 /* Step 2:
377 *
378 * Insert/Update the HPTE in the hash table. At this point,
379 * r4 (access) is re-useable, we use it for the new HPTE flags
380 */
381
382 /* Load the hidx index */
383 rldicl r25,r3,64-12,60
384
385 /* Calc va and put it in r29 */
386 rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
387 rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
388 or r29,r3,r29 /* r29 = va */
389
390 /* Calculate hash value for primary slot and store it in r28 */
391 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
392 rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
393 xor r28,r5,r0
394
395 /* Convert linux PTE bits into HW equivalents */
396 andi. r3,r30,0x1fe /* Get basic set of flags */
397 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
398 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
399 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
400 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
401 andc r0,r30,r0 /* r0 = pte & ~r0 */
402 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
403
404 /* We eventually do the icache sync here (maybe inline that
405 * code rather than call a C function...)
406 */
407BEGIN_FTR_SECTION
408 mr r4,r30
409 mr r5,r7
410 bl .hash_page_do_lazy_icache
411END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
412
413 /* At this point, r3 contains new PP bits, save them in
414 * place of "access" in the param area (sic)
415 */
416 std r3,STK_PARM(r4)(r1)
417
418 /* Get htab_hash_mask */
419 ld r4,htab_hash_mask@got(2)
420 ld r27,0(r4) /* htab_hash_mask -> r27 */
421
422 /* Check if we may already be in the hashtable, in this case, we
423 * go to out-of-line code to try to modify the HPTE. We look for
424 * the bit at (1 >> (index + 32))
425 */
426 andi. r0,r31,_PAGE_HASHPTE
427 li r26,0 /* Default hidx */
428 beq htab_insert_pte
429 ld r6,STK_PARM(r6)(r1)
430 ori r26,r6,0x8000 /* Load the hidx mask */
431 ld r26,0(r26)
432 addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
433 rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
434 bne htab_modify_pte
435
436htab_insert_pte:
437 /* real page number in r5, PTE RPN value + index */
438 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
439 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
440 add r5,r5,r25
441 sldi r5,r5,HW_PAGE_SHIFT
442
443 /* Calculate primary group hash */
444 and r0,r28,r27
445 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
446
447 /* Call ppc_md.hpte_insert */
448 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
449 mr r4,r29 /* Retrieve va */
450 li r7,0 /* !bolted, !secondary */
451 li r8,MMU_PAGE_4K /* page size */
452_GLOBAL(htab_call_hpte_insert1)
453 bl . /* patched by htab_finish_init() */
454 cmpdi 0,r3,0
455 bge htab_pte_insert_ok /* Insertion successful */
456 cmpdi 0,r3,-2 /* Critical failure */
457 beq- htab_pte_insert_failure
458
459 /* Now try secondary slot */
460
461 /* real page number in r5, PTE RPN value + index */
462 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
463 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
464 add r5,r5,r25
465 sldi r5,r5,HW_PAGE_SHIFT
466
467 /* Calculate secondary group hash */
468 andc r0,r27,r28
469 rldicr r3,r0,3,63-3 /* r3 = (~hash & mask) << 3 */
470
471 /* Call ppc_md.hpte_insert */
472 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
473 mr r4,r29 /* Retrieve va */
474 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
475 li r8,MMU_PAGE_4K /* page size */
476_GLOBAL(htab_call_hpte_insert2)
477 bl . /* patched by htab_finish_init() */
478 cmpdi 0,r3,0
479 bge+ htab_pte_insert_ok /* Insertion successful */
480 cmpdi 0,r3,-2 /* Critical failure */
481 beq- htab_pte_insert_failure
482
483 /* Both are full, we need to evict something */
484 mftb r0
485 /* Pick a random group based on TB */
486 andi. r0,r0,1
487 mr r5,r28
488 bne 2f
489 not r5,r5
4902: and r0,r5,r27
491 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
492 /* Call ppc_md.hpte_remove */
493_GLOBAL(htab_call_hpte_remove)
494 bl . /* patched by htab_finish_init() */
495
496 /* Try all again */
497 b htab_insert_pte
498
499htab_bail_ok:
500 li r3,0
501 b htab_bail
502
503htab_pte_insert_ok:
504 /* Insert slot number & secondary bit in PTE second half,
505 * clear _PAGE_BUSY and set appropriate HPTE slot bit
506 */
507 ld r6,STK_PARM(r6)(r1)
508 li r0,_PAGE_BUSY
509 andc r30,r30,r0
510 /* HPTE SUB bit */
511 li r0,1
512 subfic r5,r25,27 /* Must match bit position in */
513 sld r0,r0,r5 /* pgtable.h */
514 or r30,r30,r0
515 /* hindx */
516 sldi r5,r25,2
517 sld r3,r3,r5
518 li r4,0xf
519 sld r4,r4,r5
520 andc r26,r26,r4
521 or r26,r26,r3
522 ori r5,r6,0x8000
523 std r26,0(r5)
524 lwsync
525 std r30,0(r6)
526 li r3, 0
527htab_bail:
528 ld r25,STK_REG(r25)(r1)
529 ld r26,STK_REG(r26)(r1)
530 ld r27,STK_REG(r27)(r1)
531 ld r28,STK_REG(r28)(r1)
532 ld r29,STK_REG(r29)(r1)
533 ld r30,STK_REG(r30)(r1)
534 ld r31,STK_REG(r31)(r1)
535 addi r1,r1,STACKFRAMESIZE
536 ld r0,16(r1)
537 mtlr r0
538 blr
539
540htab_modify_pte:
541 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
542 mr r4,r3
543 sldi r5,r25,2
544 srd r3,r26,r5
545
546 /* Secondary group ? if yes, get an inverted hash value */
547 mr r5,r28
548 andi. r0,r3,0x8 /* page secondary ? */
549 beq 1f
550 not r5,r5
5511: andi. r3,r3,0x7 /* extract idx alone */
552
553 /* Calculate proper slot value for ppc_md.hpte_updatepp */
554 and r0,r5,r27
555 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
556 add r3,r0,r3 /* add slot idx */
557
558 /* Call ppc_md.hpte_updatepp */
559 mr r5,r29 /* va */
560 li r6,MMU_PAGE_4K /* page size */
561 ld r7,STK_PARM(r8)(r1) /* get "local" param */
562_GLOBAL(htab_call_hpte_updatepp)
563 bl . /* patched by htab_finish_init() */
564
565 /* if we failed because typically the HPTE wasn't really here
566 * we try an insertion.
567 */
568 cmpdi 0,r3,-1
569 beq- htab_insert_pte
570
571 /* Clear the BUSY bit and Write out the PTE */
572 li r0,_PAGE_BUSY
573 andc r30,r30,r0
574 ld r6,STK_PARM(r6)(r1)
575 std r30,0(r6)
576 li r3,0
577 b htab_bail
578
579htab_wrong_access:
580 /* Bail out clearing reservation */
581 stdcx. r31,0,r6
582 li r3,1
583 b htab_bail
280 584
281htab_pte_insert_failure: 585htab_pte_insert_failure:
282 /* Bail out restoring old PTE */ 586 /* Bail out restoring old PTE */
283 ld r6,STK_PARM(r6)(r1) 587 ld r6,STK_PARM(r6)(r1)
284 std r31,0(r6) 588 std r31,0(r6)
285 li r3,-1 589 li r3,-1
286 b bail 590 b htab_bail
591
592
593/*****************************************************************************
594 * *
595 * 64K SW & 64K HW in a 64K segment pages implementation *
596 * *
597 *****************************************************************************/
598
599_GLOBAL(__hash_page_64K)
600 mflr r0
601 std r0,16(r1)
602 stdu r1,-STACKFRAMESIZE(r1)
603 /* Save all params that we need after a function call */
604 std r6,STK_PARM(r6)(r1)
605 std r8,STK_PARM(r8)(r1)
606
607 /* Add _PAGE_PRESENT to access */
608 ori r4,r4,_PAGE_PRESENT
609
610 /* Save non-volatile registers.
611 * r31 will hold "old PTE"
612 * r30 is "new PTE"
613 * r29 is "va"
614 * r28 is a hash value
615 * r27 is hashtab mask (maybe dynamic patched instead ?)
616 */
617 std r27,STK_REG(r27)(r1)
618 std r28,STK_REG(r28)(r1)
619 std r29,STK_REG(r29)(r1)
620 std r30,STK_REG(r30)(r1)
621 std r31,STK_REG(r31)(r1)
622
623 /* Step 1:
624 *
625 * Check permissions, atomically mark the linux PTE busy
626 * and hashed.
627 */
6281:
629 ldarx r31,0,r6
630 /* Check access rights (access & ~(pte_val(*ptep))) */
631 andc. r0,r4,r31
632 bne- ht64_wrong_access
633 /* Check if PTE is busy */
634 andi. r0,r31,_PAGE_BUSY
635 /* If so, just bail out and refault if needed. Someone else
636 * is changing this PTE anyway and might hash it.
637 */
638 bne- ht64_bail_ok
639 /* Prepare new PTE value (turn access RW into DIRTY, then
640 * add BUSY,HASHPTE and ACCESSED)
641 */
642 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
643 or r30,r30,r31
644 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
645 /* Write the linux PTE atomically (setting busy) */
646 stdcx. r30,0,r6
647 bne- 1b
648 isync
649
650 /* Step 2:
651 *
652 * Insert/Update the HPTE in the hash table. At this point,
653 * r4 (access) is re-useable, we use it for the new HPTE flags
654 */
655
656 /* Calc va and put it in r29 */
657 rldicr r29,r5,28,63-28
658 rldicl r3,r3,0,36
659 or r29,r3,r29
660
661 /* Calculate hash value for primary slot and store it in r28 */
662 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
663 rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
664 xor r28,r5,r0
665
666 /* Convert linux PTE bits into HW equivalents */
667 andi. r3,r30,0x1fe /* Get basic set of flags */
668 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
669 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
670 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
671 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
672 andc r0,r30,r0 /* r0 = pte & ~r0 */
673 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
674
675 /* We eventually do the icache sync here (maybe inline that
676 * code rather than call a C function...)
677 */
678BEGIN_FTR_SECTION
679 mr r4,r30
680 mr r5,r7
681 bl .hash_page_do_lazy_icache
682END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
683
684 /* At this point, r3 contains new PP bits, save them in
685 * place of "access" in the param area (sic)
686 */
687 std r3,STK_PARM(r4)(r1)
688
689 /* Get htab_hash_mask */
690 ld r4,htab_hash_mask@got(2)
691 ld r27,0(r4) /* htab_hash_mask -> r27 */
692
693 /* Check if we may already be in the hashtable, in this case, we
694 * go to out-of-line code to try to modify the HPTE
695 */
696 andi. r0,r31,_PAGE_HASHPTE
697 bne ht64_modify_pte
698
699ht64_insert_pte:
700 /* Clear hpte bits in new pte (we also clear BUSY btw) and
701 * add _PAGE_HASHPTE
702 */
703 lis r0,_PAGE_HPTEFLAGS@h
704 ori r0,r0,_PAGE_HPTEFLAGS@l
705 andc r30,r30,r0
706 ori r30,r30,_PAGE_HASHPTE
707
708 /* Physical address in r5 */
709 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
710 sldi r5,r5,PAGE_SHIFT
711
712 /* Calculate primary group hash */
713 and r0,r28,r27
714 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
715
716 /* Call ppc_md.hpte_insert */
717 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
718 mr r4,r29 /* Retrieve va */
719 li r7,0 /* !bolted, !secondary */
720 li r8,MMU_PAGE_64K
721_GLOBAL(ht64_call_hpte_insert1)
722 bl . /* patched by htab_finish_init() */
723 cmpdi 0,r3,0
724 bge ht64_pte_insert_ok /* Insertion successful */
725 cmpdi 0,r3,-2 /* Critical failure */
726 beq- ht64_pte_insert_failure
727
728 /* Now try secondary slot */
729
730 /* Physical address in r5 */
731 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
732 sldi r5,r5,PAGE_SHIFT
733
734 /* Calculate secondary group hash */
735 andc r0,r27,r28
736 rldicr r3,r0,3,63-3 /* r3 = (~hash & mask) << 3 */
737
738 /* Call ppc_md.hpte_insert */
739 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
740 mr r4,r29 /* Retrieve va */
741 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
742 li r8,MMU_PAGE_64K
743_GLOBAL(ht64_call_hpte_insert2)
744 bl . /* patched by htab_finish_init() */
745 cmpdi 0,r3,0
746 bge+ ht64_pte_insert_ok /* Insertion successful */
747 cmpdi 0,r3,-2 /* Critical failure */
748 beq- ht64_pte_insert_failure
749
750 /* Both are full, we need to evict something */
751 mftb r0
752 /* Pick a random group based on TB */
753 andi. r0,r0,1
754 mr r5,r28
755 bne 2f
756 not r5,r5
7572: and r0,r5,r27
758 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
759 /* Call ppc_md.hpte_remove */
760_GLOBAL(ht64_call_hpte_remove)
761 bl . /* patched by htab_finish_init() */
762
763 /* Try all again */
764 b ht64_insert_pte
765
766ht64_bail_ok:
767 li r3,0
768 b ht64_bail
769
770ht64_pte_insert_ok:
771 /* Insert slot number & secondary bit in PTE */
772 rldimi r30,r3,12,63-15
773
774 /* Write out the PTE with a normal write
775 * (maybe add eieio may be good still ?)
776 */
777ht64_write_out_pte:
778 ld r6,STK_PARM(r6)(r1)
779 std r30,0(r6)
780 li r3, 0
781ht64_bail:
782 ld r27,STK_REG(r27)(r1)
783 ld r28,STK_REG(r28)(r1)
784 ld r29,STK_REG(r29)(r1)
785 ld r30,STK_REG(r30)(r1)
786 ld r31,STK_REG(r31)(r1)
787 addi r1,r1,STACKFRAMESIZE
788 ld r0,16(r1)
789 mtlr r0
790 blr
791
792ht64_modify_pte:
793 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
794 mr r4,r3
795 rlwinm r3,r31,32-12,29,31
796
797 /* Secondary group ? if yes, get an inverted hash value */
798 mr r5,r28
799 andi. r0,r31,_PAGE_F_SECOND
800 beq 1f
801 not r5,r5
8021:
803 /* Calculate proper slot value for ppc_md.hpte_updatepp */
804 and r0,r5,r27
805 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
806 add r3,r0,r3 /* add slot idx */
807
808 /* Call ppc_md.hpte_updatepp */
809 mr r5,r29 /* va */
810 li r6,MMU_PAGE_64K
811 ld r7,STK_PARM(r8)(r1) /* get "local" param */
812_GLOBAL(ht64_call_hpte_updatepp)
813 bl . /* patched by htab_finish_init() */
814
815 /* if we failed because typically the HPTE wasn't really here
816 * we try an insertion.
817 */
818 cmpdi 0,r3,-1
819 beq- ht64_insert_pte
820
821 /* Clear the BUSY bit and Write out the PTE */
822 li r0,_PAGE_BUSY
823 andc r30,r30,r0
824 b ht64_write_out_pte
825
826ht64_wrong_access:
827 /* Bail out clearing reservation */
828 stdcx. r31,0,r6
829 li r3,1
830 b ht64_bail
831
832ht64_pte_insert_failure:
833 /* Bail out restoring old PTE */
834 ld r6,STK_PARM(r6)(r1)
835 std r31,0(r6)
836 li r3,-1
837 b ht64_bail
838
839
840#endif /* CONFIG_PPC_64K_PAGES */
287 841
288 842
843/*****************************************************************************
844 * *
845 * Huge pages implementation is in hugetlbpage.c *
846 * *
847 *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c28..d96bcfe4c6f6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 */ 11 */
12
13#undef DEBUG_LOW
14
12#include <linux/spinlock.h> 15#include <linux/spinlock.h>
13#include <linux/bitops.h> 16#include <linux/bitops.h>
14#include <linux/threads.h> 17#include <linux/threads.h>
@@ -22,11 +25,84 @@
22#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
23#include <asm/tlb.h> 26#include <asm/tlb.h>
24#include <asm/cputable.h> 27#include <asm/cputable.h>
28#include <asm/udbg.h>
29
30#ifdef DEBUG_LOW
31#define DBG_LOW(fmt...) udbg_printf(fmt)
32#else
33#define DBG_LOW(fmt...)
34#endif
25 35
26#define HPTE_LOCK_BIT 3 36#define HPTE_LOCK_BIT 3
27 37
28static DEFINE_SPINLOCK(native_tlbie_lock); 38static DEFINE_SPINLOCK(native_tlbie_lock);
29 39
40static inline void __tlbie(unsigned long va, unsigned int psize)
41{
42 unsigned int penc;
43
44 /* clear top 16 bits, non SLS segment */
45 va &= ~(0xffffULL << 48);
46
47 switch (psize) {
48 case MMU_PAGE_4K:
49 va &= ~0xffful;
50 asm volatile("tlbie %0,0" : : "r" (va) : "memory");
51 break;
52 default:
53 penc = mmu_psize_defs[psize].penc;
54 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
55 va |= (0x7f >> (8 - penc)) << 12;
56 asm volatile("tlbie %0,1" : : "r" (va) : "memory");
57 break;
58 }
59}
60
61static inline void __tlbiel(unsigned long va, unsigned int psize)
62{
63 unsigned int penc;
64
65 /* clear top 16 bits, non SLS segment */
66 va &= ~(0xffffULL << 48);
67
68 switch (psize) {
69 case MMU_PAGE_4K:
70 va &= ~0xffful;
71 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
72 : : "r"(va) : "memory");
73 break;
74 default:
75 penc = mmu_psize_defs[psize].penc;
76 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
77 va |= (0x7f >> (8 - penc)) << 12;
78 asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
79 : : "r"(va) : "memory");
80 break;
81 }
82
83}
84
85static inline void tlbie(unsigned long va, int psize, int local)
86{
87 unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
88 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
89
90 if (use_local)
91 use_local = mmu_psize_defs[psize].tlbiel;
92 if (lock_tlbie && !use_local)
93 spin_lock(&native_tlbie_lock);
94 asm volatile("ptesync": : :"memory");
95 if (use_local) {
96 __tlbiel(va, psize);
97 asm volatile("ptesync": : :"memory");
98 } else {
99 __tlbie(va, psize);
100 asm volatile("eieio; tlbsync; ptesync": : :"memory");
101 }
102 if (lock_tlbie && !use_local)
103 spin_unlock(&native_tlbie_lock);
104}
105
30static inline void native_lock_hpte(hpte_t *hptep) 106static inline void native_lock_hpte(hpte_t *hptep)
31{ 107{
32 unsigned long *word = &hptep->v; 108 unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
48} 124}
49 125
50long native_hpte_insert(unsigned long hpte_group, unsigned long va, 126long native_hpte_insert(unsigned long hpte_group, unsigned long va,
51 unsigned long prpn, unsigned long vflags, 127 unsigned long pa, unsigned long rflags,
52 unsigned long rflags) 128 unsigned long vflags, int psize)
53{ 129{
54 hpte_t *hptep = htab_address + hpte_group; 130 hpte_t *hptep = htab_address + hpte_group;
55 unsigned long hpte_v, hpte_r; 131 unsigned long hpte_v, hpte_r;
56 int i; 132 int i;
57 133
134 if (!(vflags & HPTE_V_BOLTED)) {
135 DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
136 " rflags=%lx, vflags=%lx, psize=%d)\n",
137 hpte_group, va, pa, rflags, vflags, psize);
138 }
139
58 for (i = 0; i < HPTES_PER_GROUP; i++) { 140 for (i = 0; i < HPTES_PER_GROUP; i++) {
59 if (! (hptep->v & HPTE_V_VALID)) { 141 if (! (hptep->v & HPTE_V_VALID)) {
60 /* retry with lock held */ 142 /* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
70 if (i == HPTES_PER_GROUP) 152 if (i == HPTES_PER_GROUP)
71 return -1; 153 return -1;
72 154
73 hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; 155 hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
74 if (vflags & HPTE_V_LARGE) 156 hpte_r = hpte_encode_r(pa, psize) | rflags;
75 va &= ~(1UL << HPTE_V_AVPN_SHIFT); 157
76 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; 158 if (!(vflags & HPTE_V_BOLTED)) {
159 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
160 i, hpte_v, hpte_r);
161 }
77 162
78 hptep->r = hpte_r; 163 hptep->r = hpte_r;
79 /* Guarantee the second dword is visible before the valid bit */ 164 /* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
96 int slot_offset; 181 int slot_offset;
97 unsigned long hpte_v; 182 unsigned long hpte_v;
98 183
184 DBG_LOW(" remove(group=%lx)\n", hpte_group);
185
99 /* pick a random entry to start at */ 186 /* pick a random entry to start at */
100 slot_offset = mftb() & 0x7; 187 slot_offset = mftb() & 0x7;
101 188
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
126 return i; 213 return i;
127} 214}
128 215
129static inline void set_pp_bit(unsigned long pp, hpte_t *addr) 216static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
217 unsigned long va, int psize, int local)
130{ 218{
131 unsigned long old; 219 hpte_t *hptep = htab_address + slot;
132 unsigned long *p = &addr->r; 220 unsigned long hpte_v, want_v;
133 221 int ret = 0;
134 __asm__ __volatile__( 222
135 "1: ldarx %0,0,%3\n\ 223 want_v = hpte_encode_v(va, psize);
136 rldimi %0,%2,0,61\n\ 224
137 stdcx. %0,0,%3\n\ 225 DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
138 bne 1b" 226 va, want_v & HPTE_V_AVPN, slot, newpp);
139 : "=&r" (old), "=m" (*p) 227
140 : "r" (pp), "r" (p), "m" (*p) 228 native_lock_hpte(hptep);
141 : "cc"); 229
230 hpte_v = hptep->v;
231
232 /* Even if we miss, we need to invalidate the TLB */
233 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
234 DBG_LOW(" -> miss\n");
235 native_unlock_hpte(hptep);
236 ret = -1;
237 } else {
238 DBG_LOW(" -> hit\n");
239 /* Update the HPTE */
240 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
241 (newpp & (HPTE_R_PP | HPTE_R_N));
242 native_unlock_hpte(hptep);
243 }
244
245 /* Ensure it is out of the tlb too. */
246 tlbie(va, psize, local);
247
248 return ret;
142} 249}
143 250
144/* 251static long native_hpte_find(unsigned long va, int psize)
145 * Only works on small pages. Yes its ugly to have to check each slot in
146 * the group but we only use this during bootup.
147 */
148static long native_hpte_find(unsigned long vpn)
149{ 252{
150 hpte_t *hptep; 253 hpte_t *hptep;
151 unsigned long hash; 254 unsigned long hash;
152 unsigned long i, j; 255 unsigned long i, j;
153 long slot; 256 long slot;
154 unsigned long hpte_v; 257 unsigned long want_v, hpte_v;
155 258
156 hash = hpt_hash(vpn, 0); 259 hash = hpt_hash(va, mmu_psize_defs[psize].shift);
260 want_v = hpte_encode_v(va, psize);
157 261
158 for (j = 0; j < 2; j++) { 262 for (j = 0; j < 2; j++) {
159 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 263 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
161 hptep = htab_address + slot; 265 hptep = htab_address + slot;
162 hpte_v = hptep->v; 266 hpte_v = hptep->v;
163 267
164 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) 268 if (HPTE_V_COMPARE(hpte_v, want_v)
165 && (hpte_v & HPTE_V_VALID) 269 && (hpte_v & HPTE_V_VALID)
166 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { 270 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
167 /* HPTE matches */ 271 /* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
177 return -1; 281 return -1;
178} 282}
179 283
180static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
181 unsigned long va, int large, int local)
182{
183 hpte_t *hptep = htab_address + slot;
184 unsigned long hpte_v;
185 unsigned long avpn = va >> 23;
186 int ret = 0;
187
188 if (large)
189 avpn &= ~1;
190
191 native_lock_hpte(hptep);
192
193 hpte_v = hptep->v;
194
195 /* Even if we miss, we need to invalidate the TLB */
196 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
197 || !(hpte_v & HPTE_V_VALID)) {
198 native_unlock_hpte(hptep);
199 ret = -1;
200 } else {
201 set_pp_bit(newpp, hptep);
202 native_unlock_hpte(hptep);
203 }
204
205 /* Ensure it is out of the tlb too */
206 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
207 tlbiel(va);
208 } else {
209 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
210
211 if (lock_tlbie)
212 spin_lock(&native_tlbie_lock);
213 tlbie(va, large);
214 if (lock_tlbie)
215 spin_unlock(&native_tlbie_lock);
216 }
217
218 return ret;
219}
220
221/* 284/*
222 * Update the page protection bits. Intended to be used to create 285 * Update the page protection bits. Intended to be used to create
223 * guard pages for kernel data structures on pages which are bolted 286 * guard pages for kernel data structures on pages which are bolted
224 * in the HPT. Assumes pages being operated on will not be stolen. 287 * in the HPT. Assumes pages being operated on will not be stolen.
225 * Does not work on large pages.
226 * 288 *
227 * No need to lock here because we should be the only user. 289 * No need to lock here because we should be the only user.
228 */ 290 */
229static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) 291static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
292 int psize)
230{ 293{
231 unsigned long vsid, va, vpn, flags = 0; 294 unsigned long vsid, va;
232 long slot; 295 long slot;
233 hpte_t *hptep; 296 hpte_t *hptep;
234 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
235 297
236 vsid = get_kernel_vsid(ea); 298 vsid = get_kernel_vsid(ea);
237 va = (vsid << 28) | (ea & 0x0fffffff); 299 va = (vsid << 28) | (ea & 0x0fffffff);
238 vpn = va >> PAGE_SHIFT;
239 300
240 slot = native_hpte_find(vpn); 301 slot = native_hpte_find(va, psize);
241 if (slot == -1) 302 if (slot == -1)
242 panic("could not find page to bolt\n"); 303 panic("could not find page to bolt\n");
243 hptep = htab_address + slot; 304 hptep = htab_address + slot;
244 305
245 set_pp_bit(newpp, hptep); 306 /* Update the HPTE */
307 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
308 (newpp & (HPTE_R_PP | HPTE_R_N));
246 309
247 /* Ensure it is out of the tlb too */ 310 /* Ensure it is out of the tlb too. */
248 if (lock_tlbie) 311 tlbie(va, psize, 0);
249 spin_lock_irqsave(&native_tlbie_lock, flags);
250 tlbie(va, 0);
251 if (lock_tlbie)
252 spin_unlock_irqrestore(&native_tlbie_lock, flags);
253} 312}
254 313
255static void native_hpte_invalidate(unsigned long slot, unsigned long va, 314static void native_hpte_invalidate(unsigned long slot, unsigned long va,
256 int large, int local) 315 int psize, int local)
257{ 316{
258 hpte_t *hptep = htab_address + slot; 317 hpte_t *hptep = htab_address + slot;
259 unsigned long hpte_v; 318 unsigned long hpte_v;
260 unsigned long avpn = va >> 23; 319 unsigned long want_v;
261 unsigned long flags; 320 unsigned long flags;
262 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
263
264 if (large)
265 avpn &= ~1;
266 321
267 local_irq_save(flags); 322 local_irq_save(flags);
268 native_lock_hpte(hptep);
269 323
324 DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
325
326 want_v = hpte_encode_v(va, psize);
327 native_lock_hpte(hptep);
270 hpte_v = hptep->v; 328 hpte_v = hptep->v;
271 329
272 /* Even if we miss, we need to invalidate the TLB */ 330 /* Even if we miss, we need to invalidate the TLB */
273 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) 331 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
274 || !(hpte_v & HPTE_V_VALID)) {
275 native_unlock_hpte(hptep); 332 native_unlock_hpte(hptep);
276 } else { 333 else
277 /* Invalidate the hpte. NOTE: this also unlocks it */ 334 /* Invalidate the hpte. NOTE: this also unlocks it */
278 hptep->v = 0; 335 hptep->v = 0;
279 }
280 336
281 /* Invalidate the tlb */ 337 /* Invalidate the TLB */
282 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 338 tlbie(va, psize, local);
283 tlbiel(va); 339
284 } else {
285 if (lock_tlbie)
286 spin_lock(&native_tlbie_lock);
287 tlbie(va, large);
288 if (lock_tlbie)
289 spin_unlock(&native_tlbie_lock);
290 }
291 local_irq_restore(flags); 340 local_irq_restore(flags);
292} 341}
293 342
294/* 343/*
344 * XXX This needs fixing based on page size. It's only used by
345 * native_hpte_clear() for now which needs fixing too so they
346 * make a good pair...
347 */
348static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
349{
350 unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
351 unsigned long va;
352
353 va = avpn << 23;
354
355 if (! (hpte_v & HPTE_V_LARGE)) {
356 unsigned long vpi, pteg;
357
358 pteg = slot / HPTES_PER_GROUP;
359 if (hpte_v & HPTE_V_SECONDARY)
360 pteg = ~pteg;
361
362 vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
363
364 va |= vpi << PAGE_SHIFT;
365 }
366
367 return va;
368}
369
370/*
295 * clear all mappings on kexec. All cpus are in real mode (or they will 371 * clear all mappings on kexec. All cpus are in real mode (or they will
296 * be when they isi), and we are the only one left. We rely on our kernel 372 * be when they isi), and we are the only one left. We rely on our kernel
297 * mapping being 0xC0's and the hardware ignoring those two real bits. 373 * mapping being 0xC0's and the hardware ignoring those two real bits.
298 * 374 *
299 * TODO: add batching support when enabled. remember, no dynamic memory here, 375 * TODO: add batching support when enabled. remember, no dynamic memory here,
300 * although there is the control page available... 376 * although there is the control page available...
377 *
378 * XXX FIXME: 4k only for now !
301 */ 379 */
302static void native_hpte_clear(void) 380static void native_hpte_clear(void)
303{ 381{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
327 405
328 if (hpte_v & HPTE_V_VALID) { 406 if (hpte_v & HPTE_V_VALID) {
329 hptep->v = 0; 407 hptep->v = 0;
330 tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); 408 tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
331 } 409 }
332 } 410 }
333 411
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
335 local_irq_restore(flags); 413 local_irq_restore(flags);
336} 414}
337 415
416/*
417 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
418 * the lock all the time
419 */
338static void native_flush_hash_range(unsigned long number, int local) 420static void native_flush_hash_range(unsigned long number, int local)
339{ 421{
340 unsigned long va, vpn, hash, secondary, slot, flags, avpn; 422 unsigned long va, hash, index, hidx, shift, slot;
341 int i, j;
342 hpte_t *hptep; 423 hpte_t *hptep;
343 unsigned long hpte_v; 424 unsigned long hpte_v;
425 unsigned long want_v;
426 unsigned long flags;
427 real_pte_t pte;
344 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 428 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
345 unsigned long large = batch->large; 429 unsigned long psize = batch->psize;
430 int i;
346 431
347 local_irq_save(flags); 432 local_irq_save(flags);
348 433
349 j = 0;
350 for (i = 0; i < number; i++) { 434 for (i = 0; i < number; i++) {
351 va = batch->vaddr[j]; 435 va = batch->vaddr[i];
352 if (large) 436 pte = batch->pte[i];
353 vpn = va >> HPAGE_SHIFT; 437
354 else 438 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
355 vpn = va >> PAGE_SHIFT; 439 hash = hpt_hash(va, shift);
356 hash = hpt_hash(vpn, large); 440 hidx = __rpte_to_hidx(pte, index);
357 secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; 441 if (hidx & _PTEIDX_SECONDARY)
358 if (secondary) 442 hash = ~hash;
359 hash = ~hash; 443 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
360 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 444 slot += hidx & _PTEIDX_GROUP_IX;
361 slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; 445 hptep = htab_address + slot;
362 446 want_v = hpte_encode_v(va, psize);
363 hptep = htab_address + slot; 447 native_lock_hpte(hptep);
364 448 hpte_v = hptep->v;
365 avpn = va >> 23; 449 if (!HPTE_V_COMPARE(hpte_v, want_v) ||
366 if (large) 450 !(hpte_v & HPTE_V_VALID))
367 avpn &= ~0x1UL; 451 native_unlock_hpte(hptep);
368 452 else
369 native_lock_hpte(hptep); 453 hptep->v = 0;
370 454 } pte_iterate_hashed_end();
371 hpte_v = hptep->v;
372
373 /* Even if we miss, we need to invalidate the TLB */
374 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
375 || !(hpte_v & HPTE_V_VALID)) {
376 native_unlock_hpte(hptep);
377 } else {
378 /* Invalidate the hpte. NOTE: this also unlocks it */
379 hptep->v = 0;
380 }
381
382 j++;
383 } 455 }
384 456
385 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 457 if (cpu_has_feature(CPU_FTR_TLBIEL) &&
458 mmu_psize_defs[psize].tlbiel && local) {
386 asm volatile("ptesync":::"memory"); 459 asm volatile("ptesync":::"memory");
387 460 for (i = 0; i < number; i++) {
388 for (i = 0; i < j; i++) 461 va = batch->vaddr[i];
389 __tlbiel(batch->vaddr[i]); 462 pte = batch->pte[i];
390 463
464 pte_iterate_hashed_subpages(pte, psize, va, index,
465 shift) {
466 __tlbiel(va, psize);
467 } pte_iterate_hashed_end();
468 }
391 asm volatile("ptesync":::"memory"); 469 asm volatile("ptesync":::"memory");
392 } else { 470 } else {
393 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); 471 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
396 spin_lock(&native_tlbie_lock); 474 spin_lock(&native_tlbie_lock);
397 475
398 asm volatile("ptesync":::"memory"); 476 asm volatile("ptesync":::"memory");
399 477 for (i = 0; i < number; i++) {
400 for (i = 0; i < j; i++) 478 va = batch->vaddr[i];
401 __tlbie(batch->vaddr[i], large); 479 pte = batch->pte[i];
402 480
481 pte_iterate_hashed_subpages(pte, psize, va, index,
482 shift) {
483 __tlbie(va, psize);
484 } pte_iterate_hashed_end();
485 }
403 asm volatile("eieio; tlbsync; ptesync":::"memory"); 486 asm volatile("eieio; tlbsync; ptesync":::"memory");
404 487
405 if (lock_tlbie) 488 if (lock_tlbie)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b6970c96d96f..37273f518a35 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#undef DEBUG 21#undef DEBUG
22#undef DEBUG_LOW
22 23
23#include <linux/config.h> 24#include <linux/config.h>
24#include <linux/spinlock.h> 25#include <linux/spinlock.h>
@@ -58,6 +59,15 @@
58#define DBG(fmt...) 59#define DBG(fmt...)
59#endif 60#endif
60 61
62#ifdef DEBUG_LOW
63#define DBG_LOW(fmt...) udbg_printf(fmt)
64#else
65#define DBG_LOW(fmt...)
66#endif
67
68#define KB (1024)
69#define MB (1024*KB)
70
61/* 71/*
62 * Note: pte --> Linux PTE 72 * Note: pte --> Linux PTE
63 * HPTE --> PowerPC Hashed Page Table Entry 73 * HPTE --> PowerPC Hashed Page Table Entry
@@ -76,91 +86,290 @@ extern unsigned long dart_tablebase;
76 86
77hpte_t *htab_address; 87hpte_t *htab_address;
78unsigned long htab_hash_mask; 88unsigned long htab_hash_mask;
79
80unsigned long _SDR1; 89unsigned long _SDR1;
90struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
91int mmu_linear_psize = MMU_PAGE_4K;
92int mmu_virtual_psize = MMU_PAGE_4K;
93#ifdef CONFIG_HUGETLB_PAGE
94int mmu_huge_psize = MMU_PAGE_16M;
95unsigned int HPAGE_SHIFT;
96#endif
81 97
82#define KB (1024) 98/* These are definitions of page size arrays to be used when none
83#define MB (1024*KB) 99 * is provided by the firmware.
84 100 */
85static inline void loop_forever(void)
86{
87 volatile unsigned long x = 1;
88 for(;x;x|=1)
89 ;
90}
91 101
92static inline void create_pte_mapping(unsigned long start, unsigned long end, 102/* Pre-POWER4 CPUs (4k pages only)
93 unsigned long mode, int large) 103 */
104struct mmu_psize_def mmu_psize_defaults_old[] = {
105 [MMU_PAGE_4K] = {
106 .shift = 12,
107 .sllp = 0,
108 .penc = 0,
109 .avpnm = 0,
110 .tlbiel = 0,
111 },
112};
113
114/* POWER4, GPUL, POWER5
115 *
116 * Support for 16Mb large pages
117 */
118struct mmu_psize_def mmu_psize_defaults_gp[] = {
119 [MMU_PAGE_4K] = {
120 .shift = 12,
121 .sllp = 0,
122 .penc = 0,
123 .avpnm = 0,
124 .tlbiel = 1,
125 },
126 [MMU_PAGE_16M] = {
127 .shift = 24,
128 .sllp = SLB_VSID_L,
129 .penc = 0,
130 .avpnm = 0x1UL,
131 .tlbiel = 0,
132 },
133};
134
135
136int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
137 unsigned long pstart, unsigned long mode, int psize)
94{ 138{
95 unsigned long addr; 139 unsigned long vaddr, paddr;
96 unsigned int step; 140 unsigned int step, shift;
97 unsigned long tmp_mode; 141 unsigned long tmp_mode;
98 unsigned long vflags; 142 int ret = 0;
99 143
100 if (large) { 144 shift = mmu_psize_defs[psize].shift;
101 step = 16*MB; 145 step = 1 << shift;
102 vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
103 } else {
104 step = 4*KB;
105 vflags = HPTE_V_BOLTED;
106 }
107 146
108 for (addr = start; addr < end; addr += step) { 147 for (vaddr = vstart, paddr = pstart; vaddr < vend;
148 vaddr += step, paddr += step) {
109 unsigned long vpn, hash, hpteg; 149 unsigned long vpn, hash, hpteg;
110 unsigned long vsid = get_kernel_vsid(addr); 150 unsigned long vsid = get_kernel_vsid(vaddr);
111 unsigned long va = (vsid << 28) | (addr & 0xfffffff); 151 unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
112 int ret = -1;
113
114 if (large)
115 vpn = va >> HPAGE_SHIFT;
116 else
117 vpn = va >> PAGE_SHIFT;
118
119 152
153 vpn = va >> shift;
120 tmp_mode = mode; 154 tmp_mode = mode;
121 155
122 /* Make non-kernel text non-executable */ 156 /* Make non-kernel text non-executable */
123 if (!in_kernel_text(addr)) 157 if (!in_kernel_text(vaddr))
124 tmp_mode = mode | HW_NO_EXEC; 158 tmp_mode = mode | HPTE_R_N;
125
126 hash = hpt_hash(vpn, large);
127 159
160 hash = hpt_hash(va, shift);
128 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 161 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
129 162
163 /* The crap below can be cleaned once ppc_md.probe() can
164 * set up the hash callbacks, thus we can just use the
165 * normal insert callback here.
166 */
130#ifdef CONFIG_PPC_ISERIES 167#ifdef CONFIG_PPC_ISERIES
131 if (systemcfg->platform & PLATFORM_ISERIES_LPAR) 168 if (systemcfg->platform == PLATFORM_ISERIES_LPAR)
132 ret = iSeries_hpte_bolt_or_insert(hpteg, va, 169 ret = iSeries_hpte_insert(hpteg, va,
133 virt_to_abs(addr) >> PAGE_SHIFT, 170 virt_to_abs(paddr),
134 vflags, tmp_mode); 171 tmp_mode,
172 HPTE_V_BOLTED,
173 psize);
135 else 174 else
136#endif 175#endif
137#ifdef CONFIG_PPC_PSERIES 176#ifdef CONFIG_PPC_PSERIES
138 if (systemcfg->platform & PLATFORM_LPAR) 177 if (systemcfg->platform & PLATFORM_LPAR)
139 ret = pSeries_lpar_hpte_insert(hpteg, va, 178 ret = pSeries_lpar_hpte_insert(hpteg, va,
140 virt_to_abs(addr) >> PAGE_SHIFT, 179 virt_to_abs(paddr),
141 vflags, tmp_mode); 180 tmp_mode,
181 HPTE_V_BOLTED,
182 psize);
142 else 183 else
143#endif 184#endif
144#ifdef CONFIG_PPC_MULTIPLATFORM 185#ifdef CONFIG_PPC_MULTIPLATFORM
145 ret = native_hpte_insert(hpteg, va, 186 ret = native_hpte_insert(hpteg, va,
146 virt_to_abs(addr) >> PAGE_SHIFT, 187 virt_to_abs(paddr),
147 vflags, tmp_mode); 188 tmp_mode, HPTE_V_BOLTED,
189 psize);
148#endif 190#endif
191 if (ret < 0)
192 break;
193 }
194 return ret < 0 ? ret : 0;
195}
149 196
150 if (ret == -1) { 197static int __init htab_dt_scan_page_sizes(unsigned long node,
151 ppc64_terminate_msg(0x20, "create_pte_mapping"); 198 const char *uname, int depth,
152 loop_forever(); 199 void *data)
200{
201 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
202 u32 *prop;
203 unsigned long size = 0;
204
205 /* We are scanning "cpu" nodes only */
206 if (type == NULL || strcmp(type, "cpu") != 0)
207 return 0;
208
209 prop = (u32 *)of_get_flat_dt_prop(node,
210 "ibm,segment-page-sizes", &size);
211 if (prop != NULL) {
212 DBG("Page sizes from device-tree:\n");
213 size /= 4;
214 cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
215 while(size > 0) {
216 unsigned int shift = prop[0];
217 unsigned int slbenc = prop[1];
218 unsigned int lpnum = prop[2];
219 unsigned int lpenc = 0;
220 struct mmu_psize_def *def;
221 int idx = -1;
222
223 size -= 3; prop += 3;
224 while(size > 0 && lpnum) {
225 if (prop[0] == shift)
226 lpenc = prop[1];
227 prop += 2; size -= 2;
228 lpnum--;
229 }
230 switch(shift) {
231 case 0xc:
232 idx = MMU_PAGE_4K;
233 break;
234 case 0x10:
235 idx = MMU_PAGE_64K;
236 break;
237 case 0x14:
238 idx = MMU_PAGE_1M;
239 break;
240 case 0x18:
241 idx = MMU_PAGE_16M;
242 cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
243 break;
244 case 0x22:
245 idx = MMU_PAGE_16G;
246 break;
247 }
248 if (idx < 0)
249 continue;
250 def = &mmu_psize_defs[idx];
251 def->shift = shift;
252 if (shift <= 23)
253 def->avpnm = 0;
254 else
255 def->avpnm = (1 << (shift - 23)) - 1;
256 def->sllp = slbenc;
257 def->penc = lpenc;
258 /* We don't know for sure what's up with tlbiel, so
259 * for now we only set it for 4K and 64K pages
260 */
261 if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
262 def->tlbiel = 1;
263 else
264 def->tlbiel = 0;
265
266 DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
267 "tlbiel=%d, penc=%d\n",
268 idx, shift, def->sllp, def->avpnm, def->tlbiel,
269 def->penc);
153 } 270 }
271 return 1;
272 }
273 return 0;
274}
275
276
277static void __init htab_init_page_sizes(void)
278{
279 int rc;
280
281 /* Default to 4K pages only */
282 memcpy(mmu_psize_defs, mmu_psize_defaults_old,
283 sizeof(mmu_psize_defaults_old));
284
285 /*
286 * Try to find the available page sizes in the device-tree
287 */
288 rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
289 if (rc != 0) /* Found */
290 goto found;
291
292 /*
293 * Not in the device-tree, let's fallback on known size
294 * list for 16M capable GP & GR
295 */
296 if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) &&
297 cpu_has_feature(CPU_FTR_16M_PAGE))
298 memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
299 sizeof(mmu_psize_defaults_gp));
300 found:
301 /*
302 * Pick a size for the linear mapping. Currently, we only support
303 * 16M, 1M and 4K which is the default
304 */
305 if (mmu_psize_defs[MMU_PAGE_16M].shift)
306 mmu_linear_psize = MMU_PAGE_16M;
307 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
308 mmu_linear_psize = MMU_PAGE_1M;
309
310 /*
311 * Pick a size for the ordinary pages. Default is 4K, we support
312 * 64K if cache inhibited large pages are supported by the
313 * processor
314 */
315#ifdef CONFIG_PPC_64K_PAGES
316 if (mmu_psize_defs[MMU_PAGE_64K].shift &&
317 cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
318 mmu_virtual_psize = MMU_PAGE_64K;
319#endif
320
321 printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
322 mmu_psize_defs[mmu_linear_psize].shift,
323 mmu_psize_defs[mmu_virtual_psize].shift);
324
325#ifdef CONFIG_HUGETLB_PAGE
326 /* Init large page size. Currently, we pick 16M or 1M depending
327 * on what is available
328 */
329 if (mmu_psize_defs[MMU_PAGE_16M].shift)
330 mmu_huge_psize = MMU_PAGE_16M;
331 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
332 mmu_huge_psize = MMU_PAGE_1M;
333
334 /* Calculate HPAGE_SHIFT and sanity check it */
335 if (mmu_psize_defs[mmu_huge_psize].shift > 16 &&
336 mmu_psize_defs[mmu_huge_psize].shift < 28)
337 HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
338 else
339 HPAGE_SHIFT = 0; /* No huge pages dude ! */
340#endif /* CONFIG_HUGETLB_PAGE */
341}
342
343static int __init htab_dt_scan_pftsize(unsigned long node,
344 const char *uname, int depth,
345 void *data)
346{
347 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
348 u32 *prop;
349
350 /* We are scanning "cpu" nodes only */
351 if (type == NULL || strcmp(type, "cpu") != 0)
352 return 0;
353
354 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
355 if (prop != NULL) {
356 /* pft_size[0] is the NUMA CEC cookie */
357 ppc64_pft_size = prop[1];
358 return 1;
154 } 359 }
360 return 0;
155} 361}
156 362
157static unsigned long get_hashtable_size(void) 363static unsigned long __init htab_get_table_size(void)
158{ 364{
159 unsigned long rnd_mem_size, pteg_count; 365 unsigned long rnd_mem_size, pteg_count;
160 366
161 /* If hash size wasn't obtained in prom.c, we calculate it now based on 367 /* If hash size isn't already provided by the platform, we try to
162 * the total RAM size 368 * retrieve it from the device-tree. If it's not there either, we
369 * calculate it now based on the total RAM size
163 */ 370 */
371 if (ppc64_pft_size == 0)
372 of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
164 if (ppc64_pft_size) 373 if (ppc64_pft_size)
165 return 1UL << ppc64_pft_size; 374 return 1UL << ppc64_pft_size;
166 375
@@ -180,17 +389,21 @@ void __init htab_initialize(void)
180 unsigned long table, htab_size_bytes; 389 unsigned long table, htab_size_bytes;
181 unsigned long pteg_count; 390 unsigned long pteg_count;
182 unsigned long mode_rw; 391 unsigned long mode_rw;
183 int i, use_largepages = 0;
184 unsigned long base = 0, size = 0; 392 unsigned long base = 0, size = 0;
393 int i;
394
185 extern unsigned long tce_alloc_start, tce_alloc_end; 395 extern unsigned long tce_alloc_start, tce_alloc_end;
186 396
187 DBG(" -> htab_initialize()\n"); 397 DBG(" -> htab_initialize()\n");
188 398
399 /* Initialize page sizes */
400 htab_init_page_sizes();
401
189 /* 402 /*
190 * Calculate the required size of the htab. We want the number of 403 * Calculate the required size of the htab. We want the number of
191 * PTEGs to equal one half the number of real pages. 404 * PTEGs to equal one half the number of real pages.
192 */ 405 */
193 htab_size_bytes = get_hashtable_size(); 406 htab_size_bytes = htab_get_table_size();
194 pteg_count = htab_size_bytes >> 7; 407 pteg_count = htab_size_bytes >> 7;
195 408
196 htab_hash_mask = pteg_count - 1; 409 htab_hash_mask = pteg_count - 1;
@@ -204,14 +417,11 @@ void __init htab_initialize(void)
204 * the absolute address space. 417 * the absolute address space.
205 */ 418 */
206 table = lmb_alloc(htab_size_bytes, htab_size_bytes); 419 table = lmb_alloc(htab_size_bytes, htab_size_bytes);
420 BUG_ON(table == 0);
207 421
208 DBG("Hash table allocated at %lx, size: %lx\n", table, 422 DBG("Hash table allocated at %lx, size: %lx\n", table,
209 htab_size_bytes); 423 htab_size_bytes);
210 424
211 if ( !table ) {
212 ppc64_terminate_msg(0x20, "hpt space");
213 loop_forever();
214 }
215 htab_address = abs_to_virt(table); 425 htab_address = abs_to_virt(table);
216 426
217 /* htab absolute addr + encoded htabsize */ 427 /* htab absolute addr + encoded htabsize */
@@ -227,8 +437,6 @@ void __init htab_initialize(void)
227 * _NOT_ map it to avoid cache paradoxes as it's remapped non 437 * _NOT_ map it to avoid cache paradoxes as it's remapped non
228 * cacheable later on 438 * cacheable later on
229 */ 439 */
230 if (cpu_has_feature(CPU_FTR_16M_PAGE))
231 use_largepages = 1;
232 440
233 /* create bolted the linear mapping in the hash table */ 441 /* create bolted the linear mapping in the hash table */
234 for (i=0; i < lmb.memory.cnt; i++) { 442 for (i=0; i < lmb.memory.cnt; i++) {
@@ -239,27 +447,32 @@ void __init htab_initialize(void)
239 447
240#ifdef CONFIG_U3_DART 448#ifdef CONFIG_U3_DART
241 /* Do not map the DART space. Fortunately, it will be aligned 449 /* Do not map the DART space. Fortunately, it will be aligned
242 * in such a way that it will not cross two lmb regions and will 450 * in such a way that it will not cross two lmb regions and
243 * fit within a single 16Mb page. 451 * will fit within a single 16Mb page.
244 * The DART space is assumed to be a full 16Mb region even if we 452 * The DART space is assumed to be a full 16Mb region even if
245 * only use 2Mb of that space. We will use more of it later for 453 * we only use 2Mb of that space. We will use more of it later
246 * AGP GART. We have to use a full 16Mb large page. 454 * for AGP GART. We have to use a full 16Mb large page.
247 */ 455 */
248 DBG("DART base: %lx\n", dart_tablebase); 456 DBG("DART base: %lx\n", dart_tablebase);
249 457
250 if (dart_tablebase != 0 && dart_tablebase >= base 458 if (dart_tablebase != 0 && dart_tablebase >= base
251 && dart_tablebase < (base + size)) { 459 && dart_tablebase < (base + size)) {
252 if (base != dart_tablebase) 460 if (base != dart_tablebase)
253 create_pte_mapping(base, dart_tablebase, mode_rw, 461 BUG_ON(htab_bolt_mapping(base, dart_tablebase,
254 use_largepages); 462 base, mode_rw,
463 mmu_linear_psize));
255 if ((base + size) > (dart_tablebase + 16*MB)) 464 if ((base + size) > (dart_tablebase + 16*MB))
256 create_pte_mapping(dart_tablebase + 16*MB, base + size, 465 BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
257 mode_rw, use_largepages); 466 base + size,
467 dart_tablebase+16*MB,
468 mode_rw,
469 mmu_linear_psize));
258 continue; 470 continue;
259 } 471 }
260#endif /* CONFIG_U3_DART */ 472#endif /* CONFIG_U3_DART */
261 create_pte_mapping(base, base + size, mode_rw, use_largepages); 473 BUG_ON(htab_bolt_mapping(base, base + size, base,
262 } 474 mode_rw, mmu_linear_psize));
475 }
263 476
264 /* 477 /*
265 * If we have a memory_limit and we've allocated TCEs then we need to 478 * If we have a memory_limit and we've allocated TCEs then we need to
@@ -275,8 +488,9 @@ void __init htab_initialize(void)
275 if (base + size >= tce_alloc_start) 488 if (base + size >= tce_alloc_start)
276 tce_alloc_start = base + size + 1; 489 tce_alloc_start = base + size + 1;
277 490
278 create_pte_mapping(tce_alloc_start, tce_alloc_end, 491 BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
279 mode_rw, use_largepages); 492 tce_alloc_start, mode_rw,
493 mmu_linear_psize));
280 } 494 }
281 495
282 DBG(" <- htab_initialize()\n"); 496 DBG(" <- htab_initialize()\n");
@@ -291,9 +505,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
291{ 505{
292 struct page *page; 506 struct page *page;
293 507
294 if (!pfn_valid(pte_pfn(pte)))
295 return pp;
296
297 page = pte_page(pte); 508 page = pte_page(pte);
298 509
299 /* page is dirty */ 510 /* page is dirty */
@@ -302,7 +513,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
302 __flush_dcache_icache(page_address(page)); 513 __flush_dcache_icache(page_address(page));
303 set_bit(PG_arch_1, &page->flags); 514 set_bit(PG_arch_1, &page->flags);
304 } else 515 } else
305 pp |= HW_NO_EXEC; 516 pp |= HPTE_R_N;
306 } 517 }
307 return pp; 518 return pp;
308} 519}
@@ -318,94 +529,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
318 unsigned long vsid; 529 unsigned long vsid;
319 struct mm_struct *mm; 530 struct mm_struct *mm;
320 pte_t *ptep; 531 pte_t *ptep;
321 int ret;
322 int user_region = 0;
323 int local = 0;
324 cpumask_t tmp; 532 cpumask_t tmp;
533 int rc, user_region = 0, local = 0;
325 534
326 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) 535 DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
327 return 1; 536 ea, access, trap);
328 537
538 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
539 DBG_LOW(" out of pgtable range !\n");
540 return 1;
541 }
542
543 /* Get region & vsid */
329 switch (REGION_ID(ea)) { 544 switch (REGION_ID(ea)) {
330 case USER_REGION_ID: 545 case USER_REGION_ID:
331 user_region = 1; 546 user_region = 1;
332 mm = current->mm; 547 mm = current->mm;
333 if (! mm) 548 if (! mm) {
549 DBG_LOW(" user region with no mm !\n");
334 return 1; 550 return 1;
335 551 }
336 vsid = get_vsid(mm->context.id, ea); 552 vsid = get_vsid(mm->context.id, ea);
337 break; 553 break;
338 case VMALLOC_REGION_ID: 554 case VMALLOC_REGION_ID:
339 mm = &init_mm; 555 mm = &init_mm;
340 vsid = get_kernel_vsid(ea); 556 vsid = get_kernel_vsid(ea);
341 break; 557 break;
342#if 0
343 case KERNEL_REGION_ID:
344 /*
345 * Should never get here - entire 0xC0... region is bolted.
346 * Send the problem up to do_page_fault
347 */
348#endif
349 default: 558 default:
350 /* Not a valid range 559 /* Not a valid range
351 * Send the problem up to do_page_fault 560 * Send the problem up to do_page_fault
352 */ 561 */
353 return 1; 562 return 1;
354 break;
355 } 563 }
564 DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
356 565
566 /* Get pgdir */
357 pgdir = mm->pgd; 567 pgdir = mm->pgd;
358
359 if (pgdir == NULL) 568 if (pgdir == NULL)
360 return 1; 569 return 1;
361 570
571 /* Check CPU locality */
362 tmp = cpumask_of_cpu(smp_processor_id()); 572 tmp = cpumask_of_cpu(smp_processor_id());
363 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) 573 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
364 local = 1; 574 local = 1;
365 575
366 /* Is this a huge page ? */ 576 /* Handle hugepage regions */
367 if (unlikely(in_hugepage_area(mm->context, ea))) 577 if (unlikely(in_hugepage_area(mm->context, ea))) {
368 ret = hash_huge_page(mm, access, ea, vsid, local); 578 DBG_LOW(" -> huge page !\n");
369 else { 579 return hash_huge_page(mm, access, ea, vsid, local);
370 ptep = find_linux_pte(pgdir, ea); 580 }
371 if (ptep == NULL) 581
372 return 1; 582 /* Get PTE and page size from page tables */
373 ret = __hash_page(ea, access, vsid, ptep, trap, local); 583 ptep = find_linux_pte(pgdir, ea);
584 if (ptep == NULL || !pte_present(*ptep)) {
585 DBG_LOW(" no PTE !\n");
586 return 1;
587 }
588
589#ifndef CONFIG_PPC_64K_PAGES
590 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
591#else
592 DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
593 pte_val(*(ptep + PTRS_PER_PTE)));
594#endif
595 /* Pre-check access permissions (will be re-checked atomically
596 * in __hash_page_XX but this pre-check is a fast path)
597 */
598 if (access & ~pte_val(*ptep)) {
599 DBG_LOW(" no access !\n");
600 return 1;
374 } 601 }
375 602
376 return ret; 603 /* Do actual hashing */
604#ifndef CONFIG_PPC_64K_PAGES
605 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
606#else
607 if (mmu_virtual_psize == MMU_PAGE_64K)
608 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
609 else
610 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
611#endif /* CONFIG_PPC_64K_PAGES */
612
613#ifndef CONFIG_PPC_64K_PAGES
614 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
615#else
616 DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
617 pte_val(*(ptep + PTRS_PER_PTE)));
618#endif
619 DBG_LOW(" -> rc=%d\n", rc);
620 return rc;
377} 621}
378 622
379void flush_hash_page(unsigned long va, pte_t pte, int local) 623void hash_preload(struct mm_struct *mm, unsigned long ea,
624 unsigned long access, unsigned long trap)
380{ 625{
381 unsigned long vpn, hash, secondary, slot; 626 unsigned long vsid;
382 unsigned long huge = pte_huge(pte); 627 void *pgdir;
628 pte_t *ptep;
629 cpumask_t mask;
630 unsigned long flags;
631 int local = 0;
632
633 /* We don't want huge pages prefaulted for now
634 */
635 if (unlikely(in_hugepage_area(mm->context, ea)))
636 return;
637
638 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
639 " trap=%lx\n", mm, mm->pgd, ea, access, trap);
383 640
384 if (huge) 641 /* Get PTE, VSID, access mask */
385 vpn = va >> HPAGE_SHIFT; 642 pgdir = mm->pgd;
643 if (pgdir == NULL)
644 return;
645 ptep = find_linux_pte(pgdir, ea);
646 if (!ptep)
647 return;
648 vsid = get_vsid(mm->context.id, ea);
649
650 /* Hash it in */
651 local_irq_save(flags);
652 mask = cpumask_of_cpu(smp_processor_id());
653 if (cpus_equal(mm->cpu_vm_mask, mask))
654 local = 1;
655#ifndef CONFIG_PPC_64K_PAGES
656 __hash_page_4K(ea, access, vsid, ptep, trap, local);
657#else
658 if (mmu_virtual_psize == MMU_PAGE_64K)
659 __hash_page_64K(ea, access, vsid, ptep, trap, local);
386 else 660 else
387 vpn = va >> PAGE_SHIFT; 661 __hash_page_4K(ea, access, vsid, ptep, trap, local);
388 hash = hpt_hash(vpn, huge); 662#endif /* CONFIG_PPC_64K_PAGES */
389 secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; 663 local_irq_restore(flags);
390 if (secondary) 664}
391 hash = ~hash; 665
392 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 666void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
393 slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; 667{
394 668 unsigned long hash, index, shift, hidx, slot;
395 ppc_md.hpte_invalidate(slot, va, huge, local); 669
670 DBG_LOW("flush_hash_page(va=%016x)\n", va);
671 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
672 hash = hpt_hash(va, shift);
673 hidx = __rpte_to_hidx(pte, index);
674 if (hidx & _PTEIDX_SECONDARY)
675 hash = ~hash;
676 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
677 slot += hidx & _PTEIDX_GROUP_IX;
678 DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
679 ppc_md.hpte_invalidate(slot, va, psize, local);
680 } pte_iterate_hashed_end();
396} 681}
397 682
398void flush_hash_range(unsigned long number, int local) 683void flush_hash_range(unsigned long number, int local)
399{ 684{
400 if (ppc_md.flush_hash_range) { 685 if (ppc_md.flush_hash_range)
401 ppc_md.flush_hash_range(number, local); 686 ppc_md.flush_hash_range(number, local);
402 } else { 687 else {
403 int i; 688 int i;
404 struct ppc64_tlb_batch *batch = 689 struct ppc64_tlb_batch *batch =
405 &__get_cpu_var(ppc64_tlb_batch); 690 &__get_cpu_var(ppc64_tlb_batch);
406 691
407 for (i = 0; i < number; i++) 692 for (i = 0; i < number; i++)
408 flush_hash_page(batch->vaddr[i], batch->pte[i], local); 693 flush_hash_page(batch->vaddr[i], batch->pte[i],
694 batch->psize, local);
409 } 695 }
410} 696}
411 697
@@ -445,6 +731,18 @@ void __init htab_finish_init(void)
445 extern unsigned int *htab_call_hpte_remove; 731 extern unsigned int *htab_call_hpte_remove;
446 extern unsigned int *htab_call_hpte_updatepp; 732 extern unsigned int *htab_call_hpte_updatepp;
447 733
734#ifdef CONFIG_PPC_64K_PAGES
735 extern unsigned int *ht64_call_hpte_insert1;
736 extern unsigned int *ht64_call_hpte_insert2;
737 extern unsigned int *ht64_call_hpte_remove;
738 extern unsigned int *ht64_call_hpte_updatepp;
739
740 make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
741 make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
742 make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
743 make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
744#endif /* CONFIG_PPC_64K_PAGES */
745
448 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); 746 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
449 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); 747 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
450 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); 748 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..0073a04047e4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
47 pu = pud_offset(pg, addr); 47 pu = pud_offset(pg, addr);
48 if (!pud_none(*pu)) { 48 if (!pud_none(*pu)) {
49 pm = pmd_offset(pu, addr); 49 pm = pmd_offset(pu, addr);
50#ifdef CONFIG_PPC_64K_PAGES
51 /* Currently, we use the normal PTE offset within full
52 * size PTE pages, thus our huge PTEs are scattered in
53 * the PTE page and we do waste some. We may change
54 * that in the future, but the current mechanism keeps
55 * things much simpler
56 */
57 if (!pmd_none(*pm)) {
58 /* Note: pte_offset_* are all equivalent on
59 * ppc64 as we don't have HIGHMEM
60 */
61 pt = pte_offset_kernel(pm, addr);
62 return pt;
63 }
64#else /* CONFIG_PPC_64K_PAGES */
65 /* On 4k pages, we put huge PTEs in the PMD page */
50 pt = (pte_t *)pm; 66 pt = (pte_t *)pm;
51 BUG_ON(!pmd_none(*pm)
52 && !(pte_present(*pt) && pte_huge(*pt)));
53 return pt; 67 return pt;
68#endif /* CONFIG_PPC_64K_PAGES */
54 } 69 }
55 } 70 }
56 71
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
74 if (pu) { 89 if (pu) {
75 pm = pmd_alloc(mm, pu, addr); 90 pm = pmd_alloc(mm, pu, addr);
76 if (pm) { 91 if (pm) {
92#ifdef CONFIG_PPC_64K_PAGES
93 /* See comment in huge_pte_offset. Note that if we ever
94 * want to put the page size in the PMD, we would have
95 * to open code our own pte_alloc* function in order
96 * to populate and set the size atomically
97 */
98 pt = pte_alloc_map(mm, pm, addr);
99#else /* CONFIG_PPC_64K_PAGES */
77 pt = (pte_t *)pm; 100 pt = (pte_t *)pm;
78 BUG_ON(!pmd_none(*pm) 101#endif /* CONFIG_PPC_64K_PAGES */
79 && !(pte_present(*pt) && pte_huge(*pt)));
80 return pt; 102 return pt;
81 } 103 }
82 } 104 }
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
84 return NULL; 106 return NULL;
85} 107}
86 108
87#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
88
89void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 109void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
90 pte_t *ptep, pte_t pte) 110 pte_t *ptep, pte_t pte)
91{ 111{
92 int i;
93
94 if (pte_present(*ptep)) { 112 if (pte_present(*ptep)) {
95 pte_clear(mm, addr, ptep); 113 /* We open-code pte_clear because we need to pass the right
114 * argument to hpte_update (huge / !huge)
115 */
116 unsigned long old = pte_update(ptep, ~0UL);
117 if (old & _PAGE_HASHPTE)
118 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
96 flush_tlb_pending(); 119 flush_tlb_pending();
97 } 120 }
98 121 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
99 for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
100 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
101 ptep++;
102 }
103} 122}
104 123
105pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 124pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
106 pte_t *ptep) 125 pte_t *ptep)
107{ 126{
108 unsigned long old = pte_update(ptep, ~0UL); 127 unsigned long old = pte_update(ptep, ~0UL);
109 int i;
110 128
111 if (old & _PAGE_HASHPTE) 129 if (old & _PAGE_HASHPTE)
112 hpte_update(mm, addr, old, 0); 130 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
113 131 *ptep = __pte(0);
114 for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
115 ptep[i] = __pte(0);
116 132
117 return __pte(old); 133 return __pte(old);
118} 134}
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
563 int lastshift; 579 int lastshift;
564 u16 areamask, curareas; 580 u16 areamask, curareas;
565 581
582 if (HPAGE_SHIFT == 0)
583 return -EINVAL;
566 if (len & ~HPAGE_MASK) 584 if (len & ~HPAGE_MASK)
567 return -EINVAL; 585 return -EINVAL;
568 586
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
619 unsigned long ea, unsigned long vsid, int local) 637 unsigned long ea, unsigned long vsid, int local)
620{ 638{
621 pte_t *ptep; 639 pte_t *ptep;
622 unsigned long va, vpn; 640 unsigned long old_pte, new_pte;
623 pte_t old_pte, new_pte; 641 unsigned long va, rflags, pa;
624 unsigned long rflags, prpn;
625 long slot; 642 long slot;
626 int err = 1; 643 int err = 1;
627 644
628 spin_lock(&mm->page_table_lock);
629
630 ptep = huge_pte_offset(mm, ea); 645 ptep = huge_pte_offset(mm, ea);
631 646
632 /* Search the Linux page table for a match with va */ 647 /* Search the Linux page table for a match with va */
633 va = (vsid << 28) | (ea & 0x0fffffff); 648 va = (vsid << 28) | (ea & 0x0fffffff);
634 vpn = va >> HPAGE_SHIFT;
635 649
636 /* 650 /*
637 * If no pte found or not present, send the problem up to 651 * If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
640 if (unlikely(!ptep || pte_none(*ptep))) 654 if (unlikely(!ptep || pte_none(*ptep)))
641 goto out; 655 goto out;
642 656
643/* BUG_ON(pte_bad(*ptep)); */
644
645 /* 657 /*
646 * Check the user's access rights to the page. If access should be 658 * Check the user's access rights to the page. If access should be
647 * prevented then send the problem up to do_page_fault. 659 * prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
661 */ 673 */
662 674
663 675
664 old_pte = *ptep; 676 do {
665 new_pte = old_pte; 677 old_pte = pte_val(*ptep);
666 678 if (old_pte & _PAGE_BUSY)
667 rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); 679 goto out;
680 new_pte = old_pte | _PAGE_BUSY |
681 _PAGE_ACCESSED | _PAGE_HASHPTE;
682 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
683 old_pte, new_pte));
684
685 rflags = 0x2 | (!(new_pte & _PAGE_RW));
668 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 686 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
669 rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); 687 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
670 688
671 /* Check if pte already has an hpte (case 2) */ 689 /* Check if pte already has an hpte (case 2) */
672 if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { 690 if (unlikely(old_pte & _PAGE_HASHPTE)) {
673 /* There MIGHT be an HPTE for this pte */ 691 /* There MIGHT be an HPTE for this pte */
674 unsigned long hash, slot; 692 unsigned long hash, slot;
675 693
676 hash = hpt_hash(vpn, 1); 694 hash = hpt_hash(va, HPAGE_SHIFT);
677 if (pte_val(old_pte) & _PAGE_SECONDARY) 695 if (old_pte & _PAGE_F_SECOND)
678 hash = ~hash; 696 hash = ~hash;
679 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 697 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
680 slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; 698 slot += (old_pte & _PAGE_F_GIX) >> 12;
681 699
682 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) 700 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
683 pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; 701 old_pte &= ~_PAGE_HPTEFLAGS;
684 } 702 }
685 703
686 if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { 704 if (likely(!(old_pte & _PAGE_HASHPTE))) {
687 unsigned long hash = hpt_hash(vpn, 1); 705 unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
688 unsigned long hpte_group; 706 unsigned long hpte_group;
689 707
690 prpn = pte_pfn(old_pte); 708 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
691 709
692repeat: 710repeat:
693 hpte_group = ((hash & htab_hash_mask) * 711 hpte_group = ((hash & htab_hash_mask) *
694 HPTES_PER_GROUP) & ~0x7UL; 712 HPTES_PER_GROUP) & ~0x7UL;
695 713
696 /* Update the linux pte with the HPTE slot */ 714 /* clear HPTE slot information in new PTE */
697 pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; 715 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
698 pte_val(new_pte) |= _PAGE_HASHPTE;
699 716
700 /* Add in WIMG bits */ 717 /* Add in WIMG bits */
701 /* XXX We should store these in the pte */ 718 /* XXX We should store these in the pte */
719 /* --BenH: I think they are ... */
702 rflags |= _PAGE_COHERENT; 720 rflags |= _PAGE_COHERENT;
703 721
704 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 722 /* Insert into the hash table, primary slot */
705 HPTE_V_LARGE, rflags); 723 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
724 mmu_huge_psize);
706 725
707 /* Primary is full, try the secondary */ 726 /* Primary is full, try the secondary */
708 if (unlikely(slot == -1)) { 727 if (unlikely(slot == -1)) {
709 pte_val(new_pte) |= _PAGE_SECONDARY; 728 new_pte |= _PAGE_F_SECOND;
710 hpte_group = ((~hash & htab_hash_mask) * 729 hpte_group = ((~hash & htab_hash_mask) *
711 HPTES_PER_GROUP) & ~0x7UL; 730 HPTES_PER_GROUP) & ~0x7UL;
712 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 731 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
713 HPTE_V_LARGE |
714 HPTE_V_SECONDARY, 732 HPTE_V_SECONDARY,
715 rflags); 733 mmu_huge_psize);
716 if (slot == -1) { 734 if (slot == -1) {
717 if (mftb() & 0x1) 735 if (mftb() & 0x1)
718 hpte_group = ((hash & htab_hash_mask) * 736 hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
726 if (unlikely(slot == -2)) 744 if (unlikely(slot == -2))
727 panic("hash_huge_page: pte_insert failed\n"); 745 panic("hash_huge_page: pte_insert failed\n");
728 746
729 pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; 747 new_pte |= (slot << 12) & _PAGE_F_GIX;
730
731 /*
732 * No need to use ldarx/stdcx here because all who
733 * might be updating the pte will hold the
734 * page_table_lock
735 */
736 *ptep = new_pte;
737 } 748 }
738 749
750 /*
751 * No need to use ldarx/stdcx here: we set _PAGE_BUSY
752 * on this pte with cmpxchg above, and this store is
753 * what clears it (page_table_lock is no longer taken)
754 */
755 *ptep = __pte(new_pte & ~_PAGE_BUSY);
756
739 err = 0; 757 err = 0;
740 758
741 out: 759 out:
742 spin_unlock(&mm->page_table_lock);
743
744 return err; 760 return err;
745} 761}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index aa98b79e734c..ce974c83d88a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -187,12 +187,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
187 memset(addr, 0, kmem_cache_size(cache)); 187 memset(addr, 0, kmem_cache_size(cache));
188} 188}
189 189
190#ifdef CONFIG_PPC_64K_PAGES
191static const int pgtable_cache_size[2] = {
192 PTE_TABLE_SIZE, PGD_TABLE_SIZE
193};
194static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
195 "pte_pmd_cache", "pgd_cache",
196};
197#else
190static const int pgtable_cache_size[2] = { 198static const int pgtable_cache_size[2] = {
191 PTE_TABLE_SIZE, PMD_TABLE_SIZE 199 PTE_TABLE_SIZE, PMD_TABLE_SIZE
192}; 200};
193static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 201static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
194 "pgd_pte_cache", "pud_pmd_cache", 202 "pgd_pte_cache", "pud_pmd_cache",
195}; 203};
204#endif /* CONFIG_PPC_64K_PAGES */
196 205
197kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; 206kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
198 207
@@ -200,19 +209,14 @@ void pgtable_cache_init(void)
200{ 209{
201 int i; 210 int i;
202 211
203 BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
204 BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
205 BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
206 BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
207
208 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { 212 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
209 int size = pgtable_cache_size[i]; 213 int size = pgtable_cache_size[i];
210 const char *name = pgtable_cache_name[i]; 214 const char *name = pgtable_cache_name[i];
211 215
212 pgtable_cache[i] = kmem_cache_create(name, 216 pgtable_cache[i] = kmem_cache_create(name,
213 size, size, 217 size, size,
214 SLAB_HWCACHE_ALIGN 218 SLAB_HWCACHE_ALIGN |
215 | SLAB_MUST_HWCACHE_ALIGN, 219 SLAB_MUST_HWCACHE_ALIGN,
216 zero_ctor, 220 zero_ctor,
217 NULL); 221 NULL);
218 if (! pgtable_cache[i]) 222 if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e14..7faa46b71f21 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
61int mem_init_done; 61int mem_init_done;
62unsigned long memory_limit; 62unsigned long memory_limit;
63 63
64extern void hash_preload(struct mm_struct *mm, unsigned long ea,
65 unsigned long access, unsigned long trap);
66
64/* 67/*
65 * This is called by /dev/mem to know if a given address has to 68 * This is called by /dev/mem to know if a given address has to
66 * be mapped non-cacheable or not 69 * be mapped non-cacheable or not
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
493void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 496void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
494 pte_t pte) 497 pte_t pte)
495{ 498{
496 /* handle i-cache coherency */ 499#ifdef CONFIG_PPC_STD_MMU
497 unsigned long pfn = pte_pfn(pte); 500 unsigned long access = 0, trap;
498#ifdef CONFIG_PPC32
499 pmd_t *pmd;
500#else
501 unsigned long vsid;
502 void *pgdir;
503 pte_t *ptep;
504 int local = 0;
505 cpumask_t tmp;
506 unsigned long flags;
507#endif 501#endif
502 unsigned long pfn = pte_pfn(pte);
508 503
509 /* handle i-cache coherency */ 504 /* handle i-cache coherency */
510 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && 505 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
535 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ 530 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
536 if (!pte_young(pte) || address >= TASK_SIZE) 531 if (!pte_young(pte) || address >= TASK_SIZE)
537 return; 532 return;
538#ifdef CONFIG_PPC32
539 if (Hash == 0)
540 return;
541 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
542 if (!pmd_none(*pmd))
543 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
544#else
545 pgdir = vma->vm_mm->pgd;
546 if (pgdir == NULL)
547 return;
548 533
549 ptep = find_linux_pte(pgdir, address); 534 /* We try to figure out if we are coming from an instruction
550 if (!ptep) 535 * access fault and pass that down to __hash_page so we avoid
536 * double-faulting on execution of fresh text. We have to test
537 * for regs NULL since init will get here first thing at boot
538 *
539 * We also avoid filling the hash if not coming from a fault
540 */
541 if (current->thread.regs == NULL)
551 return; 542 return;
552 543 trap = TRAP(current->thread.regs);
553 vsid = get_vsid(vma->vm_mm->context.id, address); 544 if (trap == 0x400)
554 545 access |= _PAGE_EXEC;
555 local_irq_save(flags); 546 else if (trap != 0x300)
556 tmp = cpumask_of_cpu(smp_processor_id()); 547 return;
557 if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) 548 hash_preload(vma->vm_mm, address, access, trap);
558 local = 1; 549#endif /* CONFIG_PPC_STD_MMU */
559
560 __hash_page(address, 0, vsid, ptep, 0x300, local);
561 local_irq_restore(flags);
562#endif
563#endif
564} 550}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d1fbd3fe684f..900842451bd3 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -100,7 +100,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
100 pud_t *pudp; 100 pud_t *pudp;
101 pmd_t *pmdp; 101 pmd_t *pmdp;
102 pte_t *ptep; 102 pte_t *ptep;
103 unsigned long vsid;
104 103
105 if (mem_init_done) { 104 if (mem_init_done) {
106 pgdp = pgd_offset_k(ea); 105 pgdp = pgd_offset_k(ea);
@@ -116,28 +115,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
116 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 115 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
117 __pgprot(flags))); 116 __pgprot(flags)));
118 } else { 117 } else {
119 unsigned long va, vpn, hash, hpteg;
120
121 /* 118 /*
122 * If the mm subsystem is not fully up, we cannot create a 119 * If the mm subsystem is not fully up, we cannot create a
123 * linux page table entry for this mapping. Simply bolt an 120 * linux page table entry for this mapping. Simply bolt an
124 * entry in the hardware page table. 121 * entry in the hardware page table.
122 *
125 */ 123 */
126 vsid = get_kernel_vsid(ea); 124 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
127 va = (vsid << 28) | (ea & 0xFFFFFFF); 125 mmu_virtual_psize))
128 vpn = va >> PAGE_SHIFT; 126 panic("Can't map bolted IO mapping");
129
130 hash = hpt_hash(vpn, 0);
131
132 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
133
134 /* Panic if a pte grpup is full */
135 if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
136 HPTE_V_BOLTED,
137 _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
138 == -1) {
139 panic("map_io_page: could not insert mapping");
140 }
141 } 127 }
142 return 0; 128 return 0;
143} 129}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e9..d137abd241ff 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
179} 179}
180 180
181/* 181/*
182 * Preload a translation in the hash table
183 */
184void hash_preload(struct mm_struct *mm, unsigned long ea,
185 unsigned long access, unsigned long trap)
186{
187 pmd_t *pmd;
188
189 if (Hash == 0)
190 return;
191 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
192 if (!pmd_none(*pmd))
193 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
194}
195
196/*
182 * Initialize the hash table and patch the instructions in hashtable.S. 197 * Initialize the hash table and patch the instructions in hashtable.S.
183 */ 198 */
184void __init MMU_init_hw(void) 199void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a37..60e852f2f8e5 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 */ 15 */
16 16
17#undef DEBUG
18
17#include <linux/config.h> 19#include <linux/config.h>
18#include <asm/pgtable.h> 20#include <asm/pgtable.h>
19#include <asm/mmu.h> 21#include <asm/mmu.h>
20#include <asm/mmu_context.h> 22#include <asm/mmu_context.h>
21#include <asm/paca.h> 23#include <asm/paca.h>
22#include <asm/cputable.h> 24#include <asm/cputable.h>
25#include <asm/cacheflush.h>
26
27#ifdef DEBUG
28#define DBG(fmt...) udbg_printf(fmt)
29#else
30#define DBG(fmt...)
31#endif
23 32
24extern void slb_allocate(unsigned long ea); 33extern void slb_allocate_realmode(unsigned long ea);
34extern void slb_allocate_user(unsigned long ea);
35
36static void slb_allocate(unsigned long ea)
37{
38 /* Currently, we do real mode for all SLBs including user, but
39 * that will change if we bring back dynamic VSIDs
40 */
41 slb_allocate_realmode(ea);
42}
25 43
26static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) 44static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
27{ 45{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
46{ 64{
47 /* If you change this make sure you change SLB_NUM_BOLTED 65 /* If you change this make sure you change SLB_NUM_BOLTED
48 * appropriately too. */ 66 * appropriately too. */
49 unsigned long ksp_flags = SLB_VSID_KERNEL; 67 unsigned long linear_llp, virtual_llp, lflags, vflags;
50 unsigned long ksp_esid_data; 68 unsigned long ksp_esid_data;
51 69
52 WARN_ON(!irqs_disabled()); 70 WARN_ON(!irqs_disabled());
53 71
54 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 72 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
55 ksp_flags |= SLB_VSID_L; 73 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
74 lflags = SLB_VSID_KERNEL | linear_llp;
75 vflags = SLB_VSID_KERNEL | virtual_llp;
56 76
57 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); 77 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
58 if ((ksp_esid_data & ESID_MASK) == KERNELBASE) 78 if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
67 /* Slot 2 - kernel stack */ 87 /* Slot 2 - kernel stack */
68 "slbmte %2,%3\n" 88 "slbmte %2,%3\n"
69 "isync" 89 "isync"
70 :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), 90 :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
71 "r"(mk_esid_data(VMALLOCBASE, 1)), 91 "r"(mk_esid_data(VMALLOCBASE, 1)),
72 "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), 92 "r"(mk_vsid_data(ksp_esid_data, lflags)),
73 "r"(ksp_esid_data) 93 "r"(ksp_esid_data)
74 : "memory"); 94 : "memory");
75} 95}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
102 122
103 get_paca()->slb_cache_ptr = 0; 123 get_paca()->slb_cache_ptr = 0;
104 get_paca()->context = mm->context; 124 get_paca()->context = mm->context;
125#ifdef CONFIG_PPC_64K_PAGES
126 get_paca()->pgdir = mm->pgd;
127#endif /* CONFIG_PPC_64K_PAGES */
105 128
106 /* 129 /*
107 * preload some userspace segments into the SLB. 130 * preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
131 slb_allocate(unmapped_base); 154 slb_allocate(unmapped_base);
132} 155}
133 156
157static inline void patch_slb_encoding(unsigned int *insn_addr,
158 unsigned int immed)
159{
160 /* Assume the instruction had a "0" immediate value, just
161 * "or" in the new value
162 */
163 *insn_addr |= immed;
164 flush_icache_range((unsigned long)insn_addr, 4+
165 (unsigned long)insn_addr);
166}
167
134void slb_initialize(void) 168void slb_initialize(void)
135{ 169{
170 unsigned long linear_llp, virtual_llp;
171 static int slb_encoding_inited;
172 extern unsigned int *slb_miss_kernel_load_linear;
173 extern unsigned int *slb_miss_kernel_load_virtual;
174 extern unsigned int *slb_miss_user_load_normal;
175#ifdef CONFIG_HUGETLB_PAGE
176 extern unsigned int *slb_miss_user_load_huge;
177 unsigned long huge_llp;
178
179 huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
180#endif
181
182 /* Prepare our SLB miss handler based on our page size */
183 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
184 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
185 if (!slb_encoding_inited) {
186 slb_encoding_inited = 1;
187 patch_slb_encoding(slb_miss_kernel_load_linear,
188 SLB_VSID_KERNEL | linear_llp);
189 patch_slb_encoding(slb_miss_kernel_load_virtual,
190 SLB_VSID_KERNEL | virtual_llp);
191 patch_slb_encoding(slb_miss_user_load_normal,
192 SLB_VSID_USER | virtual_llp);
193
194 DBG("SLB: linear LLP = %04x\n", linear_llp);
195 DBG("SLB: virtual LLP = %04x\n", virtual_llp);
196#ifdef CONFIG_HUGETLB_PAGE
197 patch_slb_encoding(slb_miss_user_load_huge,
198 SLB_VSID_USER | huge_llp);
199 DBG("SLB: huge LLP = %04x\n", huge_llp);
200#endif
201 }
202
136 /* On iSeries the bolted entries have already been set up by 203 /* On iSeries the bolted entries have already been set up by
137 * the hypervisor from the lparMap data in head.S */ 204 * the hypervisor from the lparMap data in head.S */
138#ifndef CONFIG_PPC_ISERIES 205#ifndef CONFIG_PPC_ISERIES
139 unsigned long flags = SLB_VSID_KERNEL; 206 {
207 unsigned long lflags, vflags;
140 208
141 /* Invalidate the entire SLB (even slot 0) & all the ERATS */ 209 lflags = SLB_VSID_KERNEL | linear_llp;
142 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 210 vflags = SLB_VSID_KERNEL | virtual_llp;
143 flags |= SLB_VSID_L;
144 211
145 asm volatile("isync":::"memory"); 212 /* Invalidate the entire SLB (even slot 0) & all the ERATS */
146 asm volatile("slbmte %0,%0"::"r" (0) : "memory"); 213 asm volatile("isync":::"memory");
214 asm volatile("slbmte %0,%0"::"r" (0) : "memory");
147 asm volatile("isync; slbia; isync":::"memory"); 215 asm volatile("isync; slbia; isync":::"memory");
148 create_slbe(KERNELBASE, flags, 0); 216 create_slbe(KERNELBASE, lflags, 0);
149 create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); 217
218 /* VMALLOC space uses the virtual page size (virtual_llp) */
219 create_slbe(VMALLOCBASE, vflags, 1);
220
150 /* We don't bolt the stack for the time being - we're in boot, 221 /* We don't bolt the stack for the time being - we're in boot,
151 * so the stack is in the bolted segment. By the time it goes 222 * so the stack is in the bolted segment. By the time it goes
152 * elsewhere, we'll call _switch() which will bolt in the new 223 * elsewhere, we'll call _switch() which will bolt in the new
153 * one. */ 224 * one. */
154 asm volatile("isync":::"memory"); 225 asm volatile("isync":::"memory");
155#endif 226 }
227#endif /* CONFIG_PPC_ISERIES */
156 228
157 get_paca()->stab_rr = SLB_NUM_BOLTED; 229 get_paca()->stab_rr = SLB_NUM_BOLTED;
158} 230}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503bc..3e18241b6f35 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
18 18
19#include <linux/config.h> 19#include <linux/config.h>
20#include <asm/processor.h> 20#include <asm/processor.h>
21#include <asm/page.h>
22#include <asm/mmu.h>
23#include <asm/ppc_asm.h> 21#include <asm/ppc_asm.h>
24#include <asm/asm-offsets.h> 22#include <asm/asm-offsets.h>
25#include <asm/cputable.h> 23#include <asm/cputable.h>
24#include <asm/page.h>
25#include <asm/mmu.h>
26#include <asm/pgtable.h>
26 27
27/* void slb_allocate(unsigned long ea); 28/* void slb_allocate_realmode(unsigned long ea);
28 * 29 *
29 * Create an SLB entry for the given EA (user or kernel). 30 * Create an SLB entry for the given EA (user or kernel).
30 * r3 = faulting address, r13 = PACA 31 * r3 = faulting address, r13 = PACA
31 * r9, r10, r11 are clobbered by this function 32 * r9, r10, r11 are clobbered by this function
32 * No other registers are examined or changed. 33 * No other registers are examined or changed.
33 */ 34 */
34_GLOBAL(slb_allocate) 35_GLOBAL(slb_allocate_realmode)
35 /* 36 /* r3 = faulting address */
36 * First find a slot, round robin. Previously we tried to find
37 * a free slot first but that took too long. Unfortunately we
38 * dont have any LRU information to help us choose a slot.
39 */
40#ifdef CONFIG_PPC_ISERIES
41 /*
42 * On iSeries, the "bolted" stack segment can be cast out on
43 * shared processor switch so we need to check for a miss on
44 * it and restore it to the right slot.
45 */
46 ld r9,PACAKSAVE(r13)
47 clrrdi r9,r9,28
48 clrrdi r11,r3,28
49 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
50 cmpld r9,r11
51 beq 3f
52#endif /* CONFIG_PPC_ISERIES */
53
54 ld r10,PACASTABRR(r13)
55 addi r10,r10,1
56 /* use a cpu feature mask if we ever change our slb size */
57 cmpldi r10,SLB_NUM_ENTRIES
58
59 blt+ 4f
60 li r10,SLB_NUM_BOLTED
61
624:
63 std r10,PACASTABRR(r13)
643:
65 /* r3 = faulting address, r10 = entry */
66 37
67 srdi r9,r3,60 /* get region */ 38 srdi r9,r3,60 /* get region */
68 srdi r3,r3,28 /* get esid */ 39 srdi r10,r3,28 /* get esid */
69 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ 40 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
70 41
71 rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ 42 /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
72 oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
73
74 /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
75
76 blt cr7,0f /* user or kernel? */ 43 blt cr7,0f /* user or kernel? */
77 44
78 /* kernel address: proto-VSID = ESID */ 45 /* kernel address: proto-VSID = ESID */
@@ -81,43 +48,161 @@ _GLOBAL(slb_allocate)
81 * top segment. That's ok, the scramble below will translate 48 * top segment. That's ok, the scramble below will translate
82 * it to VSID 0, which is reserved as a bad VSID - one which 49 * it to VSID 0, which is reserved as a bad VSID - one which
83 * will never have any pages in it. */ 50 * will never have any pages in it. */
84 li r11,SLB_VSID_KERNEL
85BEGIN_FTR_SECTION
86 bne cr7,9f
87 li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
88END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
89 b 9f
90 51
910: /* user address: proto-VSID = context<<15 | ESID */ 52 /* Check if hitting the linear mapping or the vmalloc/ioremap
92 srdi. r9,r3,USER_ESID_BITS 53 * kernel space
54 */
55 bne cr7,1f
56
57 /* Linear mapping encoding bits, the "li" instruction below will
58 * be patched by the kernel at boot
59 */
60_GLOBAL(slb_miss_kernel_load_linear)
61 li r11,0
62 b slb_finish_load
63
641: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
65 * will be patched by the kernel at boot
66 */
67_GLOBAL(slb_miss_kernel_load_virtual)
68 li r11,0
69 b slb_finish_load
70
71
720: /* user address: proto-VSID = context << 15 | ESID. First check
73 * if the address is within the boundaries of the user region
74 */
75 srdi. r9,r10,USER_ESID_BITS
93 bne- 8f /* invalid ea bits set */ 76 bne- 8f /* invalid ea bits set */
94 77
78 /* Figure out if the segment contains huge pages */
95#ifdef CONFIG_HUGETLB_PAGE 79#ifdef CONFIG_HUGETLB_PAGE
96BEGIN_FTR_SECTION 80BEGIN_FTR_SECTION
81 b 1f
82END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
97 lhz r9,PACAHIGHHTLBAREAS(r13) 83 lhz r9,PACAHIGHHTLBAREAS(r13)
98 srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) 84 srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
99 srd r9,r9,r11 85 srd r9,r9,r11
100 lhz r11,PACALOWHTLBAREAS(r13) 86 lhz r11,PACALOWHTLBAREAS(r13)
101 srd r11,r11,r3 87 srd r11,r11,r10
102 or r9,r9,r11 88 or. r9,r9,r11
103END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 89 beq 1f
90_GLOBAL(slb_miss_user_load_huge)
91 li r11,0
92 b 2f
931:
104#endif /* CONFIG_HUGETLB_PAGE */ 94#endif /* CONFIG_HUGETLB_PAGE */
105 95
106 li r11,SLB_VSID_USER 96_GLOBAL(slb_miss_user_load_normal)
97 li r11,0
107 98
108#ifdef CONFIG_HUGETLB_PAGE 992:
109BEGIN_FTR_SECTION 100 ld r9,PACACONTEXTID(r13)
110 rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ 101 rldimi r10,r9,USER_ESID_BITS,0
111END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 102 b slb_finish_load
112#endif /* CONFIG_HUGETLB_PAGE */ 103
1048: /* invalid EA */
105 li r10,0 /* BAD_VSID */
106 li r11,SLB_VSID_USER /* flags don't much matter */
107 b slb_finish_load
108
109#ifdef __DISABLED__
110
111/* void slb_allocate_user(unsigned long ea);
112 *
113 * Create an SLB entry for the given EA (user or kernel).
114 * r3 = faulting address, r13 = PACA
115 * r9, r10, r11 are clobbered by this function
116 * No other registers are examined or changed.
117 *
118 * It is called with translation enabled in order to be able to walk the
119 * page tables. This is not currently used.
120 */
121_GLOBAL(slb_allocate_user)
122 /* r3 = faulting address */
123 srdi r10,r3,28 /* get esid */
124
125 crset 4*cr7+lt /* set "user" flag for later */
126
127 /* check if we fit in the range covered by the pagetables*/
128 srdi. r9,r3,PGTABLE_EADDR_SIZE
129 crnot 4*cr0+eq,4*cr0+eq
130 beqlr
113 131
132 /* now we need to get to the page tables in order to get the page
133 * size encoding from the PMD. In the future, we'll be able to deal
134 * with 1T segments too by getting the encoding from the PGD instead
135 */
136 ld r9,PACAPGDIR(r13)
137 cmpldi cr0,r9,0
138 beqlr
139 rlwinm r11,r10,8,25,28
140 ldx r9,r9,r11 /* get pgd_t */
141 cmpldi cr0,r9,0
142 beqlr
143 rlwinm r11,r10,3,17,28
144 ldx r9,r9,r11 /* get pmd_t */
145 cmpldi cr0,r9,0
146 beqlr
147
148 /* build vsid flags */
149 andi. r11,r9,SLB_VSID_LLP
150 ori r11,r11,SLB_VSID_USER
151
152 /* get context to calculate proto-VSID */
114 ld r9,PACACONTEXTID(r13) 153 ld r9,PACACONTEXTID(r13)
115 rldimi r3,r9,USER_ESID_BITS,0 154 rldimi r10,r9,USER_ESID_BITS,0
155
156 /* fall through slb_finish_load */
157
158#endif /* __DISABLED__ */
116 159
1179: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
118 ASM_VSID_SCRAMBLE(r3,r9)
119 160
120 rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ 161/*
162 * Finish loading of an SLB entry and return
163 *
164 * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
165 */
166slb_finish_load:
167 ASM_VSID_SCRAMBLE(r10,r9)
168 rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
169
170 /* r3 = EA, r11 = VSID data */
171 /*
172 * Find a slot, round robin. Previously we tried to find a
173 * free slot first but that took too long. Unfortunately we
174 * don't have any LRU information to help us choose a slot.
175 */
176#ifdef CONFIG_PPC_ISERIES
177 /*
178 * On iSeries, the "bolted" stack segment can be cast out on
179 * shared processor switch so we need to check for a miss on
180 * it and restore it to the right slot.
181 */
182 ld r9,PACAKSAVE(r13)
183 clrrdi r9,r9,28
184 clrrdi r3,r3,28
185 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
186 cmpld r9,r3
187 beq 3f
188#endif /* CONFIG_PPC_ISERIES */
189
190 ld r10,PACASTABRR(r13)
191 addi r10,r10,1
192 /* use a cpu feature mask if we ever change our slb size */
193 cmpldi r10,SLB_NUM_ENTRIES
194
195 blt+ 4f
196 li r10,SLB_NUM_BOLTED
197
1984:
199 std r10,PACASTABRR(r13)
200
2013:
202 rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
203 oris r10,r3,SLB_ESID_V@h /* r10 = r3 | SLB_ESID_V */
204
205 /* r10 = ESID data, r11 = VSID data */
121 206
122 /* 207 /*
123 * No need for an isync before or after this slbmte. The exception 208 * No need for an isync before or after this slbmte. The exception
@@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
125 */ 210 */
126 slbmte r11,r10 211 slbmte r11,r10
127 212
128 bgelr cr7 /* we're done for kernel addresses */ 213 /* we're done for kernel addresses */
214 crclr 4*cr0+eq /* set result to "success" */
215 bgelr cr7
129 216
130 /* Update the slb cache */ 217 /* Update the slb cache */
131 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ 218 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
143 li r3,SLB_CACHE_ENTRIES+1 230 li r3,SLB_CACHE_ENTRIES+1
1442: 2312:
145 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ 232 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
233 crclr 4*cr0+eq /* set result to "success" */
146 blr 234 blr
147 235
1488: /* invalid EA */
149 li r3,0 /* BAD_VSID */
150 li r11,SLB_VSID_USER /* flags don't much matter */
151 b 9b
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf27..fa325dbf98fc 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -26,7 +26,6 @@ struct stab_entry {
26 unsigned long vsid_data; 26 unsigned long vsid_data;
27}; 27};
28 28
29/* Both the segment table and SLB code uses the following cache */
30#define NR_STAB_CACHE_ENTRIES 8 29#define NR_STAB_CACHE_ENTRIES 8
31DEFINE_PER_CPU(long, stab_cache_ptr); 30DEFINE_PER_CPU(long, stab_cache_ptr);
32DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); 31DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
186 /* Never flush the first entry. */ 185 /* Never flush the first entry. */
187 ste += 1; 186 ste += 1;
188 for (entry = 1; 187 for (entry = 1;
189 entry < (PAGE_SIZE / sizeof(struct stab_entry)); 188 entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
190 entry++, ste++) { 189 entry++, ste++) {
191 unsigned long ea; 190 unsigned long ea;
192 ea = ste->esid_data & ESID_MASK; 191 ea = ste->esid_data & ESID_MASK;
@@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
200 199
201 __get_cpu_var(stab_cache_ptr) = 0; 200 __get_cpu_var(stab_cache_ptr) = 0;
202 201
202#ifdef CONFIG_PPC_64K_PAGES
203 get_paca()->pgdir = mm->pgd;
204#endif /* CONFIG_PPC_64K_PAGES */
205
203 /* Now preload some entries for the new task */ 206 /* Now preload some entries for the new task */
204 if (test_tsk_thread_flag(tsk, TIF_32BIT)) 207 if (test_tsk_thread_flag(tsk, TIF_32BIT))
205 unmapped_base = TASK_UNMAPPED_BASE_USER32; 208 unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
223 asm volatile("sync" : : : "memory"); 226 asm volatile("sync" : : : "memory");
224} 227}
225 228
226extern void slb_initialize(void);
227
228/* 229/*
229 * Allocate segment tables for secondary CPUs. These must all go in 230 * Allocate segment tables for secondary CPUs. These must all go in
230 * the first (bolted) segment, so that do_stab_bolted won't get a 231 * the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +244,21 @@ void stabs_alloc(void)
243 if (cpu == 0) 244 if (cpu == 0)
244 continue; /* stab for CPU 0 is statically allocated */ 245 continue; /* stab for CPU 0 is statically allocated */
245 246
246 newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT); 247 newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
248 1<<SID_SHIFT);
247 if (! newstab) 249 if (! newstab)
248 panic("Unable to allocate segment table for CPU %d.\n", 250 panic("Unable to allocate segment table for CPU %d.\n",
249 cpu); 251 cpu);
250 252
251 newstab += KERNELBASE; 253 newstab += KERNELBASE;
252 254
253 memset((void *)newstab, 0, PAGE_SIZE); 255 memset((void *)newstab, 0, HW_PAGE_SIZE);
254 256
255 paca[cpu].stab_addr = newstab; 257 paca[cpu].stab_addr = newstab;
256 paca[cpu].stab_real = virt_to_abs(newstab); 258 paca[cpu].stab_real = virt_to_abs(newstab);
257 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); 259 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx "
260 "virtual, 0x%lx absolute\n",
261 cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
258 } 262 }
259} 263}
260 264
@@ -267,13 +271,9 @@ void stab_initialize(unsigned long stab)
267{ 271{
268 unsigned long vsid = get_kernel_vsid(KERNELBASE); 272 unsigned long vsid = get_kernel_vsid(KERNELBASE);
269 273
270 if (cpu_has_feature(CPU_FTR_SLB)) { 274 asm volatile("isync; slbia; isync":::"memory");
271 slb_initialize(); 275 make_ste(stab, GET_ESID(KERNELBASE), vsid);
272 } else {
273 asm volatile("isync; slbia; isync":::"memory");
274 make_ste(stab, GET_ESID(KERNELBASE), vsid);
275 276
276 /* Order update */ 277 /* Order update */
277 asm volatile("sync":::"memory"); 278 asm volatile("sync":::"memory");
278 }
279} 279}
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4f..53e31b834ace 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
21 * as published by the Free Software Foundation; either version 21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version. 22 * 2 of the License, or (at your option) any later version.
23 */ 23 */
24
24#include <linux/config.h> 25#include <linux/config.h>
25#include <linux/kernel.h> 26#include <linux/kernel.h>
26#include <linux/mm.h> 27#include <linux/mm.h>
@@ -30,7 +31,7 @@
30#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
31#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
32#include <asm/tlb.h> 33#include <asm/tlb.h>
33#include <linux/highmem.h> 34#include <asm/bug.h>
34 35
35DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); 36DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
36 37
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
126 * (if we remove it we should clear the _PTE_HPTEFLAGS bits). 127 * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
127 */ 128 */
128void hpte_update(struct mm_struct *mm, unsigned long addr, 129void hpte_update(struct mm_struct *mm, unsigned long addr,
129 unsigned long pte, int wrprot) 130 pte_t *ptep, unsigned long pte, int huge)
130{ 131{
131 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 132 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
132 unsigned long vsid; 133 unsigned long vsid;
134 unsigned int psize = mmu_virtual_psize;
133 int i; 135 int i;
134 136
135 i = batch->index; 137 i = batch->index;
136 138
139 /* We mask the address for the base page size. Huge pages will
140 * have applied their own masking already
141 */
142 addr &= PAGE_MASK;
143
144 /* Get page size (maybe move back to caller) */
145 if (huge) {
146#ifdef CONFIG_HUGETLB_PAGE
147 psize = mmu_huge_psize;
148#else
149 BUG();
150#endif
151 }
152
137 /* 153 /*
138 * This can happen when we are in the middle of a TLB batch and 154 * This can happen when we are in the middle of a TLB batch and
139 * we encounter memory pressure (eg copy_page_range when it tries 155 * we encounter memory pressure (eg copy_page_range when it tries
140 * to allocate a new pte). If we have to reclaim memory and end 156 * to allocate a new pte). If we have to reclaim memory and end
141 * up scanning and resetting referenced bits then our batch context 157 * up scanning and resetting referenced bits then our batch context
142 * will change mid stream. 158 * will change mid stream.
159 *
160 * We also need to ensure only one page size is present in a given
161 * batch
143 */ 162 */
144 if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { 163 if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
145 flush_tlb_pending(); 164 flush_tlb_pending();
146 i = 0; 165 i = 0;
147 } 166 }
148 if (i == 0) { 167 if (i == 0) {
149 batch->mm = mm; 168 batch->mm = mm;
150 batch->large = pte_huge(pte); 169 batch->psize = psize;
151 } 170 }
152 if (addr < KERNELBASE) { 171 if (addr < KERNELBASE) {
153 vsid = get_vsid(mm->context.id, addr); 172 vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
155 } else 174 } else
156 vsid = get_kernel_vsid(addr); 175 vsid = get_kernel_vsid(addr);
157 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); 176 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
158 batch->pte[i] = __pte(pte); 177 batch->pte[i] = __real_pte(__pte(pte), ptep);
159 batch->index = ++i; 178 batch->index = ++i;
160 if (i >= PPC64_TLB_BATCH_NR) 179 if (i >= PPC64_TLB_BATCH_NR)
161 flush_tlb_pending(); 180 flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
177 local = 1; 196 local = 1;
178 197
179 if (i == 1) 198 if (i == 1)
180 flush_hash_page(batch->vaddr[0], batch->pte[0], local); 199 flush_hash_page(batch->vaddr[0], batch->pte[0],
200 batch->psize, local);
181 else 201 else
182 flush_hash_range(i, local); 202 flush_hash_range(i, local);
183 batch->index = 0; 203 batch->index = 0;