aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2005-11-06 19:06:55 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-06 19:56:47 -0500
commit3c726f8dee6f55e96475574e9f645327e461884c (patch)
treef67c381e8f57959aa4a94bda4c68e24253cd8171
parentf912696ab330bf539231d1f8032320f2a08b850f (diff)
[PATCH] ppc64: support 64k pages
Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/powerpc/Kconfig9
-rw-r--r--arch/powerpc/kernel/asm-offsets.c3
-rw-r--r--arch/powerpc/kernel/cputable.c4
-rw-r--r--arch/powerpc/kernel/head_64.S300
-rw-r--r--arch/powerpc/kernel/lparmap.c2
-rw-r--r--arch/powerpc/kernel/process.c6
-rw-r--r--arch/powerpc/kernel/prom.c76
-rw-r--r--arch/powerpc/kernel/setup_64.c31
-rw-r--r--arch/powerpc/lib/copypage_64.S2
-rw-r--r--arch/powerpc/lib/copyuser_64.S4
-rw-r--r--arch/powerpc/mm/hash_low_64.S613
-rw-r--r--arch/powerpc/mm/hash_native_64.c377
-rw-r--r--arch/powerpc/mm/hash_utils_64.c532
-rw-r--r--arch/powerpc/mm/hugetlbpage.c134
-rw-r--r--arch/powerpc/mm/init_64.c18
-rw-r--r--arch/powerpc/mm/mem.c56
-rw-r--r--arch/powerpc/mm/pgtable_64.c22
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c15
-rw-r--r--arch/powerpc/mm/slb.c102
-rw-r--r--arch/powerpc/mm/slb_low.S220
-rw-r--r--arch/powerpc/mm/stab.c30
-rw-r--r--arch/powerpc/mm/tlb_64.c32
-rw-r--r--arch/powerpc/platforms/iseries/htab.c65
-rw-r--r--arch/powerpc/platforms/iseries/hvlog.c4
-rw-r--r--arch/powerpc/platforms/iseries/iommu.c74
-rw-r--r--arch/powerpc/platforms/iseries/setup.c13
-rw-r--r--arch/powerpc/platforms/iseries/vio.c39
-rw-r--r--arch/powerpc/platforms/iseries/viopath.c16
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c115
-rw-r--r--arch/ppc64/Kconfig13
-rw-r--r--arch/ppc64/kernel/asm-offsets.c3
-rw-r--r--arch/ppc64/kernel/head.S300
-rw-r--r--arch/ppc64/kernel/pacaData.c2
-rw-r--r--arch/ppc64/kernel/prom.c94
-rw-r--r--include/asm-powerpc/cputable.h3
-rw-r--r--include/asm-powerpc/iommu.h5
-rw-r--r--include/asm-powerpc/machdep.h10
-rw-r--r--include/asm-powerpc/prom.h8
-rw-r--r--include/asm-powerpc/system.h2
-rw-r--r--include/asm-powerpc/thread_info.h20
-rw-r--r--include/asm-powerpc/tlbflush.h9
-rw-r--r--include/asm-ppc64/mmu.h208
-rw-r--r--include/asm-ppc64/mmu_context.h15
-rw-r--r--include/asm-ppc64/paca.h13
-rw-r--r--include/asm-ppc64/page.h147
-rw-r--r--include/asm-ppc64/pgalloc.h47
-rw-r--r--include/asm-ppc64/pgtable-4k.h88
-rw-r--r--include/asm-ppc64/pgtable-64k.h87
-rw-r--r--include/asm-ppc64/pgtable.h160
-rw-r--r--include/asm-ppc64/prom.h8
-rw-r--r--include/asm-ppc64/system.h2
-rw-r--r--mm/hugetlb.c3
52 files changed, 2949 insertions, 1212 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f4e25c648fbb..ca7acb0c79f0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -603,6 +603,15 @@ config NODES_SPAN_OTHER_NODES
603 def_bool y 603 def_bool y
604 depends on NEED_MULTIPLE_NODES 604 depends on NEED_MULTIPLE_NODES
605 605
606config PPC_64K_PAGES
607 bool "64k page size"
608 help
609 This option changes the kernel logical page size to 64k. On machines
610 without processor support for 64k pages, the kernel will simulate
611 them by loading each individual 4k page on demand transparently,
612 while on hardware with such support, it will be used to map
613 normal application pages.
614
606config SCHED_SMT 615config SCHED_SMT
607 bool "SMT (Hyperthreading) scheduler support" 616 bool "SMT (Hyperthreading) scheduler support"
608 depends on PPC64 && SMP 617 depends on PPC64 && SMP
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index bc5a3689cc05..b75757251994 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -125,6 +125,9 @@ int main(void)
125 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); 125 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
126 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); 126 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
127 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 127 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
128#ifdef CONFIG_PPC_64K_PAGES
129 DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
130#endif
128#ifdef CONFIG_HUGETLB_PAGE 131#ifdef CONFIG_HUGETLB_PAGE
129 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); 132 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
130 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); 133 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b91345fa0805..33c63bcf69f8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -240,7 +240,7 @@ struct cpu_spec cpu_specs[] = {
240 .oprofile_model = &op_model_power4, 240 .oprofile_model = &op_model_power4,
241#endif 241#endif
242 }, 242 },
243 { /* Power5 */ 243 { /* Power5 GR */
244 .pvr_mask = 0xffff0000, 244 .pvr_mask = 0xffff0000,
245 .pvr_value = 0x003a0000, 245 .pvr_value = 0x003a0000,
246 .cpu_name = "POWER5 (gr)", 246 .cpu_name = "POWER5 (gr)",
@@ -255,7 +255,7 @@ struct cpu_spec cpu_specs[] = {
255 .oprofile_model = &op_model_power4, 255 .oprofile_model = &op_model_power4,
256#endif 256#endif
257 }, 257 },
258 { /* Power5 */ 258 { /* Power5 GS */
259 .pvr_mask = 0xffff0000, 259 .pvr_mask = 0xffff0000,
260 .pvr_value = 0x003b0000, 260 .pvr_value = 0x003b0000,
261 .cpu_name = "POWER5 (gs)", 261 .cpu_name = "POWER5 (gs)",
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 45d81976987f..16ab40daa738 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -195,11 +195,11 @@ exception_marker:
195#define EX_R12 24 195#define EX_R12 24
196#define EX_R13 32 196#define EX_R13 32
197#define EX_SRR0 40 197#define EX_SRR0 40
198#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
199#define EX_DAR 48 198#define EX_DAR 48
200#define EX_LR 48 /* SLB miss saves LR, but not DAR */
201#define EX_DSISR 56 199#define EX_DSISR 56
202#define EX_CCR 60 200#define EX_CCR 60
201#define EX_R3 64
202#define EX_LR 72
203 203
204#define EXCEPTION_PROLOG_PSERIES(area, label) \ 204#define EXCEPTION_PROLOG_PSERIES(area, label) \
205 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ 205 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +419,22 @@ data_access_slb_pSeries:
419 mtspr SPRN_SPRG1,r13 419 mtspr SPRN_SPRG1,r13
420 RUNLATCH_ON(r13) 420 RUNLATCH_ON(r13)
421 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ 421 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
422 std r3,PACA_EXSLB+EX_R3(r13)
423 mfspr r3,SPRN_DAR
422 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 424 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
425 mfcr r9
426#ifdef __DISABLED__
427 /* Keep that around for when we re-implement dynamic VSIDs */
428 cmpdi r3,0
429 bge slb_miss_user_pseries
430#endif /* __DISABLED__ */
423 std r10,PACA_EXSLB+EX_R10(r13) 431 std r10,PACA_EXSLB+EX_R10(r13)
424 std r11,PACA_EXSLB+EX_R11(r13) 432 std r11,PACA_EXSLB+EX_R11(r13)
425 std r12,PACA_EXSLB+EX_R12(r13) 433 std r12,PACA_EXSLB+EX_R12(r13)
426 std r3,PACA_EXSLB+EX_R3(r13) 434 mfspr r10,SPRN_SPRG1
427 mfspr r9,SPRN_SPRG1 435 std r10,PACA_EXSLB+EX_R13(r13)
428 std r9,PACA_EXSLB+EX_R13(r13)
429 mfcr r9
430 mfspr r12,SPRN_SRR1 /* and SRR1 */ 436 mfspr r12,SPRN_SRR1 /* and SRR1 */
431 mfspr r3,SPRN_DAR 437 b .slb_miss_realmode /* Rel. branch works in real mode */
432 b .do_slb_miss /* Rel. branch works in real mode */
433 438
434 STD_EXCEPTION_PSERIES(0x400, instruction_access) 439 STD_EXCEPTION_PSERIES(0x400, instruction_access)
435 440
@@ -440,17 +445,22 @@ instruction_access_slb_pSeries:
440 mtspr SPRN_SPRG1,r13 445 mtspr SPRN_SPRG1,r13
441 RUNLATCH_ON(r13) 446 RUNLATCH_ON(r13)
442 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ 447 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
448 std r3,PACA_EXSLB+EX_R3(r13)
449 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
443 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 450 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
451 mfcr r9
452#ifdef __DISABLED__
453 /* Keep that around for when we re-implement dynamic VSIDs */
454 cmpdi r3,0
455 bge slb_miss_user_pseries
456#endif /* __DISABLED__ */
444 std r10,PACA_EXSLB+EX_R10(r13) 457 std r10,PACA_EXSLB+EX_R10(r13)
445 std r11,PACA_EXSLB+EX_R11(r13) 458 std r11,PACA_EXSLB+EX_R11(r13)
446 std r12,PACA_EXSLB+EX_R12(r13) 459 std r12,PACA_EXSLB+EX_R12(r13)
447 std r3,PACA_EXSLB+EX_R3(r13) 460 mfspr r10,SPRN_SPRG1
448 mfspr r9,SPRN_SPRG1 461 std r10,PACA_EXSLB+EX_R13(r13)
449 std r9,PACA_EXSLB+EX_R13(r13)
450 mfcr r9
451 mfspr r12,SPRN_SRR1 /* and SRR1 */ 462 mfspr r12,SPRN_SRR1 /* and SRR1 */
452 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ 463 b .slb_miss_realmode /* Rel. branch works in real mode */
453 b .do_slb_miss /* Rel. branch works in real mode */
454 464
455 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) 465 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
456 STD_EXCEPTION_PSERIES(0x600, alignment) 466 STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -509,6 +519,38 @@ _GLOBAL(do_stab_bolted_pSeries)
509 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) 519 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
510 520
511/* 521/*
522 * We have some room here we use that to put
523 * the peries slb miss user trampoline code so it's reasonably
524 * away from slb_miss_user_common to avoid problems with rfid
525 *
526 * This is used for when the SLB miss handler has to go virtual,
527 * which doesn't happen for now anymore but will once we re-implement
528 * dynamic VSIDs for shared page tables
529 */
530#ifdef __DISABLED__
531slb_miss_user_pseries:
532 std r10,PACA_EXGEN+EX_R10(r13)
533 std r11,PACA_EXGEN+EX_R11(r13)
534 std r12,PACA_EXGEN+EX_R12(r13)
535 mfspr r10,SPRG1
536 ld r11,PACA_EXSLB+EX_R9(r13)
537 ld r12,PACA_EXSLB+EX_R3(r13)
538 std r10,PACA_EXGEN+EX_R13(r13)
539 std r11,PACA_EXGEN+EX_R9(r13)
540 std r12,PACA_EXGEN+EX_R3(r13)
541 clrrdi r12,r13,32
542 mfmsr r10
543 mfspr r11,SRR0 /* save SRR0 */
544 ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
545 ori r10,r10,MSR_IR|MSR_DR|MSR_RI
546 mtspr SRR0,r12
547 mfspr r12,SRR1 /* and SRR1 */
548 mtspr SRR1,r10
549 rfid
550 b . /* prevent spec. execution */
551#endif /* __DISABLED__ */
552
553/*
512 * Vectors for the FWNMI option. Share common code. 554 * Vectors for the FWNMI option. Share common code.
513 */ 555 */
514 .globl system_reset_fwnmi 556 .globl system_reset_fwnmi
@@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
559 .globl data_access_slb_iSeries 601 .globl data_access_slb_iSeries
560data_access_slb_iSeries: 602data_access_slb_iSeries:
561 mtspr SPRN_SPRG1,r13 /* save r13 */ 603 mtspr SPRN_SPRG1,r13 /* save r13 */
562 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 604 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
563 std r3,PACA_EXSLB+EX_R3(r13) 605 std r3,PACA_EXSLB+EX_R3(r13)
564 ld r12,PACALPPACA+LPPACASRR1(r13)
565 mfspr r3,SPRN_DAR 606 mfspr r3,SPRN_DAR
566 b .do_slb_miss 607 std r9,PACA_EXSLB+EX_R9(r13)
608 mfcr r9
609#ifdef __DISABLED__
610 cmpdi r3,0
611 bge slb_miss_user_iseries
612#endif
613 std r10,PACA_EXSLB+EX_R10(r13)
614 std r11,PACA_EXSLB+EX_R11(r13)
615 std r12,PACA_EXSLB+EX_R12(r13)
616 mfspr r10,SPRN_SPRG1
617 std r10,PACA_EXSLB+EX_R13(r13)
618 ld r12,PACALPPACA+LPPACASRR1(r13);
619 b .slb_miss_realmode
567 620
568 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) 621 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
569 622
570 .globl instruction_access_slb_iSeries 623 .globl instruction_access_slb_iSeries
571instruction_access_slb_iSeries: 624instruction_access_slb_iSeries:
572 mtspr SPRN_SPRG1,r13 /* save r13 */ 625 mtspr SPRN_SPRG1,r13 /* save r13 */
573 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 626 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
574 std r3,PACA_EXSLB+EX_R3(r13) 627 std r3,PACA_EXSLB+EX_R3(r13)
575 ld r12,PACALPPACA+LPPACASRR1(r13) 628 ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
576 ld r3,PACALPPACA+LPPACASRR0(r13) 629 std r9,PACA_EXSLB+EX_R9(r13)
577 b .do_slb_miss 630 mfcr r9
631#ifdef __DISABLED__
632 cmpdi r3,0
633 bge .slb_miss_user_iseries
634#endif
635 std r10,PACA_EXSLB+EX_R10(r13)
636 std r11,PACA_EXSLB+EX_R11(r13)
637 std r12,PACA_EXSLB+EX_R12(r13)
638 mfspr r10,SPRN_SPRG1
639 std r10,PACA_EXSLB+EX_R13(r13)
640 ld r12,PACALPPACA+LPPACASRR1(r13);
641 b .slb_miss_realmode
642
643#ifdef __DISABLED__
644slb_miss_user_iseries:
645 std r10,PACA_EXGEN+EX_R10(r13)
646 std r11,PACA_EXGEN+EX_R11(r13)
647 std r12,PACA_EXGEN+EX_R12(r13)
648 mfspr r10,SPRG1
649 ld r11,PACA_EXSLB+EX_R9(r13)
650 ld r12,PACA_EXSLB+EX_R3(r13)
651 std r10,PACA_EXGEN+EX_R13(r13)
652 std r11,PACA_EXGEN+EX_R9(r13)
653 std r12,PACA_EXGEN+EX_R3(r13)
654 EXCEPTION_PROLOG_ISERIES_2
655 b slb_miss_user_common
656#endif
578 657
579 MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) 658 MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
580 STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) 659 STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +888,126 @@ instruction_access_common:
809 li r5,0x400 888 li r5,0x400
810 b .do_hash_page /* Try to handle as hpte fault */ 889 b .do_hash_page /* Try to handle as hpte fault */
811 890
891/*
892 * Here is the common SLB miss user that is used when going to virtual
893 * mode for SLB misses, that is currently not used
894 */
895#ifdef __DISABLED__
896 .align 7
897 .globl slb_miss_user_common
898slb_miss_user_common:
899 mflr r10
900 std r3,PACA_EXGEN+EX_DAR(r13)
901 stw r9,PACA_EXGEN+EX_CCR(r13)
902 std r10,PACA_EXGEN+EX_LR(r13)
903 std r11,PACA_EXGEN+EX_SRR0(r13)
904 bl .slb_allocate_user
905
906 ld r10,PACA_EXGEN+EX_LR(r13)
907 ld r3,PACA_EXGEN+EX_R3(r13)
908 lwz r9,PACA_EXGEN+EX_CCR(r13)
909 ld r11,PACA_EXGEN+EX_SRR0(r13)
910 mtlr r10
911 beq- slb_miss_fault
912
913 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
914 beq- unrecov_user_slb
915 mfmsr r10
916
917.machine push
918.machine "power4"
919 mtcrf 0x80,r9
920.machine pop
921
922 clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
923 mtmsrd r10,1
924
925 mtspr SRR0,r11
926 mtspr SRR1,r12
927
928 ld r9,PACA_EXGEN+EX_R9(r13)
929 ld r10,PACA_EXGEN+EX_R10(r13)
930 ld r11,PACA_EXGEN+EX_R11(r13)
931 ld r12,PACA_EXGEN+EX_R12(r13)
932 ld r13,PACA_EXGEN+EX_R13(r13)
933 rfid
934 b .
935
936slb_miss_fault:
937 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
938 ld r4,PACA_EXGEN+EX_DAR(r13)
939 li r5,0
940 std r4,_DAR(r1)
941 std r5,_DSISR(r1)
942 b .handle_page_fault
943
944unrecov_user_slb:
945 EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
946 DISABLE_INTS
947 bl .save_nvgprs
9481: addi r3,r1,STACK_FRAME_OVERHEAD
949 bl .unrecoverable_exception
950 b 1b
951
952#endif /* __DISABLED__ */
953
954
955/*
956 * r13 points to the PACA, r9 contains the saved CR,
957 * r12 contain the saved SRR1, SRR0 is still ready for return
958 * r3 has the faulting address
959 * r9 - r13 are saved in paca->exslb.
960 * r3 is saved in paca->slb_r3
961 * We assume we aren't going to take any exceptions during this procedure.
962 */
963_GLOBAL(slb_miss_realmode)
964 mflr r10
965
966 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
967 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
968
969 bl .slb_allocate_realmode
970
971 /* All done -- return from exception. */
972
973 ld r10,PACA_EXSLB+EX_LR(r13)
974 ld r3,PACA_EXSLB+EX_R3(r13)
975 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
976#ifdef CONFIG_PPC_ISERIES
977 ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
978#endif /* CONFIG_PPC_ISERIES */
979
980 mtlr r10
981
982 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
983 beq- unrecov_slb
984
985.machine push
986.machine "power4"
987 mtcrf 0x80,r9
988 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
989.machine pop
990
991#ifdef CONFIG_PPC_ISERIES
992 mtspr SPRN_SRR0,r11
993 mtspr SPRN_SRR1,r12
994#endif /* CONFIG_PPC_ISERIES */
995 ld r9,PACA_EXSLB+EX_R9(r13)
996 ld r10,PACA_EXSLB+EX_R10(r13)
997 ld r11,PACA_EXSLB+EX_R11(r13)
998 ld r12,PACA_EXSLB+EX_R12(r13)
999 ld r13,PACA_EXSLB+EX_R13(r13)
1000 rfid
1001 b . /* prevent speculative execution */
1002
1003unrecov_slb:
1004 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
1005 DISABLE_INTS
1006 bl .save_nvgprs
10071: addi r3,r1,STACK_FRAME_OVERHEAD
1008 bl .unrecoverable_exception
1009 b 1b
1010
812 .align 7 1011 .align 7
813 .globl hardware_interrupt_common 1012 .globl hardware_interrupt_common
814 .globl hardware_interrupt_entry 1013 .globl hardware_interrupt_entry
@@ -1139,62 +1338,6 @@ _GLOBAL(do_stab_bolted)
1139 b . /* prevent speculative execution */ 1338 b . /* prevent speculative execution */
1140 1339
1141/* 1340/*
1142 * r13 points to the PACA, r9 contains the saved CR,
1143 * r11 and r12 contain the saved SRR0 and SRR1.
1144 * r3 has the faulting address
1145 * r9 - r13 are saved in paca->exslb.
1146 * r3 is saved in paca->slb_r3
1147 * We assume we aren't going to take any exceptions during this procedure.
1148 */
1149_GLOBAL(do_slb_miss)
1150 mflr r10
1151
1152 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
1153 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
1154
1155 bl .slb_allocate /* handle it */
1156
1157 /* All done -- return from exception. */
1158
1159 ld r10,PACA_EXSLB+EX_LR(r13)
1160 ld r3,PACA_EXSLB+EX_R3(r13)
1161 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
1162#ifdef CONFIG_PPC_ISERIES
1163 ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
1164#endif /* CONFIG_PPC_ISERIES */
1165
1166 mtlr r10
1167
1168 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
1169 beq- unrecov_slb
1170
1171.machine push
1172.machine "power4"
1173 mtcrf 0x80,r9
1174 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
1175.machine pop
1176
1177#ifdef CONFIG_PPC_ISERIES
1178 mtspr SPRN_SRR0,r11
1179 mtspr SPRN_SRR1,r12
1180#endif /* CONFIG_PPC_ISERIES */
1181 ld r9,PACA_EXSLB+EX_R9(r13)
1182 ld r10,PACA_EXSLB+EX_R10(r13)
1183 ld r11,PACA_EXSLB+EX_R11(r13)
1184 ld r12,PACA_EXSLB+EX_R12(r13)
1185 ld r13,PACA_EXSLB+EX_R13(r13)
1186 rfid
1187 b . /* prevent speculative execution */
1188
1189unrecov_slb:
1190 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
1191 DISABLE_INTS
1192 bl .save_nvgprs
11931: addi r3,r1,STACK_FRAME_OVERHEAD
1194 bl .unrecoverable_exception
1195 b 1b
1196
1197/*
1198 * Space for CPU0's segment table. 1341 * Space for CPU0's segment table.
1199 * 1342 *
1200 * On iSeries, the hypervisor must fill in at least one entry before 1343 * On iSeries, the hypervisor must fill in at least one entry before
@@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start)
1569#endif 1712#endif
1570 /* Initialize the first segment table (or SLB) entry */ 1713 /* Initialize the first segment table (or SLB) entry */
1571 ld r3,PACASTABVIRT(r13) /* get addr of segment table */ 1714 ld r3,PACASTABVIRT(r13) /* get addr of segment table */
1715BEGIN_FTR_SECTION
1572 bl .stab_initialize 1716 bl .stab_initialize
1717END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
1718 bl .slb_initialize
1573 1719
1574 /* Initialize the kernel stack. Just a repeat for iSeries. */ 1720 /* Initialize the kernel stack. Just a repeat for iSeries. */
1575 LOADADDR(r3,current_set) 1721 LOADADDR(r3,current_set)
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index eded971d1bf9..5a05a797485f 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
25 .xRanges = { 25 .xRanges = {
26 { .xPages = HvPagesToMap, 26 { .xPages = HvPagesToMap,
27 .xOffset = 0, 27 .xOffset = 0,
28 .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT), 28 .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT),
29 }, 29 },
30 }, 30 },
31}; 31};
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 96843211cc5c..7f64f0464d44 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -554,12 +554,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
554#ifdef CONFIG_PPC64 554#ifdef CONFIG_PPC64
555 if (cpu_has_feature(CPU_FTR_SLB)) { 555 if (cpu_has_feature(CPU_FTR_SLB)) {
556 unsigned long sp_vsid = get_kernel_vsid(sp); 556 unsigned long sp_vsid = get_kernel_vsid(sp);
557 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
557 558
558 sp_vsid <<= SLB_VSID_SHIFT; 559 sp_vsid <<= SLB_VSID_SHIFT;
559 sp_vsid |= SLB_VSID_KERNEL; 560 sp_vsid |= SLB_VSID_KERNEL | llp;
560 if (cpu_has_feature(CPU_FTR_16M_PAGE))
561 sp_vsid |= SLB_VSID_L;
562
563 p->thread.ksp_vsid = sp_vsid; 561 p->thread.ksp_vsid = sp_vsid;
564 } 562 }
565 563
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index eec2da695508..3675ef4bac90 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -724,10 +724,10 @@ static inline char *find_flat_dt_string(u32 offset)
724 * used to extract the memory informations at boot before we can 724 * used to extract the memory informations at boot before we can
725 * unflatten the tree 725 * unflatten the tree
726 */ 726 */
727static int __init scan_flat_dt(int (*it)(unsigned long node, 727int __init of_scan_flat_dt(int (*it)(unsigned long node,
728 const char *uname, int depth, 728 const char *uname, int depth,
729 void *data), 729 void *data),
730 void *data) 730 void *data)
731{ 731{
732 unsigned long p = ((unsigned long)initial_boot_params) + 732 unsigned long p = ((unsigned long)initial_boot_params) +
733 initial_boot_params->off_dt_struct; 733 initial_boot_params->off_dt_struct;
@@ -784,8 +784,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
784 * This function can be used within scan_flattened_dt callback to get 784 * This function can be used within scan_flattened_dt callback to get
785 * access to properties 785 * access to properties
786 */ 786 */
787static void* __init get_flat_dt_prop(unsigned long node, const char *name, 787void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
788 unsigned long *size) 788 unsigned long *size)
789{ 789{
790 unsigned long p = node; 790 unsigned long p = node;
791 791
@@ -1087,7 +1087,7 @@ void __init unflatten_device_tree(void)
1087static int __init early_init_dt_scan_cpus(unsigned long node, 1087static int __init early_init_dt_scan_cpus(unsigned long node,
1088 const char *uname, int depth, void *data) 1088 const char *uname, int depth, void *data)
1089{ 1089{
1090 char *type = get_flat_dt_prop(node, "device_type", NULL); 1090 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
1091 u32 *prop; 1091 u32 *prop;
1092 unsigned long size = 0; 1092 unsigned long size = 0;
1093 1093
@@ -1095,19 +1095,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1095 if (type == NULL || strcmp(type, "cpu") != 0) 1095 if (type == NULL || strcmp(type, "cpu") != 0)
1096 return 0; 1096 return 0;
1097 1097
1098#ifdef CONFIG_PPC_PSERIES
1099 /* On LPAR, look for the first ibm,pft-size property for the hash table size
1100 */
1101 if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
1102 u32 *pft_size;
1103 pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL);
1104 if (pft_size != NULL) {
1105 /* pft_size[0] is the NUMA CEC cookie */
1106 ppc64_pft_size = pft_size[1];
1107 }
1108 }
1109#endif
1110
1111 boot_cpuid = 0; 1098 boot_cpuid = 0;
1112 boot_cpuid_phys = 0; 1099 boot_cpuid_phys = 0;
1113 if (initial_boot_params && initial_boot_params->version >= 2) { 1100 if (initial_boot_params && initial_boot_params->version >= 2) {
@@ -1117,8 +1104,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1117 boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; 1104 boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
1118 } else { 1105 } else {
1119 /* Check if it's the boot-cpu, set it's hw index now */ 1106 /* Check if it's the boot-cpu, set it's hw index now */
1120 if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { 1107 if (of_get_flat_dt_prop(node,
1121 prop = get_flat_dt_prop(node, "reg", NULL); 1108 "linux,boot-cpu", NULL) != NULL) {
1109 prop = of_get_flat_dt_prop(node, "reg", NULL);
1122 if (prop != NULL) 1110 if (prop != NULL)
1123 boot_cpuid_phys = *prop; 1111 boot_cpuid_phys = *prop;
1124 } 1112 }
@@ -1127,14 +1115,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1127 1115
1128#ifdef CONFIG_ALTIVEC 1116#ifdef CONFIG_ALTIVEC
1129 /* Check if we have a VMX and eventually update CPU features */ 1117 /* Check if we have a VMX and eventually update CPU features */
1130 prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size); 1118 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size);
1131 if (prop && (*prop) > 0) { 1119 if (prop && (*prop) > 0) {
1132 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; 1120 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
1133 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; 1121 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
1134 } 1122 }
1135 1123
1136 /* Same goes for Apple's "altivec" property */ 1124 /* Same goes for Apple's "altivec" property */
1137 prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); 1125 prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
1138 if (prop) { 1126 if (prop) {
1139 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; 1127 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
1140 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; 1128 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1147,7 +1135,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1147 * this by looking at the size of the ibm,ppc-interrupt-server#s 1135 * this by looking at the size of the ibm,ppc-interrupt-server#s
1148 * property 1136 * property
1149 */ 1137 */
1150 prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", 1138 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
1151 &size); 1139 &size);
1152 cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; 1140 cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
1153 if (prop && ((size / sizeof(u32)) > 1)) 1141 if (prop && ((size / sizeof(u32)) > 1))
@@ -1170,7 +1158,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1170 return 0; 1158 return 0;
1171 1159
1172 /* get platform type */ 1160 /* get platform type */
1173 prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); 1161 prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
1174 if (prop == NULL) 1162 if (prop == NULL)
1175 return 0; 1163 return 0;
1176#ifdef CONFIG_PPC64 1164#ifdef CONFIG_PPC64
@@ -1183,21 +1171,21 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1183 1171
1184#ifdef CONFIG_PPC64 1172#ifdef CONFIG_PPC64
1185 /* check if iommu is forced on or off */ 1173 /* check if iommu is forced on or off */
1186 if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) 1174 if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
1187 iommu_is_off = 1; 1175 iommu_is_off = 1;
1188 if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) 1176 if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
1189 iommu_force_on = 1; 1177 iommu_force_on = 1;
1190#endif 1178#endif
1191 1179
1192 lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL); 1180 lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
1193 if (lprop) 1181 if (lprop)
1194 memory_limit = *lprop; 1182 memory_limit = *lprop;
1195 1183
1196#ifdef CONFIG_PPC64 1184#ifdef CONFIG_PPC64
1197 lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); 1185 lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
1198 if (lprop) 1186 if (lprop)
1199 tce_alloc_start = *lprop; 1187 tce_alloc_start = *lprop;
1200 lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); 1188 lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
1201 if (lprop) 1189 if (lprop)
1202 tce_alloc_end = *lprop; 1190 tce_alloc_end = *lprop;
1203#endif 1191#endif
@@ -1209,9 +1197,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1209 { 1197 {
1210 u64 *basep, *entryp; 1198 u64 *basep, *entryp;
1211 1199
1212 basep = get_flat_dt_prop(node, "linux,rtas-base", NULL); 1200 basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
1213 entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL); 1201 entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
1214 prop = get_flat_dt_prop(node, "linux,rtas-size", NULL); 1202 prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
1215 if (basep && entryp && prop) { 1203 if (basep && entryp && prop) {
1216 rtas.base = *basep; 1204 rtas.base = *basep;
1217 rtas.entry = *entryp; 1205 rtas.entry = *entryp;
@@ -1232,11 +1220,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
1232 if (depth != 0) 1220 if (depth != 0)
1233 return 0; 1221 return 0;
1234 1222
1235 prop = get_flat_dt_prop(node, "#size-cells", NULL); 1223 prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
1236 dt_root_size_cells = (prop == NULL) ? 1 : *prop; 1224 dt_root_size_cells = (prop == NULL) ? 1 : *prop;
1237 DBG("dt_root_size_cells = %x\n", dt_root_size_cells); 1225 DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
1238 1226
1239 prop = get_flat_dt_prop(node, "#address-cells", NULL); 1227 prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
1240 dt_root_addr_cells = (prop == NULL) ? 2 : *prop; 1228 dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
1241 DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); 1229 DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
1242 1230
@@ -1271,7 +1259,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
1271static int __init early_init_dt_scan_memory(unsigned long node, 1259static int __init early_init_dt_scan_memory(unsigned long node,
1272 const char *uname, int depth, void *data) 1260 const char *uname, int depth, void *data)
1273{ 1261{
1274 char *type = get_flat_dt_prop(node, "device_type", NULL); 1262 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
1275 cell_t *reg, *endp; 1263 cell_t *reg, *endp;
1276 unsigned long l; 1264 unsigned long l;
1277 1265
@@ -1279,7 +1267,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
1279 if (type == NULL || strcmp(type, "memory") != 0) 1267 if (type == NULL || strcmp(type, "memory") != 0)
1280 return 0; 1268 return 0;
1281 1269
1282 reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); 1270 reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
1283 if (reg == NULL) 1271 if (reg == NULL)
1284 return 0; 1272 return 0;
1285 1273
@@ -1343,12 +1331,12 @@ void __init early_init_devtree(void *params)
1343 * device-tree, including the platform type, initrd location and 1331 * device-tree, including the platform type, initrd location and
1344 * size, TCE reserve, and more ... 1332 * size, TCE reserve, and more ...
1345 */ 1333 */
1346 scan_flat_dt(early_init_dt_scan_chosen, NULL); 1334 of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
1347 1335
1348 /* Scan memory nodes and rebuild LMBs */ 1336 /* Scan memory nodes and rebuild LMBs */
1349 lmb_init(); 1337 lmb_init();
1350 scan_flat_dt(early_init_dt_scan_root, NULL); 1338 of_scan_flat_dt(early_init_dt_scan_root, NULL);
1351 scan_flat_dt(early_init_dt_scan_memory, NULL); 1339 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
1352 lmb_enforce_memory_limit(memory_limit); 1340 lmb_enforce_memory_limit(memory_limit);
1353 lmb_analyze(); 1341 lmb_analyze();
1354#ifdef CONFIG_PPC64 1342#ifdef CONFIG_PPC64
@@ -1363,10 +1351,10 @@ void __init early_init_devtree(void *params)
1363 1351
1364 DBG("Scanning CPUs ...\n"); 1352 DBG("Scanning CPUs ...\n");
1365 1353
1366 /* Retreive hash table size from flattened tree plus other 1354 /* Retreive CPU related informations from the flat tree
1367 * CPU related informations (altivec support, boot CPU ID, ...) 1355 * (altivec support, boot CPU ID, ...)
1368 */ 1356 */
1369 scan_flat_dt(early_init_dt_scan_cpus, NULL); 1357 of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
1370 1358
1371 DBG(" <- early_init_devtree()\n"); 1359 DBG(" <- early_init_devtree()\n");
1372} 1360}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6b52cce872be..b0994050024f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -277,16 +277,21 @@ void __init early_setup(unsigned long dt_ptr)
277 DBG("Found, Initializing memory management...\n"); 277 DBG("Found, Initializing memory management...\n");
278 278
279 /* 279 /*
280 * Initialize stab / SLB management 280 * Initialize the MMU Hash table and create the linear mapping
281 * of memory. Has to be done before stab/slb initialization as
282 * this is currently where the page size encoding is obtained
281 */ 283 */
282 if (!firmware_has_feature(FW_FEATURE_ISERIES)) 284 htab_initialize();
283 stab_initialize(lpaca->stab_real);
284 285
285 /* 286 /*
286 * Initialize the MMU Hash table and create the linear mapping 287 * Initialize stab / SLB management except on iSeries
287 * of memory
288 */ 288 */
289 htab_initialize(); 289 if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
290 if (cpu_has_feature(CPU_FTR_SLB))
291 slb_initialize();
292 else
293 stab_initialize(lpaca->stab_real);
294 }
290 295
291 DBG(" <- early_setup()\n"); 296 DBG(" <- early_setup()\n");
292} 297}
@@ -552,10 +557,12 @@ static void __init irqstack_early_init(void)
552 * SLB misses on them. 557 * SLB misses on them.
553 */ 558 */
554 for_each_cpu(i) { 559 for_each_cpu(i) {
555 softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, 560 softirq_ctx[i] = (struct thread_info *)
556 THREAD_SIZE, 0x10000000)); 561 __va(lmb_alloc_base(THREAD_SIZE,
557 hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, 562 THREAD_SIZE, 0x10000000));
558 THREAD_SIZE, 0x10000000)); 563 hardirq_ctx[i] = (struct thread_info *)
564 __va(lmb_alloc_base(THREAD_SIZE,
565 THREAD_SIZE, 0x10000000));
559 } 566 }
560} 567}
561#else 568#else
@@ -583,8 +590,8 @@ static void __init emergency_stack_init(void)
583 limit = min(0x10000000UL, lmb.rmo_size); 590 limit = min(0x10000000UL, lmb.rmo_size);
584 591
585 for_each_cpu(i) 592 for_each_cpu(i)
586 paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, 593 paca[i].emergency_sp =
587 limit)) + PAGE_SIZE; 594 __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
588} 595}
589 596
590/* 597/*
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 733d61618bbf..40523b140109 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -11,7 +11,7 @@
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/ppc_asm.h> 12#include <asm/ppc_asm.h>
13 13
14_GLOBAL(copy_page) 14_GLOBAL(copy_4K_page)
15 std r31,-8(1) 15 std r31,-8(1)
16 std r30,-16(1) 16 std r30,-16(1)
17 std r29,-24(1) 17 std r29,-24(1)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a0b3fbbd6fb1..6d69ef39b7df 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
24 std r4,-16(r1) 24 std r4,-16(r1)
25 std r5,-8(r1) 25 std r5,-8(r1)
26 dcbt 0,r4 26 dcbt 0,r4
27 beq .Lcopy_page 27 beq .Lcopy_page_4K
28 andi. r6,r6,7 28 andi. r6,r6,7
29 mtcrf 0x01,r5 29 mtcrf 0x01,r5
30 blt cr1,.Lshort_copy 30 blt cr1,.Lshort_copy
@@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user)
366 * above (following the .Ldst_aligned label) but it runs slightly 366 * above (following the .Ldst_aligned label) but it runs slightly
367 * slower on POWER3. 367 * slower on POWER3.
368 */ 368 */
369.Lcopy_page: 369.Lcopy_page_4K:
370 std r31,-32(1) 370 std r31,-32(1)
371 std r30,-40(1) 371 std r30,-40(1)
372 std r29,-48(1) 372 std r29,-48(1)
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eeea..e0d02c4a2615 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
1/* 1/*
2 * ppc64 MMU hashtable management routines 2 * ppc64 MMU hashtable management routines
3 * 3 *
4 * (c) Copyright IBM Corp. 2003 4 * (c) Copyright IBM Corp. 2003, 2005
5 * 5 *
6 * Maintained by: Benjamin Herrenschmidt 6 * Maintained by: Benjamin Herrenschmidt
7 * <benh@kernel.crashing.org> 7 * <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
10 * described in the kernel's COPYING file. 10 * described in the kernel's COPYING file.
11 */ 11 */
12 12
13#include <linux/config.h>
13#include <asm/reg.h> 14#include <asm/reg.h>
14#include <asm/pgtable.h> 15#include <asm/pgtable.h>
15#include <asm/mmu.h> 16#include <asm/mmu.h>
@@ -42,14 +43,24 @@
42/* Save non-volatile offsets */ 43/* Save non-volatile offsets */
43#define STK_REG(i) (112 + ((i)-14)*8) 44#define STK_REG(i) (112 + ((i)-14)*8)
44 45
46
47#ifndef CONFIG_PPC_64K_PAGES
48
49/*****************************************************************************
50 * *
51 * 4K SW & 4K HW pages implementation *
52 * *
53 *****************************************************************************/
54
55
45/* 56/*
46 * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, 57 * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
47 * pte_t *ptep, unsigned long trap, int local) 58 * pte_t *ptep, unsigned long trap, int local)
48 * 59 *
49 * Adds a page to the hash table. This is the non-LPAR version for now 60 * Adds a 4K page to the hash table in a segment of 4K pages only
50 */ 61 */
51 62
52_GLOBAL(__hash_page) 63_GLOBAL(__hash_page_4K)
53 mflr r0 64 mflr r0
54 std r0,16(r1) 65 std r0,16(r1)
55 stdu r1,-STACKFRAMESIZE(r1) 66 stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
88 /* If so, just bail out and refault if needed. Someone else 99 /* If so, just bail out and refault if needed. Someone else
89 * is changing this PTE anyway and might hash it. 100 * is changing this PTE anyway and might hash it.
90 */ 101 */
91 bne- bail_ok 102 bne- htab_bail_ok
103
92 /* Prepare new PTE value (turn access RW into DIRTY, then 104 /* Prepare new PTE value (turn access RW into DIRTY, then
93 * add BUSY,HASHPTE and ACCESSED) 105 * add BUSY,HASHPTE and ACCESSED)
94 */ 106 */
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
118 130
119 /* Convert linux PTE bits into HW equivalents */ 131 /* Convert linux PTE bits into HW equivalents */
120 andi. r3,r30,0x1fe /* Get basic set of flags */ 132 andi. r3,r30,0x1fe /* Get basic set of flags */
121 xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ 133 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
122 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ 134 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
123 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ 135 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
124 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ 136 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
125 andc r0,r30,r0 /* r0 = pte & ~r0 */ 137 andc r0,r30,r0 /* r0 = pte & ~r0 */
126 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ 138 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
127 139
@@ -158,19 +170,21 @@ htab_insert_pte:
158 andc r30,r30,r0 170 andc r30,r30,r0
159 ori r30,r30,_PAGE_HASHPTE 171 ori r30,r30,_PAGE_HASHPTE
160 172
161 /* page number in r5 */ 173 /* physical address r5 */
162 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 174 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
175 sldi r5,r5,PAGE_SHIFT
163 176
164 /* Calculate primary group hash */ 177 /* Calculate primary group hash */
165 and r0,r28,r27 178 and r0,r28,r27
166 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 179 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
167 180
168 /* Call ppc_md.hpte_insert */ 181 /* Call ppc_md.hpte_insert */
169 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 182 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
170 mr r4,r29 /* Retreive va */ 183 mr r4,r29 /* Retreive va */
171 li r6,0 /* no vflags */ 184 li r7,0 /* !bolted, !secondary */
185 li r8,MMU_PAGE_4K /* page size */
172_GLOBAL(htab_call_hpte_insert1) 186_GLOBAL(htab_call_hpte_insert1)
173 bl . /* Will be patched by htab_finish_init() */ 187 bl . /* Patched by htab_finish_init() */
174 cmpdi 0,r3,0 188 cmpdi 0,r3,0
175 bge htab_pte_insert_ok /* Insertion successful */ 189 bge htab_pte_insert_ok /* Insertion successful */
176 cmpdi 0,r3,-2 /* Critical failure */ 190 cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
178 192
179 /* Now try secondary slot */ 193 /* Now try secondary slot */
180 194
181 /* page number in r5 */ 195 /* physical address r5 */
182 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 196 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
197 sldi r5,r5,PAGE_SHIFT
183 198
184 /* Calculate secondary group hash */ 199 /* Calculate secondary group hash */
185 andc r0,r27,r28 200 andc r0,r27,r28
186 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ 201 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
187 202
188 /* Call ppc_md.hpte_insert */ 203 /* Call ppc_md.hpte_insert */
189 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 204 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
190 mr r4,r29 /* Retreive va */ 205 mr r4,r29 /* Retreive va */
191 li r6,HPTE_V_SECONDARY@l /* secondary slot */ 206 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
207 li r8,MMU_PAGE_4K /* page size */
192_GLOBAL(htab_call_hpte_insert2) 208_GLOBAL(htab_call_hpte_insert2)
193 bl . /* Will be patched by htab_finish_init() */ 209 bl . /* Patched by htab_finish_init() */
194 cmpdi 0,r3,0 210 cmpdi 0,r3,0
195 bge+ htab_pte_insert_ok /* Insertion successful */ 211 bge+ htab_pte_insert_ok /* Insertion successful */
196 cmpdi 0,r3,-2 /* Critical failure */ 212 cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
207 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 223 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
208 /* Call ppc_md.hpte_remove */ 224 /* Call ppc_md.hpte_remove */
209_GLOBAL(htab_call_hpte_remove) 225_GLOBAL(htab_call_hpte_remove)
210 bl . /* Will be patched by htab_finish_init() */ 226 bl . /* Patched by htab_finish_init() */
211 227
212 /* Try all again */ 228 /* Try all again */
213 b htab_insert_pte 229 b htab_insert_pte
214 230
215bail_ok: 231htab_bail_ok:
216 li r3,0 232 li r3,0
217 b bail 233 b htab_bail
218 234
219htab_pte_insert_ok: 235htab_pte_insert_ok:
220 /* Insert slot number & secondary bit in PTE */ 236 /* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
227 ld r6,STK_PARM(r6)(r1) 243 ld r6,STK_PARM(r6)(r1)
228 std r30,0(r6) 244 std r30,0(r6)
229 li r3, 0 245 li r3, 0
230bail: 246htab_bail:
231 ld r27,STK_REG(r27)(r1) 247 ld r27,STK_REG(r27)(r1)
232 ld r28,STK_REG(r28)(r1) 248 ld r28,STK_REG(r28)(r1)
233 ld r29,STK_REG(r29)(r1) 249 ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
256 272
257 /* Call ppc_md.hpte_updatepp */ 273 /* Call ppc_md.hpte_updatepp */
258 mr r5,r29 /* va */ 274 mr r5,r29 /* va */
259 li r6,0 /* large is 0 */ 275 li r6,MMU_PAGE_4K /* page size */
260 ld r7,STK_PARM(r8)(r1) /* get "local" param */ 276 ld r7,STK_PARM(r8)(r1) /* get "local" param */
261_GLOBAL(htab_call_hpte_updatepp) 277_GLOBAL(htab_call_hpte_updatepp)
262 bl . /* Will be patched by htab_finish_init() */ 278 bl . /* Patched by htab_finish_init() */
263 279
264 /* if we failed because typically the HPTE wasn't really here 280 /* if we failed because typically the HPTE wasn't really here
265 * we try an insertion. 281 * we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
276 /* Bail out clearing reservation */ 292 /* Bail out clearing reservation */
277 stdcx. r31,0,r6 293 stdcx. r31,0,r6
278 li r3,1 294 li r3,1
279 b bail 295 b htab_bail
296
297htab_pte_insert_failure:
298 /* Bail out restoring old PTE */
299 ld r6,STK_PARM(r6)(r1)
300 std r31,0(r6)
301 li r3,-1
302 b htab_bail
303
304
305#else /* CONFIG_PPC_64K_PAGES */
306
307
308/*****************************************************************************
309 * *
310 * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
311 * *
312 *****************************************************************************/
313
314/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
315 * pte_t *ptep, unsigned long trap, int local)
316 */
317
318/*
319 * For now, we do NOT implement Admixed pages
320 */
321_GLOBAL(__hash_page_4K)
322 mflr r0
323 std r0,16(r1)
324 stdu r1,-STACKFRAMESIZE(r1)
325 /* Save all params that we need after a function call */
326 std r6,STK_PARM(r6)(r1)
327 std r8,STK_PARM(r8)(r1)
328
329 /* Add _PAGE_PRESENT to access */
330 ori r4,r4,_PAGE_PRESENT
331
332 /* Save non-volatile registers.
333 * r31 will hold "old PTE"
334 * r30 is "new PTE"
335 * r29 is "va"
336 * r28 is a hash value
337 * r27 is hashtab mask (maybe dynamic patched instead ?)
338 * r26 is the hidx mask
339 * r25 is the index in combo page
340 */
341 std r25,STK_REG(r25)(r1)
342 std r26,STK_REG(r26)(r1)
343 std r27,STK_REG(r27)(r1)
344 std r28,STK_REG(r28)(r1)
345 std r29,STK_REG(r29)(r1)
346 std r30,STK_REG(r30)(r1)
347 std r31,STK_REG(r31)(r1)
348
349 /* Step 1:
350 *
351 * Check permissions, atomically mark the linux PTE busy
352 * and hashed.
353 */
3541:
355 ldarx r31,0,r6
356 /* Check access rights (access & ~(pte_val(*ptep))) */
357 andc. r0,r4,r31
358 bne- htab_wrong_access
359 /* Check if PTE is busy */
360 andi. r0,r31,_PAGE_BUSY
361 /* If so, just bail out and refault if needed. Someone else
362 * is changing this PTE anyway and might hash it.
363 */
364 bne- htab_bail_ok
365 /* Prepare new PTE value (turn access RW into DIRTY, then
366 * add BUSY and ACCESSED)
367 */
368 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
369 or r30,r30,r31
370 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
371 /* Write the linux PTE atomically (setting busy) */
372 stdcx. r30,0,r6
373 bne- 1b
374 isync
375
376 /* Step 2:
377 *
378 * Insert/Update the HPTE in the hash table. At this point,
379 * r4 (access) is re-useable, we use it for the new HPTE flags
380 */
381
382 /* Load the hidx index */
383 rldicl r25,r3,64-12,60
384
385 /* Calc va and put it in r29 */
386 rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
387 rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
388 or r29,r3,r29 /* r29 = va
389
390 /* Calculate hash value for primary slot and store it in r28 */
391 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
392 rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
393 xor r28,r5,r0
394
395 /* Convert linux PTE bits into HW equivalents */
396 andi. r3,r30,0x1fe /* Get basic set of flags */
397 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
398 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
399 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
400 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
401 andc r0,r30,r0 /* r0 = pte & ~r0 */
402 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
403
404 /* We eventually do the icache sync here (maybe inline that
405 * code rather than call a C function...)
406 */
407BEGIN_FTR_SECTION
408 mr r4,r30
409 mr r5,r7
410 bl .hash_page_do_lazy_icache
411END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
412
413 /* At this point, r3 contains new PP bits, save them in
414 * place of "access" in the param area (sic)
415 */
416 std r3,STK_PARM(r4)(r1)
417
418 /* Get htab_hash_mask */
419 ld r4,htab_hash_mask@got(2)
420 ld r27,0(r4) /* htab_hash_mask -> r27 */
421
422 /* Check if we may already be in the hashtable, in this case, we
423 * go to out-of-line code to try to modify the HPTE. We look for
424 * the bit at (1 >> (index + 32))
425 */
426 andi. r0,r31,_PAGE_HASHPTE
427 li r26,0 /* Default hidx */
428 beq htab_insert_pte
429 ld r6,STK_PARM(r6)(r1)
430 ori r26,r6,0x8000 /* Load the hidx mask */
431 ld r26,0(r26)
432 addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
433 rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
434 bne htab_modify_pte
435
436htab_insert_pte:
437 /* real page number in r5, PTE RPN value + index */
438 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
439 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
440 add r5,r5,r25
441 sldi r5,r5,HW_PAGE_SHIFT
442
443 /* Calculate primary group hash */
444 and r0,r28,r27
445 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
446
447 /* Call ppc_md.hpte_insert */
448 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
449 mr r4,r29 /* Retreive va */
450 li r7,0 /* !bolted, !secondary */
451 li r8,MMU_PAGE_4K /* page size */
452_GLOBAL(htab_call_hpte_insert1)
453 bl . /* patched by htab_finish_init() */
454 cmpdi 0,r3,0
455 bge htab_pte_insert_ok /* Insertion successful */
456 cmpdi 0,r3,-2 /* Critical failure */
457 beq- htab_pte_insert_failure
458
459 /* Now try secondary slot */
460
461 /* real page number in r5, PTE RPN value + index */
462 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
463 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
464 add r5,r5,r25
465 sldi r5,r5,HW_PAGE_SHIFT
466
467 /* Calculate secondary group hash */
468 andc r0,r27,r28
469 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
470
471 /* Call ppc_md.hpte_insert */
472 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
473 mr r4,r29 /* Retreive va */
474 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
475 li r8,MMU_PAGE_4K /* page size */
476_GLOBAL(htab_call_hpte_insert2)
477 bl . /* patched by htab_finish_init() */
478 cmpdi 0,r3,0
479 bge+ htab_pte_insert_ok /* Insertion successful */
480 cmpdi 0,r3,-2 /* Critical failure */
481 beq- htab_pte_insert_failure
482
483 /* Both are full, we need to evict something */
484 mftb r0
485 /* Pick a random group based on TB */
486 andi. r0,r0,1
487 mr r5,r28
488 bne 2f
489 not r5,r5
4902: and r0,r5,r27
491 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
492 /* Call ppc_md.hpte_remove */
493_GLOBAL(htab_call_hpte_remove)
494 bl . /* patched by htab_finish_init() */
495
496 /* Try all again */
497 b htab_insert_pte
498
499htab_bail_ok:
500 li r3,0
501 b htab_bail
502
503htab_pte_insert_ok:
504 /* Insert slot number & secondary bit in PTE second half,
505 * clear _PAGE_BUSY and set approriate HPTE slot bit
506 */
507 ld r6,STK_PARM(r6)(r1)
508 li r0,_PAGE_BUSY
509 andc r30,r30,r0
510 /* HPTE SUB bit */
511 li r0,1
512 subfic r5,r25,27 /* Must match bit position in */
513 sld r0,r0,r5 /* pgtable.h */
514 or r30,r30,r0
515 /* hindx */
516 sldi r5,r25,2
517 sld r3,r3,r5
518 li r4,0xf
519 sld r4,r4,r5
520 andc r26,r26,r4
521 or r26,r26,r3
522 ori r5,r6,0x8000
523 std r26,0(r5)
524 lwsync
525 std r30,0(r6)
526 li r3, 0
527htab_bail:
528 ld r25,STK_REG(r25)(r1)
529 ld r26,STK_REG(r26)(r1)
530 ld r27,STK_REG(r27)(r1)
531 ld r28,STK_REG(r28)(r1)
532 ld r29,STK_REG(r29)(r1)
533 ld r30,STK_REG(r30)(r1)
534 ld r31,STK_REG(r31)(r1)
535 addi r1,r1,STACKFRAMESIZE
536 ld r0,16(r1)
537 mtlr r0
538 blr
539
540htab_modify_pte:
541 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
542 mr r4,r3
543 sldi r5,r25,2
544 srd r3,r26,r5
545
546 /* Secondary group ? if yes, get a inverted hash value */
547 mr r5,r28
548 andi. r0,r3,0x8 /* page secondary ? */
549 beq 1f
550 not r5,r5
5511: andi. r3,r3,0x7 /* extract idx alone */
552
553 /* Calculate proper slot value for ppc_md.hpte_updatepp */
554 and r0,r5,r27
555 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
556 add r3,r0,r3 /* add slot idx */
557
558 /* Call ppc_md.hpte_updatepp */
559 mr r5,r29 /* va */
560 li r6,MMU_PAGE_4K /* page size */
561 ld r7,STK_PARM(r8)(r1) /* get "local" param */
562_GLOBAL(htab_call_hpte_updatepp)
563 bl . /* patched by htab_finish_init() */
564
565 /* if we failed because typically the HPTE wasn't really here
566 * we try an insertion.
567 */
568 cmpdi 0,r3,-1
569 beq- htab_insert_pte
570
571 /* Clear the BUSY bit and Write out the PTE */
572 li r0,_PAGE_BUSY
573 andc r30,r30,r0
574 ld r6,STK_PARM(r6)(r1)
575 std r30,0(r6)
576 li r3,0
577 b htab_bail
578
579htab_wrong_access:
580 /* Bail out clearing reservation */
581 stdcx. r31,0,r6
582 li r3,1
583 b htab_bail
280 584
281htab_pte_insert_failure: 585htab_pte_insert_failure:
282 /* Bail out restoring old PTE */ 586 /* Bail out restoring old PTE */
283 ld r6,STK_PARM(r6)(r1) 587 ld r6,STK_PARM(r6)(r1)
284 std r31,0(r6) 588 std r31,0(r6)
285 li r3,-1 589 li r3,-1
286 b bail 590 b htab_bail
591
592
593/*****************************************************************************
594 * *
595 * 64K SW & 64K HW in a 64K segment pages implementation *
596 * *
597 *****************************************************************************/
598
599_GLOBAL(__hash_page_64K)
600 mflr r0
601 std r0,16(r1)
602 stdu r1,-STACKFRAMESIZE(r1)
603 /* Save all params that we need after a function call */
604 std r6,STK_PARM(r6)(r1)
605 std r8,STK_PARM(r8)(r1)
606
607 /* Add _PAGE_PRESENT to access */
608 ori r4,r4,_PAGE_PRESENT
609
610 /* Save non-volatile registers.
611 * r31 will hold "old PTE"
612 * r30 is "new PTE"
613 * r29 is "va"
614 * r28 is a hash value
615 * r27 is hashtab mask (maybe dynamic patched instead ?)
616 */
617 std r27,STK_REG(r27)(r1)
618 std r28,STK_REG(r28)(r1)
619 std r29,STK_REG(r29)(r1)
620 std r30,STK_REG(r30)(r1)
621 std r31,STK_REG(r31)(r1)
622
623 /* Step 1:
624 *
625 * Check permissions, atomically mark the linux PTE busy
626 * and hashed.
627 */
6281:
629 ldarx r31,0,r6
630 /* Check access rights (access & ~(pte_val(*ptep))) */
631 andc. r0,r4,r31
632 bne- ht64_wrong_access
633 /* Check if PTE is busy */
634 andi. r0,r31,_PAGE_BUSY
635 /* If so, just bail out and refault if needed. Someone else
636 * is changing this PTE anyway and might hash it.
637 */
638 bne- ht64_bail_ok
639 /* Prepare new PTE value (turn access RW into DIRTY, then
640 * add BUSY,HASHPTE and ACCESSED)
641 */
642 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
643 or r30,r30,r31
644 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
645 /* Write the linux PTE atomically (setting busy) */
646 stdcx. r30,0,r6
647 bne- 1b
648 isync
649
650 /* Step 2:
651 *
652 * Insert/Update the HPTE in the hash table. At this point,
653 * r4 (access) is re-useable, we use it for the new HPTE flags
654 */
655
656 /* Calc va and put it in r29 */
657 rldicr r29,r5,28,63-28
658 rldicl r3,r3,0,36
659 or r29,r3,r29
660
661 /* Calculate hash value for primary slot and store it in r28 */
662 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
663 rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
664 xor r28,r5,r0
665
666 /* Convert linux PTE bits into HW equivalents */
667 andi. r3,r30,0x1fe /* Get basic set of flags */
668 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
669 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
670 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
671 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
672 andc r0,r30,r0 /* r0 = pte & ~r0 */
673 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
674
675 /* We eventually do the icache sync here (maybe inline that
676 * code rather than call a C function...)
677 */
678BEGIN_FTR_SECTION
679 mr r4,r30
680 mr r5,r7
681 bl .hash_page_do_lazy_icache
682END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
683
684 /* At this point, r3 contains new PP bits, save them in
685 * place of "access" in the param area (sic)
686 */
687 std r3,STK_PARM(r4)(r1)
688
689 /* Get htab_hash_mask */
690 ld r4,htab_hash_mask@got(2)
691 ld r27,0(r4) /* htab_hash_mask -> r27 */
692
693 /* Check if we may already be in the hashtable, in this case, we
694 * go to out-of-line code to try to modify the HPTE
695 */
696 andi. r0,r31,_PAGE_HASHPTE
697 bne ht64_modify_pte
698
699ht64_insert_pte:
700 /* Clear hpte bits in new pte (we also clear BUSY btw) and
701 * add _PAGE_HASHPTE
702 */
703 lis r0,_PAGE_HPTEFLAGS@h
704 ori r0,r0,_PAGE_HPTEFLAGS@l
705 andc r30,r30,r0
706 ori r30,r30,_PAGE_HASHPTE
707
708 /* Phyical address in r5 */
709 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
710 sldi r5,r5,PAGE_SHIFT
711
712 /* Calculate primary group hash */
713 and r0,r28,r27
714 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
715
716 /* Call ppc_md.hpte_insert */
717 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
718 mr r4,r29 /* Retreive va */
719 li r7,0 /* !bolted, !secondary */
720 li r8,MMU_PAGE_64K
721_GLOBAL(ht64_call_hpte_insert1)
722 bl . /* patched by htab_finish_init() */
723 cmpdi 0,r3,0
724 bge ht64_pte_insert_ok /* Insertion successful */
725 cmpdi 0,r3,-2 /* Critical failure */
726 beq- ht64_pte_insert_failure
727
728 /* Now try secondary slot */
729
730 /* Phyical address in r5 */
731 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
732 sldi r5,r5,PAGE_SHIFT
733
734 /* Calculate secondary group hash */
735 andc r0,r27,r28
736 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
737
738 /* Call ppc_md.hpte_insert */
739 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
740 mr r4,r29 /* Retreive va */
741 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
742 li r8,MMU_PAGE_64K
743_GLOBAL(ht64_call_hpte_insert2)
744 bl . /* patched by htab_finish_init() */
745 cmpdi 0,r3,0
746 bge+ ht64_pte_insert_ok /* Insertion successful */
747 cmpdi 0,r3,-2 /* Critical failure */
748 beq- ht64_pte_insert_failure
749
750 /* Both are full, we need to evict something */
751 mftb r0
752 /* Pick a random group based on TB */
753 andi. r0,r0,1
754 mr r5,r28
755 bne 2f
756 not r5,r5
7572: and r0,r5,r27
758 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
759 /* Call ppc_md.hpte_remove */
760_GLOBAL(ht64_call_hpte_remove)
761 bl . /* patched by htab_finish_init() */
762
763 /* Try all again */
764 b ht64_insert_pte
765
766ht64_bail_ok:
767 li r3,0
768 b ht64_bail
769
770ht64_pte_insert_ok:
771 /* Insert slot number & secondary bit in PTE */
772 rldimi r30,r3,12,63-15
773
774 /* Write out the PTE with a normal write
775 * (maybe add eieio may be good still ?)
776 */
777ht64_write_out_pte:
778 ld r6,STK_PARM(r6)(r1)
779 std r30,0(r6)
780 li r3, 0
781ht64_bail:
782 ld r27,STK_REG(r27)(r1)
783 ld r28,STK_REG(r28)(r1)
784 ld r29,STK_REG(r29)(r1)
785 ld r30,STK_REG(r30)(r1)
786 ld r31,STK_REG(r31)(r1)
787 addi r1,r1,STACKFRAMESIZE
788 ld r0,16(r1)
789 mtlr r0
790 blr
791
792ht64_modify_pte:
793 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
794 mr r4,r3
795 rlwinm r3,r31,32-12,29,31
796
797 /* Secondary group ? if yes, get a inverted hash value */
798 mr r5,r28
799 andi. r0,r31,_PAGE_F_SECOND
800 beq 1f
801 not r5,r5
8021:
803 /* Calculate proper slot value for ppc_md.hpte_updatepp */
804 and r0,r5,r27
805 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
806 add r3,r0,r3 /* add slot idx */
807
808 /* Call ppc_md.hpte_updatepp */
809 mr r5,r29 /* va */
810 li r6,MMU_PAGE_64K
811 ld r7,STK_PARM(r8)(r1) /* get "local" param */
812_GLOBAL(ht64_call_hpte_updatepp)
813 bl . /* patched by htab_finish_init() */
814
815 /* if we failed because typically the HPTE wasn't really here
816 * we try an insertion.
817 */
818 cmpdi 0,r3,-1
819 beq- ht64_insert_pte
820
821 /* Clear the BUSY bit and Write out the PTE */
822 li r0,_PAGE_BUSY
823 andc r30,r30,r0
824 b ht64_write_out_pte
825
826ht64_wrong_access:
827 /* Bail out clearing reservation */
828 stdcx. r31,0,r6
829 li r3,1
830 b ht64_bail
831
832ht64_pte_insert_failure:
833 /* Bail out restoring old PTE */
834 ld r6,STK_PARM(r6)(r1)
835 std r31,0(r6)
836 li r3,-1
837 b ht64_bail
838
839
840#endif /* CONFIG_PPC_64K_PAGES */
287 841
288 842
843/*****************************************************************************
844 * *
845 * Huge pages implementation is in hugetlbpage.c *
846 * *
847 *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c28..d96bcfe4c6f6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 */ 11 */
12
13#undef DEBUG_LOW
14
12#include <linux/spinlock.h> 15#include <linux/spinlock.h>
13#include <linux/bitops.h> 16#include <linux/bitops.h>
14#include <linux/threads.h> 17#include <linux/threads.h>
@@ -22,11 +25,84 @@
22#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
23#include <asm/tlb.h> 26#include <asm/tlb.h>
24#include <asm/cputable.h> 27#include <asm/cputable.h>
28#include <asm/udbg.h>
29
30#ifdef DEBUG_LOW
31#define DBG_LOW(fmt...) udbg_printf(fmt)
32#else
33#define DBG_LOW(fmt...)
34#endif
25 35
26#define HPTE_LOCK_BIT 3 36#define HPTE_LOCK_BIT 3
27 37
28static DEFINE_SPINLOCK(native_tlbie_lock); 38static DEFINE_SPINLOCK(native_tlbie_lock);
29 39
40static inline void __tlbie(unsigned long va, unsigned int psize)
41{
42 unsigned int penc;
43
44 /* clear top 16 bits, non SLS segment */
45 va &= ~(0xffffULL << 48);
46
47 switch (psize) {
48 case MMU_PAGE_4K:
49 va &= ~0xffful;
50 asm volatile("tlbie %0,0" : : "r" (va) : "memory");
51 break;
52 default:
53 penc = mmu_psize_defs[psize].penc;
54 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
55 va |= (0x7f >> (8 - penc)) << 12;
56 asm volatile("tlbie %0,1" : : "r" (va) : "memory");
57 break;
58 }
59}
60
61static inline void __tlbiel(unsigned long va, unsigned int psize)
62{
63 unsigned int penc;
64
65 /* clear top 16 bits, non SLS segment */
66 va &= ~(0xffffULL << 48);
67
68 switch (psize) {
69 case MMU_PAGE_4K:
70 va &= ~0xffful;
71 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
72 : : "r"(va) : "memory");
73 break;
74 default:
75 penc = mmu_psize_defs[psize].penc;
76 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
77 va |= (0x7f >> (8 - penc)) << 12;
78 asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
79 : : "r"(va) : "memory");
80 break;
81 }
82
83}
84
85static inline void tlbie(unsigned long va, int psize, int local)
86{
87 unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
88 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
89
90 if (use_local)
91 use_local = mmu_psize_defs[psize].tlbiel;
92 if (lock_tlbie && !use_local)
93 spin_lock(&native_tlbie_lock);
94 asm volatile("ptesync": : :"memory");
95 if (use_local) {
96 __tlbiel(va, psize);
97 asm volatile("ptesync": : :"memory");
98 } else {
99 __tlbie(va, psize);
100 asm volatile("eieio; tlbsync; ptesync": : :"memory");
101 }
102 if (lock_tlbie && !use_local)
103 spin_unlock(&native_tlbie_lock);
104}
105
30static inline void native_lock_hpte(hpte_t *hptep) 106static inline void native_lock_hpte(hpte_t *hptep)
31{ 107{
32 unsigned long *word = &hptep->v; 108 unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
48} 124}
49 125
50long native_hpte_insert(unsigned long hpte_group, unsigned long va, 126long native_hpte_insert(unsigned long hpte_group, unsigned long va,
51 unsigned long prpn, unsigned long vflags, 127 unsigned long pa, unsigned long rflags,
52 unsigned long rflags) 128 unsigned long vflags, int psize)
53{ 129{
54 hpte_t *hptep = htab_address + hpte_group; 130 hpte_t *hptep = htab_address + hpte_group;
55 unsigned long hpte_v, hpte_r; 131 unsigned long hpte_v, hpte_r;
56 int i; 132 int i;
57 133
134 if (!(vflags & HPTE_V_BOLTED)) {
135 DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
136 " rflags=%lx, vflags=%lx, psize=%d)\n",
137 hpte_group, va, pa, rflags, vflags, psize);
138 }
139
58 for (i = 0; i < HPTES_PER_GROUP; i++) { 140 for (i = 0; i < HPTES_PER_GROUP; i++) {
59 if (! (hptep->v & HPTE_V_VALID)) { 141 if (! (hptep->v & HPTE_V_VALID)) {
60 /* retry with lock held */ 142 /* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
70 if (i == HPTES_PER_GROUP) 152 if (i == HPTES_PER_GROUP)
71 return -1; 153 return -1;
72 154
73 hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; 155 hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
74 if (vflags & HPTE_V_LARGE) 156 hpte_r = hpte_encode_r(pa, psize) | rflags;
75 va &= ~(1UL << HPTE_V_AVPN_SHIFT); 157
76 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; 158 if (!(vflags & HPTE_V_BOLTED)) {
159 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
160 i, hpte_v, hpte_r);
161 }
77 162
78 hptep->r = hpte_r; 163 hptep->r = hpte_r;
79 /* Guarantee the second dword is visible before the valid bit */ 164 /* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
96 int slot_offset; 181 int slot_offset;
97 unsigned long hpte_v; 182 unsigned long hpte_v;
98 183
184 DBG_LOW(" remove(group=%lx)\n", hpte_group);
185
99 /* pick a random entry to start at */ 186 /* pick a random entry to start at */
100 slot_offset = mftb() & 0x7; 187 slot_offset = mftb() & 0x7;
101 188
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
126 return i; 213 return i;
127} 214}
128 215
129static inline void set_pp_bit(unsigned long pp, hpte_t *addr) 216static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
217 unsigned long va, int psize, int local)
130{ 218{
131 unsigned long old; 219 hpte_t *hptep = htab_address + slot;
132 unsigned long *p = &addr->r; 220 unsigned long hpte_v, want_v;
133 221 int ret = 0;
134 __asm__ __volatile__( 222
135 "1: ldarx %0,0,%3\n\ 223 want_v = hpte_encode_v(va, psize);
136 rldimi %0,%2,0,61\n\ 224
137 stdcx. %0,0,%3\n\ 225 DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
138 bne 1b" 226 va, want_v & HPTE_V_AVPN, slot, newpp);
139 : "=&r" (old), "=m" (*p) 227
140 : "r" (pp), "r" (p), "m" (*p) 228 native_lock_hpte(hptep);
141 : "cc"); 229
230 hpte_v = hptep->v;
231
232 /* Even if we miss, we need to invalidate the TLB */
233 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
234 DBG_LOW(" -> miss\n");
235 native_unlock_hpte(hptep);
236 ret = -1;
237 } else {
238 DBG_LOW(" -> hit\n");
239 /* Update the HPTE */
240 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
241 (newpp & (HPTE_R_PP | HPTE_R_N));
242 native_unlock_hpte(hptep);
243 }
244
245 /* Ensure it is out of the tlb too. */
246 tlbie(va, psize, local);
247
248 return ret;
142} 249}
143 250
144/* 251static long native_hpte_find(unsigned long va, int psize)
145 * Only works on small pages. Yes its ugly to have to check each slot in
146 * the group but we only use this during bootup.
147 */
148static long native_hpte_find(unsigned long vpn)
149{ 252{
150 hpte_t *hptep; 253 hpte_t *hptep;
151 unsigned long hash; 254 unsigned long hash;
152 unsigned long i, j; 255 unsigned long i, j;
153 long slot; 256 long slot;
154 unsigned long hpte_v; 257 unsigned long want_v, hpte_v;
155 258
156 hash = hpt_hash(vpn, 0); 259 hash = hpt_hash(va, mmu_psize_defs[psize].shift);
260 want_v = hpte_encode_v(va, psize);
157 261
158 for (j = 0; j < 2; j++) { 262 for (j = 0; j < 2; j++) {
159 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 263 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
161 hptep = htab_address + slot; 265 hptep = htab_address + slot;
162 hpte_v = hptep->v; 266 hpte_v = hptep->v;
163 267
164 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) 268 if (HPTE_V_COMPARE(hpte_v, want_v)
165 && (hpte_v & HPTE_V_VALID) 269 && (hpte_v & HPTE_V_VALID)
166 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { 270 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
167 /* HPTE matches */ 271 /* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
177 return -1; 281 return -1;
178} 282}
179 283
180static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
181 unsigned long va, int large, int local)
182{
183 hpte_t *hptep = htab_address + slot;
184 unsigned long hpte_v;
185 unsigned long avpn = va >> 23;
186 int ret = 0;
187
188 if (large)
189 avpn &= ~1;
190
191 native_lock_hpte(hptep);
192
193 hpte_v = hptep->v;
194
195 /* Even if we miss, we need to invalidate the TLB */
196 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
197 || !(hpte_v & HPTE_V_VALID)) {
198 native_unlock_hpte(hptep);
199 ret = -1;
200 } else {
201 set_pp_bit(newpp, hptep);
202 native_unlock_hpte(hptep);
203 }
204
205 /* Ensure it is out of the tlb too */
206 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
207 tlbiel(va);
208 } else {
209 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
210
211 if (lock_tlbie)
212 spin_lock(&native_tlbie_lock);
213 tlbie(va, large);
214 if (lock_tlbie)
215 spin_unlock(&native_tlbie_lock);
216 }
217
218 return ret;
219}
220
221/* 284/*
222 * Update the page protection bits. Intended to be used to create 285 * Update the page protection bits. Intended to be used to create
223 * guard pages for kernel data structures on pages which are bolted 286 * guard pages for kernel data structures on pages which are bolted
224 * in the HPT. Assumes pages being operated on will not be stolen. 287 * in the HPT. Assumes pages being operated on will not be stolen.
225 * Does not work on large pages.
226 * 288 *
227 * No need to lock here because we should be the only user. 289 * No need to lock here because we should be the only user.
228 */ 290 */
229static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) 291static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
292 int psize)
230{ 293{
231 unsigned long vsid, va, vpn, flags = 0; 294 unsigned long vsid, va;
232 long slot; 295 long slot;
233 hpte_t *hptep; 296 hpte_t *hptep;
234 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
235 297
236 vsid = get_kernel_vsid(ea); 298 vsid = get_kernel_vsid(ea);
237 va = (vsid << 28) | (ea & 0x0fffffff); 299 va = (vsid << 28) | (ea & 0x0fffffff);
238 vpn = va >> PAGE_SHIFT;
239 300
240 slot = native_hpte_find(vpn); 301 slot = native_hpte_find(va, psize);
241 if (slot == -1) 302 if (slot == -1)
242 panic("could not find page to bolt\n"); 303 panic("could not find page to bolt\n");
243 hptep = htab_address + slot; 304 hptep = htab_address + slot;
244 305
245 set_pp_bit(newpp, hptep); 306 /* Update the HPTE */
307 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
308 (newpp & (HPTE_R_PP | HPTE_R_N));
246 309
247 /* Ensure it is out of the tlb too */ 310 /* Ensure it is out of the tlb too. */
248 if (lock_tlbie) 311 tlbie(va, psize, 0);
249 spin_lock_irqsave(&native_tlbie_lock, flags);
250 tlbie(va, 0);
251 if (lock_tlbie)
252 spin_unlock_irqrestore(&native_tlbie_lock, flags);
253} 312}
254 313
255static void native_hpte_invalidate(unsigned long slot, unsigned long va, 314static void native_hpte_invalidate(unsigned long slot, unsigned long va,
256 int large, int local) 315 int psize, int local)
257{ 316{
258 hpte_t *hptep = htab_address + slot; 317 hpte_t *hptep = htab_address + slot;
259 unsigned long hpte_v; 318 unsigned long hpte_v;
260 unsigned long avpn = va >> 23; 319 unsigned long want_v;
261 unsigned long flags; 320 unsigned long flags;
262 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
263
264 if (large)
265 avpn &= ~1;
266 321
267 local_irq_save(flags); 322 local_irq_save(flags);
268 native_lock_hpte(hptep);
269 323
324 DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
325
326 want_v = hpte_encode_v(va, psize);
327 native_lock_hpte(hptep);
270 hpte_v = hptep->v; 328 hpte_v = hptep->v;
271 329
272 /* Even if we miss, we need to invalidate the TLB */ 330 /* Even if we miss, we need to invalidate the TLB */
273 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) 331 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
274 || !(hpte_v & HPTE_V_VALID)) {
275 native_unlock_hpte(hptep); 332 native_unlock_hpte(hptep);
276 } else { 333 else
277 /* Invalidate the hpte. NOTE: this also unlocks it */ 334 /* Invalidate the hpte. NOTE: this also unlocks it */
278 hptep->v = 0; 335 hptep->v = 0;
279 }
280 336
281 /* Invalidate the tlb */ 337 /* Invalidate the TLB */
282 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 338 tlbie(va, psize, local);
283 tlbiel(va); 339
284 } else {
285 if (lock_tlbie)
286 spin_lock(&native_tlbie_lock);
287 tlbie(va, large);
288 if (lock_tlbie)
289 spin_unlock(&native_tlbie_lock);
290 }
291 local_irq_restore(flags); 340 local_irq_restore(flags);
292} 341}
293 342
294/* 343/*
344 * XXX This need fixing based on page size. It's only used by
345 * native_hpte_clear() for now which needs fixing too so they
346 * make a good pair...
347 */
348static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
349{
350 unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
351 unsigned long va;
352
353 va = avpn << 23;
354
355 if (! (hpte_v & HPTE_V_LARGE)) {
356 unsigned long vpi, pteg;
357
358 pteg = slot / HPTES_PER_GROUP;
359 if (hpte_v & HPTE_V_SECONDARY)
360 pteg = ~pteg;
361
362 vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
363
364 va |= vpi << PAGE_SHIFT;
365 }
366
367 return va;
368}
369
370/*
295 * clear all mappings on kexec. All cpus are in real mode (or they will 371 * clear all mappings on kexec. All cpus are in real mode (or they will
296 * be when they isi), and we are the only one left. We rely on our kernel 372 * be when they isi), and we are the only one left. We rely on our kernel
297 * mapping being 0xC0's and the hardware ignoring those two real bits. 373 * mapping being 0xC0's and the hardware ignoring those two real bits.
298 * 374 *
299 * TODO: add batching support when enabled. remember, no dynamic memory here, 375 * TODO: add batching support when enabled. remember, no dynamic memory here,
300 * athough there is the control page available... 376 * athough there is the control page available...
377 *
378 * XXX FIXME: 4k only for now !
301 */ 379 */
302static void native_hpte_clear(void) 380static void native_hpte_clear(void)
303{ 381{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
327 405
328 if (hpte_v & HPTE_V_VALID) { 406 if (hpte_v & HPTE_V_VALID) {
329 hptep->v = 0; 407 hptep->v = 0;
330 tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); 408 tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
331 } 409 }
332 } 410 }
333 411
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
335 local_irq_restore(flags); 413 local_irq_restore(flags);
336} 414}
337 415
416/*
417 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
418 * the lock all the time
419 */
338static void native_flush_hash_range(unsigned long number, int local) 420static void native_flush_hash_range(unsigned long number, int local)
339{ 421{
340 unsigned long va, vpn, hash, secondary, slot, flags, avpn; 422 unsigned long va, hash, index, hidx, shift, slot;
341 int i, j;
342 hpte_t *hptep; 423 hpte_t *hptep;
343 unsigned long hpte_v; 424 unsigned long hpte_v;
425 unsigned long want_v;
426 unsigned long flags;
427 real_pte_t pte;
344 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 428 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
345 unsigned long large = batch->large; 429 unsigned long psize = batch->psize;
430 int i;
346 431
347 local_irq_save(flags); 432 local_irq_save(flags);
348 433
349 j = 0;
350 for (i = 0; i < number; i++) { 434 for (i = 0; i < number; i++) {
351 va = batch->vaddr[j]; 435 va = batch->vaddr[i];
352 if (large) 436 pte = batch->pte[i];
353 vpn = va >> HPAGE_SHIFT; 437
354 else 438 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
355 vpn = va >> PAGE_SHIFT; 439 hash = hpt_hash(va, shift);
356 hash = hpt_hash(vpn, large); 440 hidx = __rpte_to_hidx(pte, index);
357 secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; 441 if (hidx & _PTEIDX_SECONDARY)
358 if (secondary) 442 hash = ~hash;
359 hash = ~hash; 443 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
360 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 444 slot += hidx & _PTEIDX_GROUP_IX;
361 slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; 445 hptep = htab_address + slot;
362 446 want_v = hpte_encode_v(va, psize);
363 hptep = htab_address + slot; 447 native_lock_hpte(hptep);
364 448 hpte_v = hptep->v;
365 avpn = va >> 23; 449 if (!HPTE_V_COMPARE(hpte_v, want_v) ||
366 if (large) 450 !(hpte_v & HPTE_V_VALID))
367 avpn &= ~0x1UL; 451 native_unlock_hpte(hptep);
368 452 else
369 native_lock_hpte(hptep); 453 hptep->v = 0;
370 454 } pte_iterate_hashed_end();
371 hpte_v = hptep->v;
372
373 /* Even if we miss, we need to invalidate the TLB */
374 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
375 || !(hpte_v & HPTE_V_VALID)) {
376 native_unlock_hpte(hptep);
377 } else {
378 /* Invalidate the hpte. NOTE: this also unlocks it */
379 hptep->v = 0;
380 }
381
382 j++;
383 } 455 }
384 456
385 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 457 if (cpu_has_feature(CPU_FTR_TLBIEL) &&
458 mmu_psize_defs[psize].tlbiel && local) {
386 asm volatile("ptesync":::"memory"); 459 asm volatile("ptesync":::"memory");
387 460 for (i = 0; i < number; i++) {
388 for (i = 0; i < j; i++) 461 va = batch->vaddr[i];
389 __tlbiel(batch->vaddr[i]); 462 pte = batch->pte[i];
390 463
464 pte_iterate_hashed_subpages(pte, psize, va, index,
465 shift) {
466 __tlbiel(va, psize);
467 } pte_iterate_hashed_end();
468 }
391 asm volatile("ptesync":::"memory"); 469 asm volatile("ptesync":::"memory");
392 } else { 470 } else {
393 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); 471 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
396 spin_lock(&native_tlbie_lock); 474 spin_lock(&native_tlbie_lock);
397 475
398 asm volatile("ptesync":::"memory"); 476 asm volatile("ptesync":::"memory");
399 477 for (i = 0; i < number; i++) {
400 for (i = 0; i < j; i++) 478 va = batch->vaddr[i];
401 __tlbie(batch->vaddr[i], large); 479 pte = batch->pte[i];
402 480
481 pte_iterate_hashed_subpages(pte, psize, va, index,
482 shift) {
483 __tlbie(va, psize);
484 } pte_iterate_hashed_end();
485 }
403 asm volatile("eieio; tlbsync; ptesync":::"memory"); 486 asm volatile("eieio; tlbsync; ptesync":::"memory");
404 487
405 if (lock_tlbie) 488 if (lock_tlbie)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6e9e05cce02c..b2f3dbca6952 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#undef DEBUG 21#undef DEBUG
22#undef DEBUG_LOW
22 23
23#include <linux/config.h> 24#include <linux/config.h>
24#include <linux/spinlock.h> 25#include <linux/spinlock.h>
@@ -59,6 +60,15 @@
59#define DBG(fmt...) 60#define DBG(fmt...)
60#endif 61#endif
61 62
63#ifdef DEBUG_LOW
64#define DBG_LOW(fmt...) udbg_printf(fmt)
65#else
66#define DBG_LOW(fmt...)
67#endif
68
69#define KB (1024)
70#define MB (1024*KB)
71
62/* 72/*
63 * Note: pte --> Linux PTE 73 * Note: pte --> Linux PTE
64 * HPTE --> PowerPC Hashed Page Table Entry 74 * HPTE --> PowerPC Hashed Page Table Entry
@@ -77,91 +87,290 @@ extern unsigned long dart_tablebase;
77 87
78hpte_t *htab_address; 88hpte_t *htab_address;
79unsigned long htab_hash_mask; 89unsigned long htab_hash_mask;
80
81unsigned long _SDR1; 90unsigned long _SDR1;
91struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
92int mmu_linear_psize = MMU_PAGE_4K;
93int mmu_virtual_psize = MMU_PAGE_4K;
94#ifdef CONFIG_HUGETLB_PAGE
95int mmu_huge_psize = MMU_PAGE_16M;
96unsigned int HPAGE_SHIFT;
97#endif
82 98
83#define KB (1024) 99/* There are definitions of page sizes arrays to be used when none
84#define MB (1024*KB) 100 * is provided by the firmware.
85 101 */
86static inline void loop_forever(void)
87{
88 volatile unsigned long x = 1;
89 for(;x;x|=1)
90 ;
91}
92 102
93static inline void create_pte_mapping(unsigned long start, unsigned long end, 103/* Pre-POWER4 CPUs (4k pages only)
94 unsigned long mode, int large) 104 */
105struct mmu_psize_def mmu_psize_defaults_old[] = {
106 [MMU_PAGE_4K] = {
107 .shift = 12,
108 .sllp = 0,
109 .penc = 0,
110 .avpnm = 0,
111 .tlbiel = 0,
112 },
113};
114
115/* POWER4, GPUL, POWER5
116 *
117 * Support for 16Mb large pages
118 */
119struct mmu_psize_def mmu_psize_defaults_gp[] = {
120 [MMU_PAGE_4K] = {
121 .shift = 12,
122 .sllp = 0,
123 .penc = 0,
124 .avpnm = 0,
125 .tlbiel = 1,
126 },
127 [MMU_PAGE_16M] = {
128 .shift = 24,
129 .sllp = SLB_VSID_L,
130 .penc = 0,
131 .avpnm = 0x1UL,
132 .tlbiel = 0,
133 },
134};
135
136
137int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
138 unsigned long pstart, unsigned long mode, int psize)
95{ 139{
96 unsigned long addr; 140 unsigned long vaddr, paddr;
97 unsigned int step; 141 unsigned int step, shift;
98 unsigned long tmp_mode; 142 unsigned long tmp_mode;
99 unsigned long vflags; 143 int ret = 0;
100 144
101 if (large) { 145 shift = mmu_psize_defs[psize].shift;
102 step = 16*MB; 146 step = 1 << shift;
103 vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
104 } else {
105 step = 4*KB;
106 vflags = HPTE_V_BOLTED;
107 }
108 147
109 for (addr = start; addr < end; addr += step) { 148 for (vaddr = vstart, paddr = pstart; vaddr < vend;
149 vaddr += step, paddr += step) {
110 unsigned long vpn, hash, hpteg; 150 unsigned long vpn, hash, hpteg;
111 unsigned long vsid = get_kernel_vsid(addr); 151 unsigned long vsid = get_kernel_vsid(vaddr);
112 unsigned long va = (vsid << 28) | (addr & 0xfffffff); 152 unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
113 int ret = -1;
114
115 if (large)
116 vpn = va >> HPAGE_SHIFT;
117 else
118 vpn = va >> PAGE_SHIFT;
119
120 153
154 vpn = va >> shift;
121 tmp_mode = mode; 155 tmp_mode = mode;
122 156
123 /* Make non-kernel text non-executable */ 157 /* Make non-kernel text non-executable */
124 if (!in_kernel_text(addr)) 158 if (!in_kernel_text(vaddr))
125 tmp_mode = mode | HW_NO_EXEC; 159 tmp_mode = mode | HPTE_R_N;
126
127 hash = hpt_hash(vpn, large);
128 160
161 hash = hpt_hash(va, shift);
129 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 162 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
130 163
164 /* The crap below can be cleaned once ppd_md.probe() can
165 * set up the hash callbacks, thus we can just used the
166 * normal insert callback here.
167 */
131#ifdef CONFIG_PPC_ISERIES 168#ifdef CONFIG_PPC_ISERIES
132 if (systemcfg->platform & PLATFORM_ISERIES_LPAR) 169 if (systemcfg->platform == PLATFORM_ISERIES_LPAR)
133 ret = iSeries_hpte_bolt_or_insert(hpteg, va, 170 ret = iSeries_hpte_insert(hpteg, va,
134 virt_to_abs(addr) >> PAGE_SHIFT, 171 virt_to_abs(paddr),
135 vflags, tmp_mode); 172 tmp_mode,
173 HPTE_V_BOLTED,
174 psize);
136 else 175 else
137#endif 176#endif
138#ifdef CONFIG_PPC_PSERIES 177#ifdef CONFIG_PPC_PSERIES
139 if (systemcfg->platform & PLATFORM_LPAR) 178 if (systemcfg->platform & PLATFORM_LPAR)
140 ret = pSeries_lpar_hpte_insert(hpteg, va, 179 ret = pSeries_lpar_hpte_insert(hpteg, va,
141 virt_to_abs(addr) >> PAGE_SHIFT, 180 virt_to_abs(paddr),
142 vflags, tmp_mode); 181 tmp_mode,
182 HPTE_V_BOLTED,
183 psize);
143 else 184 else
144#endif 185#endif
145#ifdef CONFIG_PPC_MULTIPLATFORM 186#ifdef CONFIG_PPC_MULTIPLATFORM
146 ret = native_hpte_insert(hpteg, va, 187 ret = native_hpte_insert(hpteg, va,
147 virt_to_abs(addr) >> PAGE_SHIFT, 188 virt_to_abs(paddr),
148 vflags, tmp_mode); 189 tmp_mode, HPTE_V_BOLTED,
190 psize);
149#endif 191#endif
192 if (ret < 0)
193 break;
194 }
195 return ret < 0 ? ret : 0;
196}
150 197
151 if (ret == -1) { 198static int __init htab_dt_scan_page_sizes(unsigned long node,
152 ppc64_terminate_msg(0x20, "create_pte_mapping"); 199 const char *uname, int depth,
153 loop_forever(); 200 void *data)
201{
202 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
203 u32 *prop;
204 unsigned long size = 0;
205
206 /* We are scanning "cpu" nodes only */
207 if (type == NULL || strcmp(type, "cpu") != 0)
208 return 0;
209
210 prop = (u32 *)of_get_flat_dt_prop(node,
211 "ibm,segment-page-sizes", &size);
212 if (prop != NULL) {
213 DBG("Page sizes from device-tree:\n");
214 size /= 4;
215 cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
216 while(size > 0) {
217 unsigned int shift = prop[0];
218 unsigned int slbenc = prop[1];
219 unsigned int lpnum = prop[2];
220 unsigned int lpenc = 0;
221 struct mmu_psize_def *def;
222 int idx = -1;
223
224 size -= 3; prop += 3;
225 while(size > 0 && lpnum) {
226 if (prop[0] == shift)
227 lpenc = prop[1];
228 prop += 2; size -= 2;
229 lpnum--;
230 }
231 switch(shift) {
232 case 0xc:
233 idx = MMU_PAGE_4K;
234 break;
235 case 0x10:
236 idx = MMU_PAGE_64K;
237 break;
238 case 0x14:
239 idx = MMU_PAGE_1M;
240 break;
241 case 0x18:
242 idx = MMU_PAGE_16M;
243 cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
244 break;
245 case 0x22:
246 idx = MMU_PAGE_16G;
247 break;
248 }
249 if (idx < 0)
250 continue;
251 def = &mmu_psize_defs[idx];
252 def->shift = shift;
253 if (shift <= 23)
254 def->avpnm = 0;
255 else
256 def->avpnm = (1 << (shift - 23)) - 1;
257 def->sllp = slbenc;
258 def->penc = lpenc;
259 /* We don't know for sure what's up with tlbiel, so
260 * for now we only set it for 4K and 64K pages
261 */
262 if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
263 def->tlbiel = 1;
264 else
265 def->tlbiel = 0;
266
267 DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
268 "tlbiel=%d, penc=%d\n",
269 idx, shift, def->sllp, def->avpnm, def->tlbiel,
270 def->penc);
154 } 271 }
272 return 1;
273 }
274 return 0;
275}
276
277
278static void __init htab_init_page_sizes(void)
279{
280 int rc;
281
282 /* Default to 4K pages only */
283 memcpy(mmu_psize_defs, mmu_psize_defaults_old,
284 sizeof(mmu_psize_defaults_old));
285
286 /*
287 * Try to find the available page sizes in the device-tree
288 */
289 rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
290 if (rc != 0) /* Found */
291 goto found;
292
293 /*
294 * Not in the device-tree, let's fallback on known size
295 * list for 16M capable GP & GR
296 */
297 if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) &&
298 cpu_has_feature(CPU_FTR_16M_PAGE))
299 memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
300 sizeof(mmu_psize_defaults_gp));
301 found:
302 /*
303 * Pick a size for the linear mapping. Currently, we only support
304 * 16M, 1M and 4K which is the default
305 */
306 if (mmu_psize_defs[MMU_PAGE_16M].shift)
307 mmu_linear_psize = MMU_PAGE_16M;
308 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
309 mmu_linear_psize = MMU_PAGE_1M;
310
311 /*
312 * Pick a size for the ordinary pages. Default is 4K, we support
313 * 64K if cache inhibited large pages are supported by the
314 * processor
315 */
316#ifdef CONFIG_PPC_64K_PAGES
317 if (mmu_psize_defs[MMU_PAGE_64K].shift &&
318 cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
319 mmu_virtual_psize = MMU_PAGE_64K;
320#endif
321
322 printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
323 mmu_psize_defs[mmu_linear_psize].shift,
324 mmu_psize_defs[mmu_virtual_psize].shift);
325
326#ifdef CONFIG_HUGETLB_PAGE
327 /* Init large page size. Currently, we pick 16M or 1M depending
328 * on what is available
329 */
330 if (mmu_psize_defs[MMU_PAGE_16M].shift)
331 mmu_huge_psize = MMU_PAGE_16M;
332 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
333 mmu_huge_psize = MMU_PAGE_1M;
334
335 /* Calculate HPAGE_SHIFT and sanity check it */
336 if (mmu_psize_defs[mmu_huge_psize].shift > 16 &&
337 mmu_psize_defs[mmu_huge_psize].shift < 28)
338 HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
339 else
340 HPAGE_SHIFT = 0; /* No huge pages dude ! */
341#endif /* CONFIG_HUGETLB_PAGE */
342}
343
344static int __init htab_dt_scan_pftsize(unsigned long node,
345 const char *uname, int depth,
346 void *data)
347{
348 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
349 u32 *prop;
350
351 /* We are scanning "cpu" nodes only */
352 if (type == NULL || strcmp(type, "cpu") != 0)
353 return 0;
354
355 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
356 if (prop != NULL) {
357 /* pft_size[0] is the NUMA CEC cookie */
358 ppc64_pft_size = prop[1];
359 return 1;
155 } 360 }
361 return 0;
156} 362}
157 363
158static unsigned long get_hashtable_size(void) 364static unsigned long __init htab_get_table_size(void)
159{ 365{
160 unsigned long rnd_mem_size, pteg_count; 366 unsigned long rnd_mem_size, pteg_count;
161 367
162 /* If hash size wasn't obtained in prom.c, we calculate it now based on 368 /* If hash size isn't already provided by the platform, we try to
163 * the total RAM size 369 * retreive it from the device-tree. If it's not there neither, we
370 * calculate it now based on the total RAM size
164 */ 371 */
372 if (ppc64_pft_size == 0)
373 of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
165 if (ppc64_pft_size) 374 if (ppc64_pft_size)
166 return 1UL << ppc64_pft_size; 375 return 1UL << ppc64_pft_size;
167 376
@@ -181,17 +390,21 @@ void __init htab_initialize(void)
181 unsigned long table, htab_size_bytes; 390 unsigned long table, htab_size_bytes;
182 unsigned long pteg_count; 391 unsigned long pteg_count;
183 unsigned long mode_rw; 392 unsigned long mode_rw;
184 int i, use_largepages = 0;
185 unsigned long base = 0, size = 0; 393 unsigned long base = 0, size = 0;
394 int i;
395
186 extern unsigned long tce_alloc_start, tce_alloc_end; 396 extern unsigned long tce_alloc_start, tce_alloc_end;
187 397
188 DBG(" -> htab_initialize()\n"); 398 DBG(" -> htab_initialize()\n");
189 399
400 /* Initialize page sizes */
401 htab_init_page_sizes();
402
190 /* 403 /*
191 * Calculate the required size of the htab. We want the number of 404 * Calculate the required size of the htab. We want the number of
192 * PTEGs to equal one half the number of real pages. 405 * PTEGs to equal one half the number of real pages.
193 */ 406 */
194 htab_size_bytes = get_hashtable_size(); 407 htab_size_bytes = htab_get_table_size();
195 pteg_count = htab_size_bytes >> 7; 408 pteg_count = htab_size_bytes >> 7;
196 409
197 /* For debug, make the HTAB 1/8 as big as it normally would be. */ 410 /* For debug, make the HTAB 1/8 as big as it normally would be. */
@@ -211,14 +424,11 @@ void __init htab_initialize(void)
211 * the absolute address space. 424 * the absolute address space.
212 */ 425 */
213 table = lmb_alloc(htab_size_bytes, htab_size_bytes); 426 table = lmb_alloc(htab_size_bytes, htab_size_bytes);
427 BUG_ON(table == 0);
214 428
215 DBG("Hash table allocated at %lx, size: %lx\n", table, 429 DBG("Hash table allocated at %lx, size: %lx\n", table,
216 htab_size_bytes); 430 htab_size_bytes);
217 431
218 if ( !table ) {
219 ppc64_terminate_msg(0x20, "hpt space");
220 loop_forever();
221 }
222 htab_address = abs_to_virt(table); 432 htab_address = abs_to_virt(table);
223 433
224 /* htab absolute addr + encoded htabsize */ 434 /* htab absolute addr + encoded htabsize */
@@ -234,8 +444,6 @@ void __init htab_initialize(void)
234 * _NOT_ map it to avoid cache paradoxes as it's remapped non 444 * _NOT_ map it to avoid cache paradoxes as it's remapped non
235 * cacheable later on 445 * cacheable later on
236 */ 446 */
237 if (cpu_has_feature(CPU_FTR_16M_PAGE))
238 use_largepages = 1;
239 447
240 /* create bolted the linear mapping in the hash table */ 448 /* create bolted the linear mapping in the hash table */
241 for (i=0; i < lmb.memory.cnt; i++) { 449 for (i=0; i < lmb.memory.cnt; i++) {
@@ -246,27 +454,32 @@ void __init htab_initialize(void)
246 454
247#ifdef CONFIG_U3_DART 455#ifdef CONFIG_U3_DART
248 /* Do not map the DART space. Fortunately, it will be aligned 456 /* Do not map the DART space. Fortunately, it will be aligned
249 * in such a way that it will not cross two lmb regions and will 457 * in such a way that it will not cross two lmb regions and
250 * fit within a single 16Mb page. 458 * will fit within a single 16Mb page.
251 * The DART space is assumed to be a full 16Mb region even if we 459 * The DART space is assumed to be a full 16Mb region even if
252 * only use 2Mb of that space. We will use more of it later for 460 * we only use 2Mb of that space. We will use more of it later
253 * AGP GART. We have to use a full 16Mb large page. 461 * for AGP GART. We have to use a full 16Mb large page.
254 */ 462 */
255 DBG("DART base: %lx\n", dart_tablebase); 463 DBG("DART base: %lx\n", dart_tablebase);
256 464
257 if (dart_tablebase != 0 && dart_tablebase >= base 465 if (dart_tablebase != 0 && dart_tablebase >= base
258 && dart_tablebase < (base + size)) { 466 && dart_tablebase < (base + size)) {
259 if (base != dart_tablebase) 467 if (base != dart_tablebase)
260 create_pte_mapping(base, dart_tablebase, mode_rw, 468 BUG_ON(htab_bolt_mapping(base, dart_tablebase,
261 use_largepages); 469 base, mode_rw,
470 mmu_linear_psize));
262 if ((base + size) > (dart_tablebase + 16*MB)) 471 if ((base + size) > (dart_tablebase + 16*MB))
263 create_pte_mapping(dart_tablebase + 16*MB, base + size, 472 BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
264 mode_rw, use_largepages); 473 base + size,
474 dart_tablebase+16*MB,
475 mode_rw,
476 mmu_linear_psize));
265 continue; 477 continue;
266 } 478 }
267#endif /* CONFIG_U3_DART */ 479#endif /* CONFIG_U3_DART */
268 create_pte_mapping(base, base + size, mode_rw, use_largepages); 480 BUG_ON(htab_bolt_mapping(base, base + size, base,
269 } 481 mode_rw, mmu_linear_psize));
482 }
270 483
271 /* 484 /*
272 * If we have a memory_limit and we've allocated TCEs then we need to 485 * If we have a memory_limit and we've allocated TCEs then we need to
@@ -282,8 +495,9 @@ void __init htab_initialize(void)
282 if (base + size >= tce_alloc_start) 495 if (base + size >= tce_alloc_start)
283 tce_alloc_start = base + size + 1; 496 tce_alloc_start = base + size + 1;
284 497
285 create_pte_mapping(tce_alloc_start, tce_alloc_end, 498 BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
286 mode_rw, use_largepages); 499 tce_alloc_start, mode_rw,
500 mmu_linear_psize));
287 } 501 }
288 502
289 DBG(" <- htab_initialize()\n"); 503 DBG(" <- htab_initialize()\n");
@@ -298,9 +512,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
298{ 512{
299 struct page *page; 513 struct page *page;
300 514
301 if (!pfn_valid(pte_pfn(pte)))
302 return pp;
303
304 page = pte_page(pte); 515 page = pte_page(pte);
305 516
306 /* page is dirty */ 517 /* page is dirty */
@@ -309,7 +520,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
309 __flush_dcache_icache(page_address(page)); 520 __flush_dcache_icache(page_address(page));
310 set_bit(PG_arch_1, &page->flags); 521 set_bit(PG_arch_1, &page->flags);
311 } else 522 } else
312 pp |= HW_NO_EXEC; 523 pp |= HPTE_R_N;
313 } 524 }
314 return pp; 525 return pp;
315} 526}
@@ -325,94 +536,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
325 unsigned long vsid; 536 unsigned long vsid;
326 struct mm_struct *mm; 537 struct mm_struct *mm;
327 pte_t *ptep; 538 pte_t *ptep;
328 int ret;
329 int user_region = 0;
330 int local = 0;
331 cpumask_t tmp; 539 cpumask_t tmp;
540 int rc, user_region = 0, local = 0;
332 541
333 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) 542 DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
334 return 1; 543 ea, access, trap);
335 544
545 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
546 DBG_LOW(" out of pgtable range !\n");
547 return 1;
548 }
549
550 /* Get region & vsid */
336 switch (REGION_ID(ea)) { 551 switch (REGION_ID(ea)) {
337 case USER_REGION_ID: 552 case USER_REGION_ID:
338 user_region = 1; 553 user_region = 1;
339 mm = current->mm; 554 mm = current->mm;
340 if (! mm) 555 if (! mm) {
556 DBG_LOW(" user region with no mm !\n");
341 return 1; 557 return 1;
342 558 }
343 vsid = get_vsid(mm->context.id, ea); 559 vsid = get_vsid(mm->context.id, ea);
344 break; 560 break;
345 case VMALLOC_REGION_ID: 561 case VMALLOC_REGION_ID:
346 mm = &init_mm; 562 mm = &init_mm;
347 vsid = get_kernel_vsid(ea); 563 vsid = get_kernel_vsid(ea);
348 break; 564 break;
349#if 0
350 case KERNEL_REGION_ID:
351 /*
352 * Should never get here - entire 0xC0... region is bolted.
353 * Send the problem up to do_page_fault
354 */
355#endif
356 default: 565 default:
357 /* Not a valid range 566 /* Not a valid range
358 * Send the problem up to do_page_fault 567 * Send the problem up to do_page_fault
359 */ 568 */
360 return 1; 569 return 1;
361 break;
362 } 570 }
571 DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
363 572
573 /* Get pgdir */
364 pgdir = mm->pgd; 574 pgdir = mm->pgd;
365
366 if (pgdir == NULL) 575 if (pgdir == NULL)
367 return 1; 576 return 1;
368 577
578 /* Check CPU locality */
369 tmp = cpumask_of_cpu(smp_processor_id()); 579 tmp = cpumask_of_cpu(smp_processor_id());
370 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) 580 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
371 local = 1; 581 local = 1;
372 582
373 /* Is this a huge page ? */ 583 /* Handle hugepage regions */
374 if (unlikely(in_hugepage_area(mm->context, ea))) 584 if (unlikely(in_hugepage_area(mm->context, ea))) {
375 ret = hash_huge_page(mm, access, ea, vsid, local); 585 DBG_LOW(" -> huge page !\n");
376 else { 586 return hash_huge_page(mm, access, ea, vsid, local);
377 ptep = find_linux_pte(pgdir, ea); 587 }
378 if (ptep == NULL) 588
379 return 1; 589 /* Get PTE and page size from page tables */
380 ret = __hash_page(ea, access, vsid, ptep, trap, local); 590 ptep = find_linux_pte(pgdir, ea);
591 if (ptep == NULL || !pte_present(*ptep)) {
592 DBG_LOW(" no PTE !\n");
593 return 1;
594 }
595
596#ifndef CONFIG_PPC_64K_PAGES
597 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
598#else
599 DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
600 pte_val(*(ptep + PTRS_PER_PTE)));
601#endif
602 /* Pre-check access permissions (will be re-checked atomically
603 * in __hash_page_XX but this pre-check is a fast path
604 */
605 if (access & ~pte_val(*ptep)) {
606 DBG_LOW(" no access !\n");
607 return 1;
381 } 608 }
382 609
383 return ret; 610 /* Do actual hashing */
611#ifndef CONFIG_PPC_64K_PAGES
612 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
613#else
614 if (mmu_virtual_psize == MMU_PAGE_64K)
615 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
616 else
617 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
618#endif /* CONFIG_PPC_64K_PAGES */
619
620#ifndef CONFIG_PPC_64K_PAGES
621 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
622#else
623 DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
624 pte_val(*(ptep + PTRS_PER_PTE)));
625#endif
626 DBG_LOW(" -> rc=%d\n", rc);
627 return rc;
384} 628}
385 629
386void flush_hash_page(unsigned long va, pte_t pte, int local) 630void hash_preload(struct mm_struct *mm, unsigned long ea,
631 unsigned long access, unsigned long trap)
387{ 632{
388 unsigned long vpn, hash, secondary, slot; 633 unsigned long vsid;
389 unsigned long huge = pte_huge(pte); 634 void *pgdir;
635 pte_t *ptep;
636 cpumask_t mask;
637 unsigned long flags;
638 int local = 0;
639
640 /* We don't want huge pages prefaulted for now
641 */
642 if (unlikely(in_hugepage_area(mm->context, ea)))
643 return;
644
645 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
646 " trap=%lx\n", mm, mm->pgd, ea, access, trap);
390 647
391 if (huge) 648 /* Get PTE, VSID, access mask */
392 vpn = va >> HPAGE_SHIFT; 649 pgdir = mm->pgd;
650 if (pgdir == NULL)
651 return;
652 ptep = find_linux_pte(pgdir, ea);
653 if (!ptep)
654 return;
655 vsid = get_vsid(mm->context.id, ea);
656
657 /* Hash it in */
658 local_irq_save(flags);
659 mask = cpumask_of_cpu(smp_processor_id());
660 if (cpus_equal(mm->cpu_vm_mask, mask))
661 local = 1;
662#ifndef CONFIG_PPC_64K_PAGES
663 __hash_page_4K(ea, access, vsid, ptep, trap, local);
664#else
665 if (mmu_virtual_psize == MMU_PAGE_64K)
666 __hash_page_64K(ea, access, vsid, ptep, trap, local);
393 else 667 else
394 vpn = va >> PAGE_SHIFT; 668 __hash_page_4K(ea, access, vsid, ptep, trap, local);
395 hash = hpt_hash(vpn, huge); 669#endif /* CONFIG_PPC_64K_PAGES */
396 secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; 670 local_irq_restore(flags);
397 if (secondary) 671}
398 hash = ~hash; 672
399 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 673void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
400 slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; 674{
401 675 unsigned long hash, index, shift, hidx, slot;
402 ppc_md.hpte_invalidate(slot, va, huge, local); 676
677 DBG_LOW("flush_hash_page(va=%016x)\n", va);
678 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
679 hash = hpt_hash(va, shift);
680 hidx = __rpte_to_hidx(pte, index);
681 if (hidx & _PTEIDX_SECONDARY)
682 hash = ~hash;
683 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
684 slot += hidx & _PTEIDX_GROUP_IX;
685 DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
686 ppc_md.hpte_invalidate(slot, va, psize, local);
687 } pte_iterate_hashed_end();
403} 688}
404 689
405void flush_hash_range(unsigned long number, int local) 690void flush_hash_range(unsigned long number, int local)
406{ 691{
407 if (ppc_md.flush_hash_range) { 692 if (ppc_md.flush_hash_range)
408 ppc_md.flush_hash_range(number, local); 693 ppc_md.flush_hash_range(number, local);
409 } else { 694 else {
410 int i; 695 int i;
411 struct ppc64_tlb_batch *batch = 696 struct ppc64_tlb_batch *batch =
412 &__get_cpu_var(ppc64_tlb_batch); 697 &__get_cpu_var(ppc64_tlb_batch);
413 698
414 for (i = 0; i < number; i++) 699 for (i = 0; i < number; i++)
415 flush_hash_page(batch->vaddr[i], batch->pte[i], local); 700 flush_hash_page(batch->vaddr[i], batch->pte[i],
701 batch->psize, local);
416 } 702 }
417} 703}
418 704
@@ -452,6 +738,18 @@ void __init htab_finish_init(void)
452 extern unsigned int *htab_call_hpte_remove; 738 extern unsigned int *htab_call_hpte_remove;
453 extern unsigned int *htab_call_hpte_updatepp; 739 extern unsigned int *htab_call_hpte_updatepp;
454 740
741#ifdef CONFIG_PPC_64K_PAGES
742 extern unsigned int *ht64_call_hpte_insert1;
743 extern unsigned int *ht64_call_hpte_insert2;
744 extern unsigned int *ht64_call_hpte_remove;
745 extern unsigned int *ht64_call_hpte_updatepp;
746
747 make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
748 make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
749 make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
750 make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
751#endif /* CONFIG_PPC_64K_PAGES */
752
455 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); 753 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
456 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); 754 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
457 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); 755 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..0073a04047e4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
47 pu = pud_offset(pg, addr); 47 pu = pud_offset(pg, addr);
48 if (!pud_none(*pu)) { 48 if (!pud_none(*pu)) {
49 pm = pmd_offset(pu, addr); 49 pm = pmd_offset(pu, addr);
50#ifdef CONFIG_PPC_64K_PAGES
51 /* Currently, we use the normal PTE offset within full
52 * size PTE pages, thus our huge PTEs are scattered in
53 * the PTE page and we do waste some. We may change
54 * that in the future, but the current mecanism keeps
55 * things much simpler
56 */
57 if (!pmd_none(*pm)) {
58 /* Note: pte_offset_* are all equivalent on
59 * ppc64 as we don't have HIGHMEM
60 */
61 pt = pte_offset_kernel(pm, addr);
62 return pt;
63 }
64#else /* CONFIG_PPC_64K_PAGES */
65 /* On 4k pages, we put huge PTEs in the PMD page */
50 pt = (pte_t *)pm; 66 pt = (pte_t *)pm;
51 BUG_ON(!pmd_none(*pm)
52 && !(pte_present(*pt) && pte_huge(*pt)));
53 return pt; 67 return pt;
68#endif /* CONFIG_PPC_64K_PAGES */
54 } 69 }
55 } 70 }
56 71
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
74 if (pu) { 89 if (pu) {
75 pm = pmd_alloc(mm, pu, addr); 90 pm = pmd_alloc(mm, pu, addr);
76 if (pm) { 91 if (pm) {
92#ifdef CONFIG_PPC_64K_PAGES
93 /* See comment in huge_pte_offset. Note that if we ever
94 * want to put the page size in the PMD, we would have
95 * to open code our own pte_alloc* function in order
96 * to populate and set the size atomically
97 */
98 pt = pte_alloc_map(mm, pm, addr);
99#else /* CONFIG_PPC_64K_PAGES */
77 pt = (pte_t *)pm; 100 pt = (pte_t *)pm;
78 BUG_ON(!pmd_none(*pm) 101#endif /* CONFIG_PPC_64K_PAGES */
79 && !(pte_present(*pt) && pte_huge(*pt)));
80 return pt; 102 return pt;
81 } 103 }
82 } 104 }
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
84 return NULL; 106 return NULL;
85} 107}
86 108
87#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
88
89void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 109void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
90 pte_t *ptep, pte_t pte) 110 pte_t *ptep, pte_t pte)
91{ 111{
92 int i;
93
94 if (pte_present(*ptep)) { 112 if (pte_present(*ptep)) {
95 pte_clear(mm, addr, ptep); 113 /* We open-code pte_clear because we need to pass the right
114 * argument to hpte_update (huge / !huge)
115 */
116 unsigned long old = pte_update(ptep, ~0UL);
117 if (old & _PAGE_HASHPTE)
118 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
96 flush_tlb_pending(); 119 flush_tlb_pending();
97 } 120 }
98 121 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
99 for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
100 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
101 ptep++;
102 }
103} 122}
104 123
105pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 124pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
106 pte_t *ptep) 125 pte_t *ptep)
107{ 126{
108 unsigned long old = pte_update(ptep, ~0UL); 127 unsigned long old = pte_update(ptep, ~0UL);
109 int i;
110 128
111 if (old & _PAGE_HASHPTE) 129 if (old & _PAGE_HASHPTE)
112 hpte_update(mm, addr, old, 0); 130 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
113 131 *ptep = __pte(0);
114 for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
115 ptep[i] = __pte(0);
116 132
117 return __pte(old); 133 return __pte(old);
118} 134}
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
563 int lastshift; 579 int lastshift;
564 u16 areamask, curareas; 580 u16 areamask, curareas;
565 581
582 if (HPAGE_SHIFT == 0)
583 return -EINVAL;
566 if (len & ~HPAGE_MASK) 584 if (len & ~HPAGE_MASK)
567 return -EINVAL; 585 return -EINVAL;
568 586
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
619 unsigned long ea, unsigned long vsid, int local) 637 unsigned long ea, unsigned long vsid, int local)
620{ 638{
621 pte_t *ptep; 639 pte_t *ptep;
622 unsigned long va, vpn; 640 unsigned long old_pte, new_pte;
623 pte_t old_pte, new_pte; 641 unsigned long va, rflags, pa;
624 unsigned long rflags, prpn;
625 long slot; 642 long slot;
626 int err = 1; 643 int err = 1;
627 644
628 spin_lock(&mm->page_table_lock);
629
630 ptep = huge_pte_offset(mm, ea); 645 ptep = huge_pte_offset(mm, ea);
631 646
632 /* Search the Linux page table for a match with va */ 647 /* Search the Linux page table for a match with va */
633 va = (vsid << 28) | (ea & 0x0fffffff); 648 va = (vsid << 28) | (ea & 0x0fffffff);
634 vpn = va >> HPAGE_SHIFT;
635 649
636 /* 650 /*
637 * If no pte found or not present, send the problem up to 651 * If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
640 if (unlikely(!ptep || pte_none(*ptep))) 654 if (unlikely(!ptep || pte_none(*ptep)))
641 goto out; 655 goto out;
642 656
643/* BUG_ON(pte_bad(*ptep)); */
644
645 /* 657 /*
646 * Check the user's access rights to the page. If access should be 658 * Check the user's access rights to the page. If access should be
647 * prevented then send the problem up to do_page_fault. 659 * prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
661 */ 673 */
662 674
663 675
664 old_pte = *ptep; 676 do {
665 new_pte = old_pte; 677 old_pte = pte_val(*ptep);
666 678 if (old_pte & _PAGE_BUSY)
667 rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); 679 goto out;
680 new_pte = old_pte | _PAGE_BUSY |
681 _PAGE_ACCESSED | _PAGE_HASHPTE;
682 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
683 old_pte, new_pte));
684
685 rflags = 0x2 | (!(new_pte & _PAGE_RW));
668 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 686 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
669 rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); 687 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
670 688
671 /* Check if pte already has an hpte (case 2) */ 689 /* Check if pte already has an hpte (case 2) */
672 if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { 690 if (unlikely(old_pte & _PAGE_HASHPTE)) {
673 /* There MIGHT be an HPTE for this pte */ 691 /* There MIGHT be an HPTE for this pte */
674 unsigned long hash, slot; 692 unsigned long hash, slot;
675 693
676 hash = hpt_hash(vpn, 1); 694 hash = hpt_hash(va, HPAGE_SHIFT);
677 if (pte_val(old_pte) & _PAGE_SECONDARY) 695 if (old_pte & _PAGE_F_SECOND)
678 hash = ~hash; 696 hash = ~hash;
679 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 697 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
680 slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; 698 slot += (old_pte & _PAGE_F_GIX) >> 12;
681 699
682 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) 700 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
683 pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; 701 old_pte &= ~_PAGE_HPTEFLAGS;
684 } 702 }
685 703
686 if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { 704 if (likely(!(old_pte & _PAGE_HASHPTE))) {
687 unsigned long hash = hpt_hash(vpn, 1); 705 unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
688 unsigned long hpte_group; 706 unsigned long hpte_group;
689 707
690 prpn = pte_pfn(old_pte); 708 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
691 709
692repeat: 710repeat:
693 hpte_group = ((hash & htab_hash_mask) * 711 hpte_group = ((hash & htab_hash_mask) *
694 HPTES_PER_GROUP) & ~0x7UL; 712 HPTES_PER_GROUP) & ~0x7UL;
695 713
696 /* Update the linux pte with the HPTE slot */ 714 /* clear HPTE slot informations in new PTE */
697 pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; 715 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
698 pte_val(new_pte) |= _PAGE_HASHPTE;
699 716
700 /* Add in WIMG bits */ 717 /* Add in WIMG bits */
701 /* XXX We should store these in the pte */ 718 /* XXX We should store these in the pte */
719 /* --BenH: I think they are ... */
702 rflags |= _PAGE_COHERENT; 720 rflags |= _PAGE_COHERENT;
703 721
704 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 722 /* Insert into the hash table, primary slot */
705 HPTE_V_LARGE, rflags); 723 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
724 mmu_huge_psize);
706 725
707 /* Primary is full, try the secondary */ 726 /* Primary is full, try the secondary */
708 if (unlikely(slot == -1)) { 727 if (unlikely(slot == -1)) {
709 pte_val(new_pte) |= _PAGE_SECONDARY; 728 new_pte |= _PAGE_F_SECOND;
710 hpte_group = ((~hash & htab_hash_mask) * 729 hpte_group = ((~hash & htab_hash_mask) *
711 HPTES_PER_GROUP) & ~0x7UL; 730 HPTES_PER_GROUP) & ~0x7UL;
712 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 731 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
713 HPTE_V_LARGE |
714 HPTE_V_SECONDARY, 732 HPTE_V_SECONDARY,
715 rflags); 733 mmu_huge_psize);
716 if (slot == -1) { 734 if (slot == -1) {
717 if (mftb() & 0x1) 735 if (mftb() & 0x1)
718 hpte_group = ((hash & htab_hash_mask) * 736 hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
726 if (unlikely(slot == -2)) 744 if (unlikely(slot == -2))
727 panic("hash_huge_page: pte_insert failed\n"); 745 panic("hash_huge_page: pte_insert failed\n");
728 746
729 pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; 747 new_pte |= (slot << 12) & _PAGE_F_GIX;
730
731 /*
732 * No need to use ldarx/stdcx here because all who
733 * might be updating the pte will hold the
734 * page_table_lock
735 */
736 *ptep = new_pte;
737 } 748 }
738 749
750 /*
751 * No need to use ldarx/stdcx here because all who
752 * might be updating the pte will hold the
753 * page_table_lock
754 */
755 *ptep = __pte(new_pte & ~_PAGE_BUSY);
756
739 err = 0; 757 err = 0;
740 758
741 out: 759 out:
742 spin_unlock(&mm->page_table_lock);
743
744 return err; 760 return err;
745} 761}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b0fc822ec29f..dfe7fa37b41a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -188,12 +188,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
188 memset(addr, 0, kmem_cache_size(cache)); 188 memset(addr, 0, kmem_cache_size(cache));
189} 189}
190 190
191#ifdef CONFIG_PPC_64K_PAGES
192static const int pgtable_cache_size[2] = {
193 PTE_TABLE_SIZE, PGD_TABLE_SIZE
194};
195static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
196 "pte_pmd_cache", "pgd_cache",
197};
198#else
191static const int pgtable_cache_size[2] = { 199static const int pgtable_cache_size[2] = {
192 PTE_TABLE_SIZE, PMD_TABLE_SIZE 200 PTE_TABLE_SIZE, PMD_TABLE_SIZE
193}; 201};
194static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 202static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
195 "pgd_pte_cache", "pud_pmd_cache", 203 "pgd_pte_cache", "pud_pmd_cache",
196}; 204};
205#endif /* CONFIG_PPC_64K_PAGES */
197 206
198kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; 207kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
199 208
@@ -201,19 +210,14 @@ void pgtable_cache_init(void)
201{ 210{
202 int i; 211 int i;
203 212
204 BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
205 BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
206 BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
207 BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
208
209 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { 213 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
210 int size = pgtable_cache_size[i]; 214 int size = pgtable_cache_size[i];
211 const char *name = pgtable_cache_name[i]; 215 const char *name = pgtable_cache_name[i];
212 216
213 pgtable_cache[i] = kmem_cache_create(name, 217 pgtable_cache[i] = kmem_cache_create(name,
214 size, size, 218 size, size,
215 SLAB_HWCACHE_ALIGN 219 SLAB_HWCACHE_ALIGN |
216 | SLAB_MUST_HWCACHE_ALIGN, 220 SLAB_MUST_HWCACHE_ALIGN,
217 zero_ctor, 221 zero_ctor,
218 NULL); 222 NULL);
219 if (! pgtable_cache[i]) 223 if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e14..7faa46b71f21 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
61int mem_init_done; 61int mem_init_done;
62unsigned long memory_limit; 62unsigned long memory_limit;
63 63
64extern void hash_preload(struct mm_struct *mm, unsigned long ea,
65 unsigned long access, unsigned long trap);
66
64/* 67/*
65 * This is called by /dev/mem to know if a given address has to 68 * This is called by /dev/mem to know if a given address has to
66 * be mapped non-cacheable or not 69 * be mapped non-cacheable or not
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
493void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 496void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
494 pte_t pte) 497 pte_t pte)
495{ 498{
496 /* handle i-cache coherency */ 499#ifdef CONFIG_PPC_STD_MMU
497 unsigned long pfn = pte_pfn(pte); 500 unsigned long access = 0, trap;
498#ifdef CONFIG_PPC32
499 pmd_t *pmd;
500#else
501 unsigned long vsid;
502 void *pgdir;
503 pte_t *ptep;
504 int local = 0;
505 cpumask_t tmp;
506 unsigned long flags;
507#endif 501#endif
502 unsigned long pfn = pte_pfn(pte);
508 503
509 /* handle i-cache coherency */ 504 /* handle i-cache coherency */
510 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && 505 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
535 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ 530 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
536 if (!pte_young(pte) || address >= TASK_SIZE) 531 if (!pte_young(pte) || address >= TASK_SIZE)
537 return; 532 return;
538#ifdef CONFIG_PPC32
539 if (Hash == 0)
540 return;
541 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
542 if (!pmd_none(*pmd))
543 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
544#else
545 pgdir = vma->vm_mm->pgd;
546 if (pgdir == NULL)
547 return;
548 533
549 ptep = find_linux_pte(pgdir, address); 534 /* We try to figure out if we are coming from an instruction
550 if (!ptep) 535 * access fault and pass that down to __hash_page so we avoid
536 * double-faulting on execution of fresh text. We have to test
537 * for regs NULL since init will get here first thing at boot
538 *
539 * We also avoid filling the hash if not coming from a fault
540 */
541 if (current->thread.regs == NULL)
551 return; 542 return;
552 543 trap = TRAP(current->thread.regs);
553 vsid = get_vsid(vma->vm_mm->context.id, address); 544 if (trap == 0x400)
554 545 access |= _PAGE_EXEC;
555 local_irq_save(flags); 546 else if (trap != 0x300)
556 tmp = cpumask_of_cpu(smp_processor_id()); 547 return;
557 if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) 548 hash_preload(vma->vm_mm, address, access, trap);
558 local = 1; 549#endif /* CONFIG_PPC_STD_MMU */
559
560 __hash_page(address, 0, vsid, ptep, 0x300, local);
561 local_irq_restore(flags);
562#endif
563#endif
564} 550}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b79a78206135..51b786940971 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -101,7 +101,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
101 pud_t *pudp; 101 pud_t *pudp;
102 pmd_t *pmdp; 102 pmd_t *pmdp;
103 pte_t *ptep; 103 pte_t *ptep;
104 unsigned long vsid;
105 104
106 if (mem_init_done) { 105 if (mem_init_done) {
107 pgdp = pgd_offset_k(ea); 106 pgdp = pgd_offset_k(ea);
@@ -117,28 +116,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
117 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 116 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
118 __pgprot(flags))); 117 __pgprot(flags)));
119 } else { 118 } else {
120 unsigned long va, vpn, hash, hpteg;
121
122 /* 119 /*
123 * If the mm subsystem is not fully up, we cannot create a 120 * If the mm subsystem is not fully up, we cannot create a
124 * linux page table entry for this mapping. Simply bolt an 121 * linux page table entry for this mapping. Simply bolt an
125 * entry in the hardware page table. 122 * entry in the hardware page table.
123 *
126 */ 124 */
127 vsid = get_kernel_vsid(ea); 125 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
128 va = (vsid << 28) | (ea & 0xFFFFFFF); 126 mmu_virtual_psize))
129 vpn = va >> PAGE_SHIFT; 127 panic("Can't map bolted IO mapping");
130
131 hash = hpt_hash(vpn, 0);
132
133 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
134
135 /* Panic if a pte grpup is full */
136 if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
137 HPTE_V_BOLTED,
138 _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
139 == -1) {
140 panic("map_io_page: could not insert mapping");
141 }
142 } 128 }
143 return 0; 129 return 0;
144} 130}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e9..d137abd241ff 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
179} 179}
180 180
181/* 181/*
182 * Preload a translation in the hash table
183 */
184void hash_preload(struct mm_struct *mm, unsigned long ea,
185 unsigned long access, unsigned long trap)
186{
187 pmd_t *pmd;
188
189 if (Hash == 0)
190 return;
191 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
192 if (!pmd_none(*pmd))
193 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
194}
195
196/*
182 * Initialize the hash table and patch the instructions in hashtable.S. 197 * Initialize the hash table and patch the instructions in hashtable.S.
183 */ 198 */
184void __init MMU_init_hw(void) 199void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a37..60e852f2f8e5 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 */ 15 */
16 16
17#undef DEBUG
18
17#include <linux/config.h> 19#include <linux/config.h>
18#include <asm/pgtable.h> 20#include <asm/pgtable.h>
19#include <asm/mmu.h> 21#include <asm/mmu.h>
20#include <asm/mmu_context.h> 22#include <asm/mmu_context.h>
21#include <asm/paca.h> 23#include <asm/paca.h>
22#include <asm/cputable.h> 24#include <asm/cputable.h>
25#include <asm/cacheflush.h>
26
27#ifdef DEBUG
28#define DBG(fmt...) udbg_printf(fmt)
29#else
30#define DBG(fmt...)
31#endif
23 32
24extern void slb_allocate(unsigned long ea); 33extern void slb_allocate_realmode(unsigned long ea);
34extern void slb_allocate_user(unsigned long ea);
35
36static void slb_allocate(unsigned long ea)
37{
38 /* Currently, we do real mode for all SLBs including user, but
39 * that will change if we bring back dynamic VSIDs
40 */
41 slb_allocate_realmode(ea);
42}
25 43
26static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) 44static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
27{ 45{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
46{ 64{
47 /* If you change this make sure you change SLB_NUM_BOLTED 65 /* If you change this make sure you change SLB_NUM_BOLTED
48 * appropriately too. */ 66 * appropriately too. */
49 unsigned long ksp_flags = SLB_VSID_KERNEL; 67 unsigned long linear_llp, virtual_llp, lflags, vflags;
50 unsigned long ksp_esid_data; 68 unsigned long ksp_esid_data;
51 69
52 WARN_ON(!irqs_disabled()); 70 WARN_ON(!irqs_disabled());
53 71
54 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 72 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
55 ksp_flags |= SLB_VSID_L; 73 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
74 lflags = SLB_VSID_KERNEL | linear_llp;
75 vflags = SLB_VSID_KERNEL | virtual_llp;
56 76
57 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); 77 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
58 if ((ksp_esid_data & ESID_MASK) == KERNELBASE) 78 if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
67 /* Slot 2 - kernel stack */ 87 /* Slot 2 - kernel stack */
68 "slbmte %2,%3\n" 88 "slbmte %2,%3\n"
69 "isync" 89 "isync"
70 :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), 90 :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
71 "r"(mk_esid_data(VMALLOCBASE, 1)), 91 "r"(mk_esid_data(VMALLOCBASE, 1)),
72 "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), 92 "r"(mk_vsid_data(ksp_esid_data, lflags)),
73 "r"(ksp_esid_data) 93 "r"(ksp_esid_data)
74 : "memory"); 94 : "memory");
75} 95}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
102 122
103 get_paca()->slb_cache_ptr = 0; 123 get_paca()->slb_cache_ptr = 0;
104 get_paca()->context = mm->context; 124 get_paca()->context = mm->context;
125#ifdef CONFIG_PPC_64K_PAGES
126 get_paca()->pgdir = mm->pgd;
127#endif /* CONFIG_PPC_64K_PAGES */
105 128
106 /* 129 /*
107 * preload some userspace segments into the SLB. 130 * preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
131 slb_allocate(unmapped_base); 154 slb_allocate(unmapped_base);
132} 155}
133 156
157static inline void patch_slb_encoding(unsigned int *insn_addr,
158 unsigned int immed)
159{
160 /* Assume the instruction had a "0" immediate value, just
161 * "or" in the new value
162 */
163 *insn_addr |= immed;
164 flush_icache_range((unsigned long)insn_addr, 4+
165 (unsigned long)insn_addr);
166}
167
134void slb_initialize(void) 168void slb_initialize(void)
135{ 169{
170 unsigned long linear_llp, virtual_llp;
171 static int slb_encoding_inited;
172 extern unsigned int *slb_miss_kernel_load_linear;
173 extern unsigned int *slb_miss_kernel_load_virtual;
174 extern unsigned int *slb_miss_user_load_normal;
175#ifdef CONFIG_HUGETLB_PAGE
176 extern unsigned int *slb_miss_user_load_huge;
177 unsigned long huge_llp;
178
179 huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
180#endif
181
182 /* Prepare our SLB miss handler based on our page size */
183 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
184 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
185 if (!slb_encoding_inited) {
186 slb_encoding_inited = 1;
187 patch_slb_encoding(slb_miss_kernel_load_linear,
188 SLB_VSID_KERNEL | linear_llp);
189 patch_slb_encoding(slb_miss_kernel_load_virtual,
190 SLB_VSID_KERNEL | virtual_llp);
191 patch_slb_encoding(slb_miss_user_load_normal,
192 SLB_VSID_USER | virtual_llp);
193
194 DBG("SLB: linear LLP = %04x\n", linear_llp);
195 DBG("SLB: virtual LLP = %04x\n", virtual_llp);
196#ifdef CONFIG_HUGETLB_PAGE
197 patch_slb_encoding(slb_miss_user_load_huge,
198 SLB_VSID_USER | huge_llp);
199 DBG("SLB: huge LLP = %04x\n", huge_llp);
200#endif
201 }
202
136 /* On iSeries the bolted entries have already been set up by 203 /* On iSeries the bolted entries have already been set up by
137 * the hypervisor from the lparMap data in head.S */ 204 * the hypervisor from the lparMap data in head.S */
138#ifndef CONFIG_PPC_ISERIES 205#ifndef CONFIG_PPC_ISERIES
139 unsigned long flags = SLB_VSID_KERNEL; 206 {
207 unsigned long lflags, vflags;
140 208
141 /* Invalidate the entire SLB (even slot 0) & all the ERATS */ 209 lflags = SLB_VSID_KERNEL | linear_llp;
142 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 210 vflags = SLB_VSID_KERNEL | virtual_llp;
143 flags |= SLB_VSID_L;
144 211
145 asm volatile("isync":::"memory"); 212 /* Invalidate the entire SLB (even slot 0) & all the ERATS */
146 asm volatile("slbmte %0,%0"::"r" (0) : "memory"); 213 asm volatile("isync":::"memory");
214 asm volatile("slbmte %0,%0"::"r" (0) : "memory");
147 asm volatile("isync; slbia; isync":::"memory"); 215 asm volatile("isync; slbia; isync":::"memory");
148 create_slbe(KERNELBASE, flags, 0); 216 create_slbe(KERNELBASE, lflags, 0);
149 create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); 217
218 /* VMALLOC space has 4K pages always for now */
219 create_slbe(VMALLOCBASE, vflags, 1);
220
150 /* We don't bolt the stack for the time being - we're in boot, 221 /* We don't bolt the stack for the time being - we're in boot,
151 * so the stack is in the bolted segment. By the time it goes 222 * so the stack is in the bolted segment. By the time it goes
152 * elsewhere, we'll call _switch() which will bolt in the new 223 * elsewhere, we'll call _switch() which will bolt in the new
153 * one. */ 224 * one. */
154 asm volatile("isync":::"memory"); 225 asm volatile("isync":::"memory");
155#endif 226 }
227#endif /* CONFIG_PPC_ISERIES */
156 228
157 get_paca()->stab_rr = SLB_NUM_BOLTED; 229 get_paca()->stab_rr = SLB_NUM_BOLTED;
158} 230}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503bc..3e18241b6f35 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
18 18
19#include <linux/config.h> 19#include <linux/config.h>
20#include <asm/processor.h> 20#include <asm/processor.h>
21#include <asm/page.h>
22#include <asm/mmu.h>
23#include <asm/ppc_asm.h> 21#include <asm/ppc_asm.h>
24#include <asm/asm-offsets.h> 22#include <asm/asm-offsets.h>
25#include <asm/cputable.h> 23#include <asm/cputable.h>
24#include <asm/page.h>
25#include <asm/mmu.h>
26#include <asm/pgtable.h>
26 27
27/* void slb_allocate(unsigned long ea); 28/* void slb_allocate_realmode(unsigned long ea);
28 * 29 *
29 * Create an SLB entry for the given EA (user or kernel). 30 * Create an SLB entry for the given EA (user or kernel).
30 * r3 = faulting address, r13 = PACA 31 * r3 = faulting address, r13 = PACA
31 * r9, r10, r11 are clobbered by this function 32 * r9, r10, r11 are clobbered by this function
32 * No other registers are examined or changed. 33 * No other registers are examined or changed.
33 */ 34 */
34_GLOBAL(slb_allocate) 35_GLOBAL(slb_allocate_realmode)
35 /* 36 /* r3 = faulting address */
36 * First find a slot, round robin. Previously we tried to find
37 * a free slot first but that took too long. Unfortunately we
38 * dont have any LRU information to help us choose a slot.
39 */
40#ifdef CONFIG_PPC_ISERIES
41 /*
42 * On iSeries, the "bolted" stack segment can be cast out on
43 * shared processor switch so we need to check for a miss on
44 * it and restore it to the right slot.
45 */
46 ld r9,PACAKSAVE(r13)
47 clrrdi r9,r9,28
48 clrrdi r11,r3,28
49 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
50 cmpld r9,r11
51 beq 3f
52#endif /* CONFIG_PPC_ISERIES */
53
54 ld r10,PACASTABRR(r13)
55 addi r10,r10,1
56 /* use a cpu feature mask if we ever change our slb size */
57 cmpldi r10,SLB_NUM_ENTRIES
58
59 blt+ 4f
60 li r10,SLB_NUM_BOLTED
61
624:
63 std r10,PACASTABRR(r13)
643:
65 /* r3 = faulting address, r10 = entry */
66 37
67 srdi r9,r3,60 /* get region */ 38 srdi r9,r3,60 /* get region */
68 srdi r3,r3,28 /* get esid */ 39 srdi r10,r3,28 /* get esid */
69 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ 40 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
70 41
71 rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ 42 /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
72 oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
73
74 /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
75
76 blt cr7,0f /* user or kernel? */ 43 blt cr7,0f /* user or kernel? */
77 44
78 /* kernel address: proto-VSID = ESID */ 45 /* kernel address: proto-VSID = ESID */
@@ -81,43 +48,161 @@ _GLOBAL(slb_allocate)
81 * top segment. That's ok, the scramble below will translate 48 * top segment. That's ok, the scramble below will translate
82 * it to VSID 0, which is reserved as a bad VSID - one which 49 * it to VSID 0, which is reserved as a bad VSID - one which
83 * will never have any pages in it. */ 50 * will never have any pages in it. */
84 li r11,SLB_VSID_KERNEL
85BEGIN_FTR_SECTION
86 bne cr7,9f
87 li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
88END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
89 b 9f
90 51
910: /* user address: proto-VSID = context<<15 | ESID */ 52 /* Check if hitting the linear mapping of the vmalloc/ioremap
92 srdi. r9,r3,USER_ESID_BITS 53 * kernel space
54 */
55 bne cr7,1f
56
57 /* Linear mapping encoding bits, the "li" instruction below will
58 * be patched by the kernel at boot
59 */
60_GLOBAL(slb_miss_kernel_load_linear)
61 li r11,0
62 b slb_finish_load
63
641: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
65 * will be patched by the kernel at boot
66 */
67_GLOBAL(slb_miss_kernel_load_virtual)
68 li r11,0
69 b slb_finish_load
70
71
720: /* user address: proto-VSID = context << 15 | ESID. First check
73 * if the address is within the boundaries of the user region
74 */
75 srdi. r9,r10,USER_ESID_BITS
93 bne- 8f /* invalid ea bits set */ 76 bne- 8f /* invalid ea bits set */
94 77
78 /* Figure out if the segment contains huge pages */
95#ifdef CONFIG_HUGETLB_PAGE 79#ifdef CONFIG_HUGETLB_PAGE
96BEGIN_FTR_SECTION 80BEGIN_FTR_SECTION
81 b 1f
82END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
97 lhz r9,PACAHIGHHTLBAREAS(r13) 83 lhz r9,PACAHIGHHTLBAREAS(r13)
98 srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) 84 srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
99 srd r9,r9,r11 85 srd r9,r9,r11
100 lhz r11,PACALOWHTLBAREAS(r13) 86 lhz r11,PACALOWHTLBAREAS(r13)
101 srd r11,r11,r3 87 srd r11,r11,r10
102 or r9,r9,r11 88 or. r9,r9,r11
103END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 89 beq 1f
90_GLOBAL(slb_miss_user_load_huge)
91 li r11,0
92 b 2f
931:
104#endif /* CONFIG_HUGETLB_PAGE */ 94#endif /* CONFIG_HUGETLB_PAGE */
105 95
106 li r11,SLB_VSID_USER 96_GLOBAL(slb_miss_user_load_normal)
97 li r11,0
107 98
108#ifdef CONFIG_HUGETLB_PAGE 992:
109BEGIN_FTR_SECTION 100 ld r9,PACACONTEXTID(r13)
110 rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ 101 rldimi r10,r9,USER_ESID_BITS,0
111END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 102 b slb_finish_load
112#endif /* CONFIG_HUGETLB_PAGE */ 103
1048: /* invalid EA */
105 li r10,0 /* BAD_VSID */
106 li r11,SLB_VSID_USER /* flags don't much matter */
107 b slb_finish_load
108
109#ifdef __DISABLED__
110
111/* void slb_allocate_user(unsigned long ea);
112 *
113 * Create an SLB entry for the given EA (user or kernel).
114 * r3 = faulting address, r13 = PACA
115 * r9, r10, r11 are clobbered by this function
116 * No other registers are examined or changed.
117 *
118 * It is called with translation enabled in order to be able to walk the
119 * page tables. This is not currently used.
120 */
121_GLOBAL(slb_allocate_user)
122 /* r3 = faulting address */
123 srdi r10,r3,28 /* get esid */
124
125 crset 4*cr7+lt /* set "user" flag for later */
126
127 /* check if we fit in the range covered by the pagetables*/
128 srdi. r9,r3,PGTABLE_EADDR_SIZE
129 crnot 4*cr0+eq,4*cr0+eq
130 beqlr
113 131
132 /* now we need to get to the page tables in order to get the page
133 * size encoding from the PMD. In the future, we'll be able to deal
134 * with 1T segments too by getting the encoding from the PGD instead
135 */
136 ld r9,PACAPGDIR(r13)
137 cmpldi cr0,r9,0
138 beqlr
139 rlwinm r11,r10,8,25,28
140 ldx r9,r9,r11 /* get pgd_t */
141 cmpldi cr0,r9,0
142 beqlr
143 rlwinm r11,r10,3,17,28
144 ldx r9,r9,r11 /* get pmd_t */
145 cmpldi cr0,r9,0
146 beqlr
147
148 /* build vsid flags */
149 andi. r11,r9,SLB_VSID_LLP
150 ori r11,r11,SLB_VSID_USER
151
152 /* get context to calculate proto-VSID */
114 ld r9,PACACONTEXTID(r13) 153 ld r9,PACACONTEXTID(r13)
115 rldimi r3,r9,USER_ESID_BITS,0 154 rldimi r10,r9,USER_ESID_BITS,0
155
156 /* fall through slb_finish_load */
157
158#endif /* __DISABLED__ */
116 159
1179: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
118 ASM_VSID_SCRAMBLE(r3,r9)
119 160
120 rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ 161/*
162 * Finish loading of an SLB entry and return
163 *
164 * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
165 */
166slb_finish_load:
167 ASM_VSID_SCRAMBLE(r10,r9)
168 rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
169
170 /* r3 = EA, r11 = VSID data */
171 /*
172 * Find a slot, round robin. Previously we tried to find a
173 * free slot first but that took too long. Unfortunately we
174 * dont have any LRU information to help us choose a slot.
175 */
176#ifdef CONFIG_PPC_ISERIES
177 /*
178 * On iSeries, the "bolted" stack segment can be cast out on
179 * shared processor switch so we need to check for a miss on
180 * it and restore it to the right slot.
181 */
182 ld r9,PACAKSAVE(r13)
183 clrrdi r9,r9,28
184 clrrdi r3,r3,28
185 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
186 cmpld r9,r3
187 beq 3f
188#endif /* CONFIG_PPC_ISERIES */
189
190 ld r10,PACASTABRR(r13)
191 addi r10,r10,1
192 /* use a cpu feature mask if we ever change our slb size */
193 cmpldi r10,SLB_NUM_ENTRIES
194
195 blt+ 4f
196 li r10,SLB_NUM_BOLTED
197
1984:
199 std r10,PACASTABRR(r13)
200
2013:
202 rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
203 oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
204
205 /* r3 = ESID data, r11 = VSID data */
121 206
122 /* 207 /*
123 * No need for an isync before or after this slbmte. The exception 208 * No need for an isync before or after this slbmte. The exception
@@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
125 */ 210 */
126 slbmte r11,r10 211 slbmte r11,r10
127 212
128 bgelr cr7 /* we're done for kernel addresses */ 213 /* we're done for kernel addresses */
214 crclr 4*cr0+eq /* set result to "success" */
215 bgelr cr7
129 216
130 /* Update the slb cache */ 217 /* Update the slb cache */
131 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ 218 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
143 li r3,SLB_CACHE_ENTRIES+1 230 li r3,SLB_CACHE_ENTRIES+1
1442: 2312:
145 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ 232 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
233 crclr 4*cr0+eq /* set result to "success" */
146 blr 234 blr
147 235
1488: /* invalid EA */
149 li r3,0 /* BAD_VSID */
150 li r11,SLB_VSID_USER /* flags don't much matter */
151 b 9b
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf27..fa325dbf98fc 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -26,7 +26,6 @@ struct stab_entry {
26 unsigned long vsid_data; 26 unsigned long vsid_data;
27}; 27};
28 28
29/* Both the segment table and SLB code uses the following cache */
30#define NR_STAB_CACHE_ENTRIES 8 29#define NR_STAB_CACHE_ENTRIES 8
31DEFINE_PER_CPU(long, stab_cache_ptr); 30DEFINE_PER_CPU(long, stab_cache_ptr);
32DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); 31DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
186 /* Never flush the first entry. */ 185 /* Never flush the first entry. */
187 ste += 1; 186 ste += 1;
188 for (entry = 1; 187 for (entry = 1;
189 entry < (PAGE_SIZE / sizeof(struct stab_entry)); 188 entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
190 entry++, ste++) { 189 entry++, ste++) {
191 unsigned long ea; 190 unsigned long ea;
192 ea = ste->esid_data & ESID_MASK; 191 ea = ste->esid_data & ESID_MASK;
@@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
200 199
201 __get_cpu_var(stab_cache_ptr) = 0; 200 __get_cpu_var(stab_cache_ptr) = 0;
202 201
202#ifdef CONFIG_PPC_64K_PAGES
203 get_paca()->pgdir = mm->pgd;
204#endif /* CONFIG_PPC_64K_PAGES */
205
203 /* Now preload some entries for the new task */ 206 /* Now preload some entries for the new task */
204 if (test_tsk_thread_flag(tsk, TIF_32BIT)) 207 if (test_tsk_thread_flag(tsk, TIF_32BIT))
205 unmapped_base = TASK_UNMAPPED_BASE_USER32; 208 unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
223 asm volatile("sync" : : : "memory"); 226 asm volatile("sync" : : : "memory");
224} 227}
225 228
226extern void slb_initialize(void);
227
228/* 229/*
229 * Allocate segment tables for secondary CPUs. These must all go in 230 * Allocate segment tables for secondary CPUs. These must all go in
230 * the first (bolted) segment, so that do_stab_bolted won't get a 231 * the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +244,21 @@ void stabs_alloc(void)
243 if (cpu == 0) 244 if (cpu == 0)
244 continue; /* stab for CPU 0 is statically allocated */ 245 continue; /* stab for CPU 0 is statically allocated */
245 246
246 newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT); 247 newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
248 1<<SID_SHIFT);
247 if (! newstab) 249 if (! newstab)
248 panic("Unable to allocate segment table for CPU %d.\n", 250 panic("Unable to allocate segment table for CPU %d.\n",
249 cpu); 251 cpu);
250 252
251 newstab += KERNELBASE; 253 newstab += KERNELBASE;
252 254
253 memset((void *)newstab, 0, PAGE_SIZE); 255 memset((void *)newstab, 0, HW_PAGE_SIZE);
254 256
255 paca[cpu].stab_addr = newstab; 257 paca[cpu].stab_addr = newstab;
256 paca[cpu].stab_real = virt_to_abs(newstab); 258 paca[cpu].stab_real = virt_to_abs(newstab);
257 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); 259 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx "
260 "virtual, 0x%lx absolute\n",
261 cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
258 } 262 }
259} 263}
260 264
@@ -267,13 +271,9 @@ void stab_initialize(unsigned long stab)
267{ 271{
268 unsigned long vsid = get_kernel_vsid(KERNELBASE); 272 unsigned long vsid = get_kernel_vsid(KERNELBASE);
269 273
270 if (cpu_has_feature(CPU_FTR_SLB)) { 274 asm volatile("isync; slbia; isync":::"memory");
271 slb_initialize(); 275 make_ste(stab, GET_ESID(KERNELBASE), vsid);
272 } else {
273 asm volatile("isync; slbia; isync":::"memory");
274 make_ste(stab, GET_ESID(KERNELBASE), vsid);
275 276
276 /* Order update */ 277 /* Order update */
277 asm volatile("sync":::"memory"); 278 asm volatile("sync":::"memory");
278 }
279} 279}
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4f..53e31b834ace 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
21 * as published by the Free Software Foundation; either version 21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version. 22 * 2 of the License, or (at your option) any later version.
23 */ 23 */
24
24#include <linux/config.h> 25#include <linux/config.h>
25#include <linux/kernel.h> 26#include <linux/kernel.h>
26#include <linux/mm.h> 27#include <linux/mm.h>
@@ -30,7 +31,7 @@
30#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
31#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
32#include <asm/tlb.h> 33#include <asm/tlb.h>
33#include <linux/highmem.h> 34#include <asm/bug.h>
34 35
35DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); 36DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
36 37
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
126 * (if we remove it we should clear the _PTE_HPTEFLAGS bits). 127 * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
127 */ 128 */
128void hpte_update(struct mm_struct *mm, unsigned long addr, 129void hpte_update(struct mm_struct *mm, unsigned long addr,
129 unsigned long pte, int wrprot) 130 pte_t *ptep, unsigned long pte, int huge)
130{ 131{
131 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 132 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
132 unsigned long vsid; 133 unsigned long vsid;
134 unsigned int psize = mmu_virtual_psize;
133 int i; 135 int i;
134 136
135 i = batch->index; 137 i = batch->index;
136 138
139 /* We mask the address for the base page size. Huge pages will
140 * have applied their own masking already
141 */
142 addr &= PAGE_MASK;
143
144 /* Get page size (maybe move back to caller) */
145 if (huge) {
146#ifdef CONFIG_HUGETLB_PAGE
147 psize = mmu_huge_psize;
148#else
149 BUG();
150#endif
151 }
152
137 /* 153 /*
138 * This can happen when we are in the middle of a TLB batch and 154 * This can happen when we are in the middle of a TLB batch and
139 * we encounter memory pressure (eg copy_page_range when it tries 155 * we encounter memory pressure (eg copy_page_range when it tries
140 * to allocate a new pte). If we have to reclaim memory and end 156 * to allocate a new pte). If we have to reclaim memory and end
141 * up scanning and resetting referenced bits then our batch context 157 * up scanning and resetting referenced bits then our batch context
142 * will change mid stream. 158 * will change mid stream.
159 *
160 * We also need to ensure only one page size is present in a given
161 * batch
143 */ 162 */
144 if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { 163 if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
145 flush_tlb_pending(); 164 flush_tlb_pending();
146 i = 0; 165 i = 0;
147 } 166 }
148 if (i == 0) { 167 if (i == 0) {
149 batch->mm = mm; 168 batch->mm = mm;
150 batch->large = pte_huge(pte); 169 batch->psize = psize;
151 } 170 }
152 if (addr < KERNELBASE) { 171 if (addr < KERNELBASE) {
153 vsid = get_vsid(mm->context.id, addr); 172 vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
155 } else 174 } else
156 vsid = get_kernel_vsid(addr); 175 vsid = get_kernel_vsid(addr);
157 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); 176 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
158 batch->pte[i] = __pte(pte); 177 batch->pte[i] = __real_pte(__pte(pte), ptep);
159 batch->index = ++i; 178 batch->index = ++i;
160 if (i >= PPC64_TLB_BATCH_NR) 179 if (i >= PPC64_TLB_BATCH_NR)
161 flush_tlb_pending(); 180 flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
177 local = 1; 196 local = 1;
178 197
179 if (i == 1) 198 if (i == 1)
180 flush_hash_page(batch->vaddr[0], batch->pte[0], local); 199 flush_hash_page(batch->vaddr[0], batch->pte[0],
200 batch->psize, local);
181 else 201 else
182 flush_hash_range(i, local); 202 flush_hash_range(i, local);
183 batch->index = 0; 203 batch->index = 0;
diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c
index b3c6c3374ca6..30bdcf3925d9 100644
--- a/arch/powerpc/platforms/iseries/htab.c
+++ b/arch/powerpc/platforms/iseries/htab.c
@@ -39,15 +39,16 @@ static inline void iSeries_hunlock(unsigned long slot)
39 spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]); 39 spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
40} 40}
41 41
42static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, 42long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
43 unsigned long prpn, unsigned long vflags, 43 unsigned long pa, unsigned long rflags,
44 unsigned long rflags) 44 unsigned long vflags, int psize)
45{ 45{
46 unsigned long arpn;
47 long slot; 46 long slot;
48 hpte_t lhpte; 47 hpte_t lhpte;
49 int secondary = 0; 48 int secondary = 0;
50 49
50 BUG_ON(psize != MMU_PAGE_4K);
51
51 /* 52 /*
52 * The hypervisor tries both primary and secondary. 53 * The hypervisor tries both primary and secondary.
53 * If we are being called to insert in the secondary, 54 * If we are being called to insert in the secondary,
@@ -59,8 +60,19 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
59 60
60 iSeries_hlock(hpte_group); 61 iSeries_hlock(hpte_group);
61 62
62 slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); 63 slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT);
63 BUG_ON(lhpte.v & HPTE_V_VALID); 64 if (unlikely(lhpte.v & HPTE_V_VALID)) {
65 if (vflags & HPTE_V_BOLTED) {
66 HvCallHpt_setSwBits(slot, 0x10, 0);
67 HvCallHpt_setPp(slot, PP_RWXX);
68 iSeries_hunlock(hpte_group);
69 if (slot < 0)
70 return 0x8 | (slot & 7);
71 else
72 return slot & 7;
73 }
74 BUG();
75 }
64 76
65 if (slot == -1) { /* No available entry found in either group */ 77 if (slot == -1) { /* No available entry found in either group */
66 iSeries_hunlock(hpte_group); 78 iSeries_hunlock(hpte_group);
@@ -73,10 +85,9 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
73 slot &= 0x7fffffffffffffff; 85 slot &= 0x7fffffffffffffff;
74 } 86 }
75 87
76 arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
77 88
78 lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; 89 lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID;
79 lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; 90 lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags;
80 91
81 /* Now fill in the actual HPTE */ 92 /* Now fill in the actual HPTE */
82 HvCallHpt_addValidate(slot, secondary, &lhpte); 93 HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -86,25 +97,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
86 return (secondary << 3) | (slot & 7); 97 return (secondary << 3) | (slot & 7);
87} 98}
88 99
89long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
90 unsigned long va, unsigned long prpn, unsigned long vflags,
91 unsigned long rflags)
92{
93 long slot;
94 hpte_t lhpte;
95
96 slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
97
98 if (lhpte.v & HPTE_V_VALID) {
99 /* Bolt the existing HPTE */
100 HvCallHpt_setSwBits(slot, 0x10, 0);
101 HvCallHpt_setPp(slot, PP_RWXX);
102 return 0;
103 }
104
105 return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags);
106}
107
108static unsigned long iSeries_hpte_getword0(unsigned long slot) 100static unsigned long iSeries_hpte_getword0(unsigned long slot)
109{ 101{
110 hpte_t hpte; 102 hpte_t hpte;
@@ -150,15 +142,17 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
150 * bits 61..63 : PP2,PP1,PP0 142 * bits 61..63 : PP2,PP1,PP0
151 */ 143 */
152static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, 144static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
153 unsigned long va, int large, int local) 145 unsigned long va, int psize, int local)
154{ 146{
155 hpte_t hpte; 147 hpte_t hpte;
156 unsigned long avpn = va >> 23; 148 unsigned long want_v;
157 149
158 iSeries_hlock(slot); 150 iSeries_hlock(slot);
159 151
160 HvCallHpt_get(&hpte, slot); 152 HvCallHpt_get(&hpte, slot);
161 if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) { 153 want_v = hpte_encode_v(va, MMU_PAGE_4K);
154
155 if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) {
162 /* 156 /*
163 * Hypervisor expects bits as NPPP, which is 157 * Hypervisor expects bits as NPPP, which is
164 * different from how they are mapped in our PP. 158 * different from how they are mapped in our PP.
@@ -210,14 +204,17 @@ static long iSeries_hpte_find(unsigned long vpn)
210 * 204 *
211 * No need to lock here because we should be the only user. 205 * No need to lock here because we should be the only user.
212 */ 206 */
213static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) 207static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
208 int psize)
214{ 209{
215 unsigned long vsid,va,vpn; 210 unsigned long vsid,va,vpn;
216 long slot; 211 long slot;
217 212
213 BUG_ON(psize != MMU_PAGE_4K);
214
218 vsid = get_kernel_vsid(ea); 215 vsid = get_kernel_vsid(ea);
219 va = (vsid << 28) | (ea & 0x0fffffff); 216 va = (vsid << 28) | (ea & 0x0fffffff);
220 vpn = va >> PAGE_SHIFT; 217 vpn = va >> HW_PAGE_SHIFT;
221 slot = iSeries_hpte_find(vpn); 218 slot = iSeries_hpte_find(vpn);
222 if (slot == -1) 219 if (slot == -1)
223 panic("updateboltedpp: Could not find page to bolt\n"); 220 panic("updateboltedpp: Could not find page to bolt\n");
@@ -225,7 +222,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
225} 222}
226 223
227static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, 224static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
228 int large, int local) 225 int psize, int local)
229{ 226{
230 unsigned long hpte_v; 227 unsigned long hpte_v;
231 unsigned long avpn = va >> 23; 228 unsigned long avpn = va >> 23;
diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c
index 62ec73479687..f476d71194fa 100644
--- a/arch/powerpc/platforms/iseries/hvlog.c
+++ b/arch/powerpc/platforms/iseries/hvlog.c
@@ -22,7 +22,7 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
22 22
23 while (len) { 23 while (len) {
24 hv_buf.addr = cur; 24 hv_buf.addr = cur;
25 left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur; 25 left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur;
26 if (left_this_page > len) 26 if (left_this_page > len)
27 left_this_page = len; 27 left_this_page = len;
28 hv_buf.len = left_this_page; 28 hv_buf.len = left_this_page;
@@ -30,6 +30,6 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
30 HvCall2(HvCallBaseWriteLogBuffer, 30 HvCall2(HvCallBaseWriteLogBuffer,
31 virt_to_abs(&hv_buf), 31 virt_to_abs(&hv_buf),
32 left_this_page); 32 left_this_page);
33 cur = (cur & PAGE_MASK) + PAGE_SIZE; 33 cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE;
34 } 34 }
35} 35}
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 1a6845b5c5a4..bf081b345820 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -43,9 +43,12 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
43 u64 rc; 43 u64 rc;
44 union tce_entry tce; 44 union tce_entry tce;
45 45
46 index <<= TCE_PAGE_FACTOR;
47 npages <<= TCE_PAGE_FACTOR;
48
46 while (npages--) { 49 while (npages--) {
47 tce.te_word = 0; 50 tce.te_word = 0;
48 tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; 51 tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT;
49 52
50 if (tbl->it_type == TCE_VB) { 53 if (tbl->it_type == TCE_VB) {
51 /* Virtual Bus */ 54 /* Virtual Bus */
@@ -66,7 +69,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
66 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", 69 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
67 rc); 70 rc);
68 index++; 71 index++;
69 uaddr += PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
70 } 73 }
71} 74}
72 75
@@ -74,6 +77,9 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
74{ 77{
75 u64 rc; 78 u64 rc;
76 79
80 npages <<= TCE_PAGE_FACTOR;
81 index <<= TCE_PAGE_FACTOR;
82
77 while (npages--) { 83 while (npages--) {
78 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); 84 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
79 if (rc) 85 if (rc)
@@ -83,27 +89,6 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
83 } 89 }
84} 90}
85 91
86#ifdef CONFIG_PCI
87/*
88 * This function compares the known tables to find an iommu_table
89 * that has already been built for hardware TCEs.
90 */
91static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
92{
93 struct pci_dn *pdn;
94
95 list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
96 struct iommu_table *it = pdn->iommu_table;
97 if ((it != NULL) &&
98 (it->it_type == TCE_PCI) &&
99 (it->it_offset == tbl->it_offset) &&
100 (it->it_index == tbl->it_index) &&
101 (it->it_size == tbl->it_size))
102 return it;
103 }
104 return NULL;
105}
106
107/* 92/*
108 * Call Hv with the architected data structure to get TCE table info. 93 * Call Hv with the architected data structure to get TCE table info.
109 * info. Put the returned data into the Linux representation of the 94 * info. Put the returned data into the Linux representation of the
@@ -113,8 +98,10 @@ static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
113 * 2. TCE table per Bus. 98 * 2. TCE table per Bus.
114 * 3. TCE Table per IOA. 99 * 3. TCE Table per IOA.
115 */ 100 */
116static void iommu_table_getparms(struct pci_dn *pdn, 101void iommu_table_getparms_iSeries(unsigned long busno,
117 struct iommu_table* tbl) 102 unsigned char slotno,
103 unsigned char virtbus,
104 struct iommu_table* tbl)
118{ 105{
119 struct iommu_table_cb *parms; 106 struct iommu_table_cb *parms;
120 107
@@ -124,9 +111,9 @@ static void iommu_table_getparms(struct pci_dn *pdn,
124 111
125 memset(parms, 0, sizeof(*parms)); 112 memset(parms, 0, sizeof(*parms));
126 113
127 parms->itc_busno = pdn->busno; 114 parms->itc_busno = busno;
128 parms->itc_slotno = pdn->LogicalSlot; 115 parms->itc_slotno = slotno;
129 parms->itc_virtbus = 0; 116 parms->itc_virtbus = virtbus;
130 117
131 HvCallXm_getTceTableParms(iseries_hv_addr(parms)); 118 HvCallXm_getTceTableParms(iseries_hv_addr(parms));
132 119
@@ -134,17 +121,40 @@ static void iommu_table_getparms(struct pci_dn *pdn,
134 panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); 121 panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
135 122
136 /* itc_size is in pages worth of table, it_size is in # of entries */ 123 /* itc_size is in pages worth of table, it_size is in # of entries */
137 tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry); 124 tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) /
125 sizeof(union tce_entry)) >> TCE_PAGE_FACTOR;
138 tbl->it_busno = parms->itc_busno; 126 tbl->it_busno = parms->itc_busno;
139 tbl->it_offset = parms->itc_offset; 127 tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR;
140 tbl->it_index = parms->itc_index; 128 tbl->it_index = parms->itc_index;
141 tbl->it_blocksize = 1; 129 tbl->it_blocksize = 1;
142 tbl->it_type = TCE_PCI; 130 tbl->it_type = virtbus ? TCE_VB : TCE_PCI;
143 131
144 kfree(parms); 132 kfree(parms);
145} 133}
146 134
147 135
136#ifdef CONFIG_PCI
137/*
138 * This function compares the known tables to find an iommu_table
139 * that has already been built for hardware TCEs.
140 */
141static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
142{
143 struct pci_dn *pdn;
144
145 list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
146 struct iommu_table *it = pdn->iommu_table;
147 if ((it != NULL) &&
148 (it->it_type == TCE_PCI) &&
149 (it->it_offset == tbl->it_offset) &&
150 (it->it_index == tbl->it_index) &&
151 (it->it_size == tbl->it_size))
152 return it;
153 }
154 return NULL;
155}
156
157
148void iommu_devnode_init_iSeries(struct device_node *dn) 158void iommu_devnode_init_iSeries(struct device_node *dn)
149{ 159{
150 struct iommu_table *tbl; 160 struct iommu_table *tbl;
@@ -152,7 +162,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn)
152 162
153 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); 163 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
154 164
155 iommu_table_getparms(pdn, tbl); 165 iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl);
156 166
157 /* Look for existing tce table */ 167 /* Look for existing tce table */
158 pdn->iommu_table = iommu_table_find(tbl); 168 pdn->iommu_table = iommu_table_find(tbl);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index fda712b42168..c5207064977d 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -320,11 +320,11 @@ static void __init iSeries_init_early(void)
320 */ 320 */
321 if (naca.xRamDisk) { 321 if (naca.xRamDisk) {
322 initrd_start = (unsigned long)__va(naca.xRamDisk); 322 initrd_start = (unsigned long)__va(naca.xRamDisk);
323 initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; 323 initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE;
324 initrd_below_start_ok = 1; // ramdisk in kernel space 324 initrd_below_start_ok = 1; // ramdisk in kernel space
325 ROOT_DEV = Root_RAM0; 325 ROOT_DEV = Root_RAM0;
326 if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize) 326 if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize)
327 rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024; 327 rd_size = (naca.xRamDiskSize * HW_PAGE_SIZE) / 1024;
328 } else 328 } else
329#endif /* CONFIG_BLK_DEV_INITRD */ 329#endif /* CONFIG_BLK_DEV_INITRD */
330 { 330 {
@@ -470,13 +470,14 @@ static void __init build_iSeries_Memory_Map(void)
470 */ 470 */
471 hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); 471 hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
472 hptSizePages = (u32)HvCallHpt_getHptPages(); 472 hptSizePages = (u32)HvCallHpt_getHptPages();
473 hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); 473 hptSizeChunks = hptSizePages >>
474 (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);
474 hptLastChunk = hptFirstChunk + hptSizeChunks - 1; 475 hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
475 476
476 printk("HPT absolute addr = %016lx, size = %dK\n", 477 printk("HPT absolute addr = %016lx, size = %dK\n",
477 chunk_to_addr(hptFirstChunk), hptSizeChunks * 256); 478 chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
478 479
479 ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE); 480 ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE);
480 481
481 /* 482 /*
482 * The actual hashed page table is in the hypervisor, 483 * The actual hashed page table is in the hypervisor,
@@ -629,7 +630,7 @@ static void __init iSeries_fixup_klimit(void)
629 */ 630 */
630 if (naca.xRamDisk) 631 if (naca.xRamDisk)
631 klimit = KERNELBASE + (u64)naca.xRamDisk + 632 klimit = KERNELBASE + (u64)naca.xRamDisk +
632 (naca.xRamDiskSize * PAGE_SIZE); 633 (naca.xRamDiskSize * HW_PAGE_SIZE);
633 else { 634 else {
634 /* 635 /*
635 * No ram disk was included - check and see if there 636 * No ram disk was included - check and see if there
diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c
index c27a66876c2c..384360ee06ec 100644
--- a/arch/powerpc/platforms/iseries/vio.c
+++ b/arch/powerpc/platforms/iseries/vio.c
@@ -30,41 +30,14 @@ static struct iommu_table vio_iommu_table;
30 30
31static void __init iommu_vio_init(void) 31static void __init iommu_vio_init(void)
32{ 32{
33 struct iommu_table *t; 33 iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table);
34 struct iommu_table_cb cb; 34 veth_iommu_table.it_size /= 2;
35 unsigned long cbp; 35 vio_iommu_table = veth_iommu_table;
36 unsigned long itc_entries; 36 vio_iommu_table.it_offset += veth_iommu_table.it_size;
37 37
38 cb.itc_busno = 255; /* Bus 255 is the virtual bus */ 38 if (!iommu_init_table(&veth_iommu_table))
39 cb.itc_virtbus = 0xff; /* Ask for virtual bus */
40
41 cbp = virt_to_abs(&cb);
42 HvCallXm_getTceTableParms(cbp);
43
44 itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
45 veth_iommu_table.it_size = itc_entries / 2;
46 veth_iommu_table.it_busno = cb.itc_busno;
47 veth_iommu_table.it_offset = cb.itc_offset;
48 veth_iommu_table.it_index = cb.itc_index;
49 veth_iommu_table.it_type = TCE_VB;
50 veth_iommu_table.it_blocksize = 1;
51
52 t = iommu_init_table(&veth_iommu_table);
53
54 if (!t)
55 printk("Virtual Bus VETH TCE table failed.\n"); 39 printk("Virtual Bus VETH TCE table failed.\n");
56 40 if (!iommu_init_table(&vio_iommu_table))
57 vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
58 vio_iommu_table.it_busno = cb.itc_busno;
59 vio_iommu_table.it_offset = cb.itc_offset +
60 veth_iommu_table.it_size;
61 vio_iommu_table.it_index = cb.itc_index;
62 vio_iommu_table.it_type = TCE_VB;
63 vio_iommu_table.it_blocksize = 1;
64
65 t = iommu_init_table(&vio_iommu_table);
66
67 if (!t)
68 printk("Virtual Bus VIO TCE table failed.\n"); 41 printk("Virtual Bus VIO TCE table failed.\n");
69} 42}
70 43
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index fe97bfbf7463..842672695598 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -68,7 +68,8 @@ static DEFINE_SPINLOCK(statuslock);
68 * For each kind of event we allocate a buffer that is 68 * For each kind of event we allocate a buffer that is
69 * guaranteed not to cross a page boundary 69 * guaranteed not to cross a page boundary
70 */ 70 */
71static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned; 71static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256]
72 __attribute__((__aligned__(4096)));
72static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; 73static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
73static int event_buffer_initialised; 74static int event_buffer_initialised;
74 75
@@ -116,12 +117,12 @@ static int proc_viopath_show(struct seq_file *m, void *v)
116 HvLpEvent_Rc hvrc; 117 HvLpEvent_Rc hvrc;
117 DECLARE_MUTEX_LOCKED(Semaphore); 118 DECLARE_MUTEX_LOCKED(Semaphore);
118 119
119 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 120 buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
120 if (!buf) 121 if (!buf)
121 return 0; 122 return 0;
122 memset(buf, 0, PAGE_SIZE); 123 memset(buf, 0, HW_PAGE_SIZE);
123 124
124 handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE, 125 handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE,
125 DMA_FROM_DEVICE); 126 DMA_FROM_DEVICE);
126 127
127 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, 128 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
@@ -131,7 +132,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
131 viopath_sourceinst(viopath_hostLp), 132 viopath_sourceinst(viopath_hostLp),
132 viopath_targetinst(viopath_hostLp), 133 viopath_targetinst(viopath_hostLp),
133 (u64)(unsigned long)&Semaphore, VIOVERSION << 16, 134 (u64)(unsigned long)&Semaphore, VIOVERSION << 16,
134 ((u64)handle) << 32, PAGE_SIZE, 0, 0); 135 ((u64)handle) << 32, HW_PAGE_SIZE, 0, 0);
135 136
136 if (hvrc != HvLpEvent_Rc_Good) 137 if (hvrc != HvLpEvent_Rc_Good)
137 printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc); 138 printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
@@ -140,7 +141,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
140 141
141 vlanMap = HvLpConfig_getVirtualLanIndexMap(); 142 vlanMap = HvLpConfig_getVirtualLanIndexMap();
142 143
143 buf[PAGE_SIZE-1] = '\0'; 144 buf[HW_PAGE_SIZE-1] = '\0';
144 seq_printf(m, "%s", buf); 145 seq_printf(m, "%s", buf);
145 seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); 146 seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
146 seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n", 147 seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
@@ -152,7 +153,8 @@ static int proc_viopath_show(struct seq_file *m, void *v)
152 e2a(xItExtVpdPanel.systemSerial[4]), 153 e2a(xItExtVpdPanel.systemSerial[4]),
153 e2a(xItExtVpdPanel.systemSerial[5])); 154 e2a(xItExtVpdPanel.systemSerial[5]));
154 155
155 dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE); 156 dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
157 DMA_FROM_DEVICE);
156 kfree(buf); 158 kfree(buf);
157 159
158 return 0; 160 return 0;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e384a5a91796..ab0c6dd6ec94 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -19,7 +19,7 @@
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21
22#define DEBUG 22#undef DEBUG_LOW
23 23
24#include <linux/config.h> 24#include <linux/config.h>
25#include <linux/kernel.h> 25#include <linux/kernel.h>
@@ -41,10 +41,10 @@
41 41
42#include "plpar_wrappers.h" 42#include "plpar_wrappers.h"
43 43
44#ifdef DEBUG 44#ifdef DEBUG_LOW
45#define DBG(fmt...) udbg_printf(fmt) 45#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
46#else 46#else
47#define DBG(fmt...) 47#define DBG_LOW(fmt...) do { } while(0)
48#endif 48#endif
49 49
50/* in pSeries_hvCall.S */ 50/* in pSeries_hvCall.S */
@@ -276,8 +276,9 @@ void vpa_init(int cpu)
276} 276}
277 277
278long pSeries_lpar_hpte_insert(unsigned long hpte_group, 278long pSeries_lpar_hpte_insert(unsigned long hpte_group,
279 unsigned long va, unsigned long prpn, 279 unsigned long va, unsigned long pa,
280 unsigned long vflags, unsigned long rflags) 280 unsigned long rflags, unsigned long vflags,
281 int psize)
281{ 282{
282 unsigned long lpar_rc; 283 unsigned long lpar_rc;
283 unsigned long flags; 284 unsigned long flags;
@@ -285,11 +286,28 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
285 unsigned long hpte_v, hpte_r; 286 unsigned long hpte_v, hpte_r;
286 unsigned long dummy0, dummy1; 287 unsigned long dummy0, dummy1;
287 288
288 hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID; 289 if (!(vflags & HPTE_V_BOLTED))
289 if (vflags & HPTE_V_LARGE) 290 DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
290 hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); 291 "rflags=%lx, vflags=%lx, psize=%d)\n",
291 292 hpte_group, va, pa, rflags, vflags, psize);
292 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; 293
294 hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
295 hpte_r = hpte_encode_r(pa, psize) | rflags;
296
297 if (!(vflags & HPTE_V_BOLTED))
298 DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
299
300#if 1
301 {
302 int i;
303 for (i=0;i<8;i++) {
304 unsigned long w0, w1;
305 plpar_pte_read(0, hpte_group, &w0, &w1);
306 BUG_ON (HPTE_V_COMPARE(hpte_v, w0)
307 && (w0 & HPTE_V_VALID));
308 }
309 }
310#endif
293 311
294 /* Now fill in the actual HPTE */ 312 /* Now fill in the actual HPTE */
295 /* Set CEC cookie to 0 */ 313 /* Set CEC cookie to 0 */
@@ -299,23 +317,30 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
299 /* Exact = 0 */ 317 /* Exact = 0 */
300 flags = 0; 318 flags = 0;
301 319
302 /* XXX why is this here? - Anton */ 320 /* Make pHyp happy */
303 if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) 321 if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
304 hpte_r &= ~_PAGE_COHERENT; 322 hpte_r &= ~_PAGE_COHERENT;
305 323
306 lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, 324 lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
307 hpte_r, &slot, &dummy0, &dummy1); 325 hpte_r, &slot, &dummy0, &dummy1);
308 326 if (unlikely(lpar_rc == H_PTEG_Full)) {
309 if (unlikely(lpar_rc == H_PTEG_Full)) 327 if (!(vflags & HPTE_V_BOLTED))
328 DBG_LOW(" full\n");
310 return -1; 329 return -1;
330 }
311 331
312 /* 332 /*
313 * Since we try and ioremap PHBs we don't own, the pte insert 333 * Since we try and ioremap PHBs we don't own, the pte insert
314 * will fail. However we must catch the failure in hash_page 334 * will fail. However we must catch the failure in hash_page
315 * or we will loop forever, so return -2 in this case. 335 * or we will loop forever, so return -2 in this case.
316 */ 336 */
317 if (unlikely(lpar_rc != H_Success)) 337 if (unlikely(lpar_rc != H_Success)) {
338 if (!(vflags & HPTE_V_BOLTED))
339 DBG_LOW(" lpar err %d\n", lpar_rc);
318 return -2; 340 return -2;
341 }
342 if (!(vflags & HPTE_V_BOLTED))
343 DBG_LOW(" -> slot: %d\n", slot & 7);
319 344
320 /* Because of iSeries, we have to pass down the secondary 345 /* Because of iSeries, we have to pass down the secondary
321 * bucket bit here as well 346 * bucket bit here as well
@@ -340,10 +365,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
340 /* don't remove a bolted entry */ 365 /* don't remove a bolted entry */
341 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, 366 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
342 (0x1UL << 4), &dummy1, &dummy2); 367 (0x1UL << 4), &dummy1, &dummy2);
343
344 if (lpar_rc == H_Success) 368 if (lpar_rc == H_Success)
345 return i; 369 return i;
346
347 BUG_ON(lpar_rc != H_Not_Found); 370 BUG_ON(lpar_rc != H_Not_Found);
348 371
349 slot_offset++; 372 slot_offset++;
@@ -371,20 +394,28 @@ static void pSeries_lpar_hptab_clear(void)
371 * We can probably optimize here and assume the high bits of newpp are 394 * We can probably optimize here and assume the high bits of newpp are
372 * already zero. For now I am paranoid. 395 * already zero. For now I am paranoid.
373 */ 396 */
374static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, 397static long pSeries_lpar_hpte_updatepp(unsigned long slot,
375 unsigned long va, int large, int local) 398 unsigned long newpp,
399 unsigned long va,
400 int psize, int local)
376{ 401{
377 unsigned long lpar_rc; 402 unsigned long lpar_rc;
378 unsigned long flags = (newpp & 7) | H_AVPN; 403 unsigned long flags = (newpp & 7) | H_AVPN;
379 unsigned long avpn = va >> 23; 404 unsigned long want_v;
380 405
381 if (large) 406 want_v = hpte_encode_v(va, psize);
382 avpn &= ~0x1UL;
383 407
384 lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7)); 408 DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ",
409 want_v & HPTE_V_AVPN, slot, flags, psize);
385 410
386 if (lpar_rc == H_Not_Found) 411 lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN);
412
413 if (lpar_rc == H_Not_Found) {
414 DBG_LOW("not found !\n");
387 return -1; 415 return -1;
416 }
417
418 DBG_LOW("ok\n");
388 419
389 BUG_ON(lpar_rc != H_Success); 420 BUG_ON(lpar_rc != H_Success);
390 421
@@ -410,21 +441,22 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
410 return dword0; 441 return dword0;
411} 442}
412 443
413static long pSeries_lpar_hpte_find(unsigned long vpn) 444static long pSeries_lpar_hpte_find(unsigned long va, int psize)
414{ 445{
415 unsigned long hash; 446 unsigned long hash;
416 unsigned long i, j; 447 unsigned long i, j;
417 long slot; 448 long slot;
418 unsigned long hpte_v; 449 unsigned long want_v, hpte_v;
419 450
420 hash = hpt_hash(vpn, 0); 451 hash = hpt_hash(va, mmu_psize_defs[psize].shift);
452 want_v = hpte_encode_v(va, psize);
421 453
422 for (j = 0; j < 2; j++) { 454 for (j = 0; j < 2; j++) {
423 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 455 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
424 for (i = 0; i < HPTES_PER_GROUP; i++) { 456 for (i = 0; i < HPTES_PER_GROUP; i++) {
425 hpte_v = pSeries_lpar_hpte_getword0(slot); 457 hpte_v = pSeries_lpar_hpte_getword0(slot);
426 458
427 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) 459 if (HPTE_V_COMPARE(hpte_v, want_v)
428 && (hpte_v & HPTE_V_VALID) 460 && (hpte_v & HPTE_V_VALID)
429 && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { 461 && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
430 /* HPTE matches */ 462 /* HPTE matches */
@@ -441,17 +473,15 @@ static long pSeries_lpar_hpte_find(unsigned long vpn)
441} 473}
442 474
443static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, 475static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
444 unsigned long ea) 476 unsigned long ea,
477 int psize)
445{ 478{
446 unsigned long lpar_rc; 479 unsigned long lpar_rc, slot, vsid, va, flags;
447 unsigned long vsid, va, vpn, flags;
448 long slot;
449 480
450 vsid = get_kernel_vsid(ea); 481 vsid = get_kernel_vsid(ea);
451 va = (vsid << 28) | (ea & 0x0fffffff); 482 va = (vsid << 28) | (ea & 0x0fffffff);
452 vpn = va >> PAGE_SHIFT;
453 483
454 slot = pSeries_lpar_hpte_find(vpn); 484 slot = pSeries_lpar_hpte_find(va, psize);
455 BUG_ON(slot == -1); 485 BUG_ON(slot == -1);
456 486
457 flags = newpp & 7; 487 flags = newpp & 7;
@@ -461,18 +491,18 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
461} 491}
462 492
463static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, 493static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
464 int large, int local) 494 int psize, int local)
465{ 495{
466 unsigned long avpn = va >> 23; 496 unsigned long want_v;
467 unsigned long lpar_rc; 497 unsigned long lpar_rc;
468 unsigned long dummy1, dummy2; 498 unsigned long dummy1, dummy2;
469 499
470 if (large) 500 DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d",
471 avpn &= ~0x1UL; 501 slot, va, psize, local);
472
473 lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
474 &dummy2);
475 502
503 want_v = hpte_encode_v(va, psize);
504 lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN,
505 &dummy1, &dummy2);
476 if (lpar_rc == H_Not_Found) 506 if (lpar_rc == H_Not_Found)
477 return; 507 return;
478 508
@@ -494,7 +524,8 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local)
494 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 524 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
495 525
496 for (i = 0; i < number; i++) 526 for (i = 0; i < number; i++)
497 flush_hash_page(batch->vaddr[i], batch->pte[i], local); 527 flush_hash_page(batch->vaddr[i], batch->pte[i],
528 batch->psize, local);
498 529
499 if (lock_tlbie) 530 if (lock_tlbie)
500 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 531 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
index b987164fca4c..2130cc315957 100644
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -47,6 +47,10 @@ config ARCH_MAY_HAVE_PC_FDC
47 bool 47 bool
48 default y 48 default y
49 49
50config PPC_STD_MMU
51 bool
52 default y
53
50# We optimistically allocate largepages from the VM, so make the limit 54# We optimistically allocate largepages from the VM, so make the limit
51# large enough (16MB). This badly named config option is actually 55# large enough (16MB). This badly named config option is actually
52# max order + 1 56# max order + 1
@@ -294,6 +298,15 @@ config NODES_SPAN_OTHER_NODES
294 def_bool y 298 def_bool y
295 depends on NEED_MULTIPLE_NODES 299 depends on NEED_MULTIPLE_NODES
296 300
301config PPC_64K_PAGES
302 bool "64k page size"
303 help
304 This option changes the kernel logical page size to 64k. On machines
305 without processor support for 64k pages, the kernel will simulate
306 them by loading each individual 4k page on demand transparently,
307 while on hardware with such support, it will be used to map
308 normal application pages.
309
297config SCHED_SMT 310config SCHED_SMT
298 bool "SMT (Hyperthreading) scheduler support" 311 bool "SMT (Hyperthreading) scheduler support"
299 depends on SMP 312 depends on SMP
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c
index 504dee836d29..bce9065da6cb 100644
--- a/arch/ppc64/kernel/asm-offsets.c
+++ b/arch/ppc64/kernel/asm-offsets.c
@@ -93,6 +93,9 @@ int main(void)
93 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); 93 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
94 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); 94 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
95 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 95 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
96#ifdef CONFIG_PPC_64K_PAGES
97 DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
98#endif
96#ifdef CONFIG_HUGETLB_PAGE 99#ifdef CONFIG_HUGETLB_PAGE
97 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); 100 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
98 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); 101 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index db1cf397be2d..9e8050ea1225 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -195,11 +195,11 @@ exception_marker:
195#define EX_R12 24 195#define EX_R12 24
196#define EX_R13 32 196#define EX_R13 32
197#define EX_SRR0 40 197#define EX_SRR0 40
198#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
199#define EX_DAR 48 198#define EX_DAR 48
200#define EX_LR 48 /* SLB miss saves LR, but not DAR */
201#define EX_DSISR 56 199#define EX_DSISR 56
202#define EX_CCR 60 200#define EX_CCR 60
201#define EX_R3 64
202#define EX_LR 72
203 203
204#define EXCEPTION_PROLOG_PSERIES(area, label) \ 204#define EXCEPTION_PROLOG_PSERIES(area, label) \
205 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ 205 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +419,22 @@ data_access_slb_pSeries:
419 mtspr SPRN_SPRG1,r13 419 mtspr SPRN_SPRG1,r13
420 RUNLATCH_ON(r13) 420 RUNLATCH_ON(r13)
421 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ 421 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
422 std r3,PACA_EXSLB+EX_R3(r13)
423 mfspr r3,SPRN_DAR
422 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 424 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
425 mfcr r9
426#ifdef __DISABLED__
427 /* Keep that around for when we re-implement dynamic VSIDs */
428 cmpdi r3,0
429 bge slb_miss_user_pseries
430#endif /* __DISABLED__ */
423 std r10,PACA_EXSLB+EX_R10(r13) 431 std r10,PACA_EXSLB+EX_R10(r13)
424 std r11,PACA_EXSLB+EX_R11(r13) 432 std r11,PACA_EXSLB+EX_R11(r13)
425 std r12,PACA_EXSLB+EX_R12(r13) 433 std r12,PACA_EXSLB+EX_R12(r13)
426 std r3,PACA_EXSLB+EX_R3(r13) 434 mfspr r10,SPRN_SPRG1
427 mfspr r9,SPRN_SPRG1 435 std r10,PACA_EXSLB+EX_R13(r13)
428 std r9,PACA_EXSLB+EX_R13(r13)
429 mfcr r9
430 mfspr r12,SPRN_SRR1 /* and SRR1 */ 436 mfspr r12,SPRN_SRR1 /* and SRR1 */
431 mfspr r3,SPRN_DAR 437 b .slb_miss_realmode /* Rel. branch works in real mode */
432 b .do_slb_miss /* Rel. branch works in real mode */
433 438
434 STD_EXCEPTION_PSERIES(0x400, instruction_access) 439 STD_EXCEPTION_PSERIES(0x400, instruction_access)
435 440
@@ -440,17 +445,22 @@ instruction_access_slb_pSeries:
440 mtspr SPRN_SPRG1,r13 445 mtspr SPRN_SPRG1,r13
441 RUNLATCH_ON(r13) 446 RUNLATCH_ON(r13)
442 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ 447 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
448 std r3,PACA_EXSLB+EX_R3(r13)
449 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
443 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 450 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
451 mfcr r9
452#ifdef __DISABLED__
453 /* Keep that around for when we re-implement dynamic VSIDs */
454 cmpdi r3,0
455 bge slb_miss_user_pseries
456#endif /* __DISABLED__ */
444 std r10,PACA_EXSLB+EX_R10(r13) 457 std r10,PACA_EXSLB+EX_R10(r13)
445 std r11,PACA_EXSLB+EX_R11(r13) 458 std r11,PACA_EXSLB+EX_R11(r13)
446 std r12,PACA_EXSLB+EX_R12(r13) 459 std r12,PACA_EXSLB+EX_R12(r13)
447 std r3,PACA_EXSLB+EX_R3(r13) 460 mfspr r10,SPRN_SPRG1
448 mfspr r9,SPRN_SPRG1 461 std r10,PACA_EXSLB+EX_R13(r13)
449 std r9,PACA_EXSLB+EX_R13(r13)
450 mfcr r9
451 mfspr r12,SPRN_SRR1 /* and SRR1 */ 462 mfspr r12,SPRN_SRR1 /* and SRR1 */
452 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ 463 b .slb_miss_realmode /* Rel. branch works in real mode */
453 b .do_slb_miss /* Rel. branch works in real mode */
454 464
455 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) 465 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
456 STD_EXCEPTION_PSERIES(0x600, alignment) 466 STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -509,6 +519,38 @@ _GLOBAL(do_stab_bolted_pSeries)
509 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) 519 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
510 520
511/* 521/*
522 * We have some room here we use that to put
523 * the peries slb miss user trampoline code so it's reasonably
524 * away from slb_miss_user_common to avoid problems with rfid
525 *
526 * This is used for when the SLB miss handler has to go virtual,
527 * which doesn't happen for now anymore but will once we re-implement
528 * dynamic VSIDs for shared page tables
529 */
530#ifdef __DISABLED__
531slb_miss_user_pseries:
532 std r10,PACA_EXGEN+EX_R10(r13)
533 std r11,PACA_EXGEN+EX_R11(r13)
534 std r12,PACA_EXGEN+EX_R12(r13)
535 mfspr r10,SPRG1
536 ld r11,PACA_EXSLB+EX_R9(r13)
537 ld r12,PACA_EXSLB+EX_R3(r13)
538 std r10,PACA_EXGEN+EX_R13(r13)
539 std r11,PACA_EXGEN+EX_R9(r13)
540 std r12,PACA_EXGEN+EX_R3(r13)
541 clrrdi r12,r13,32
542 mfmsr r10
543 mfspr r11,SRR0 /* save SRR0 */
544 ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
545 ori r10,r10,MSR_IR|MSR_DR|MSR_RI
546 mtspr SRR0,r12
547 mfspr r12,SRR1 /* and SRR1 */
548 mtspr SRR1,r10
549 rfid
550 b . /* prevent spec. execution */
551#endif /* __DISABLED__ */
552
553/*
512 * Vectors for the FWNMI option. Share common code. 554 * Vectors for the FWNMI option. Share common code.
513 */ 555 */
514 .globl system_reset_fwnmi 556 .globl system_reset_fwnmi
@@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
559 .globl data_access_slb_iSeries 601 .globl data_access_slb_iSeries
560data_access_slb_iSeries: 602data_access_slb_iSeries:
561 mtspr SPRN_SPRG1,r13 /* save r13 */ 603 mtspr SPRN_SPRG1,r13 /* save r13 */
562 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 604 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
563 std r3,PACA_EXSLB+EX_R3(r13) 605 std r3,PACA_EXSLB+EX_R3(r13)
564 ld r12,PACALPPACA+LPPACASRR1(r13)
565 mfspr r3,SPRN_DAR 606 mfspr r3,SPRN_DAR
566 b .do_slb_miss 607 std r9,PACA_EXSLB+EX_R9(r13)
608 mfcr r9
609#ifdef __DISABLED__
610 cmpdi r3,0
611 bge slb_miss_user_iseries
612#endif
613 std r10,PACA_EXSLB+EX_R10(r13)
614 std r11,PACA_EXSLB+EX_R11(r13)
615 std r12,PACA_EXSLB+EX_R12(r13)
616 mfspr r10,SPRN_SPRG1
617 std r10,PACA_EXSLB+EX_R13(r13)
618 ld r12,PACALPPACA+LPPACASRR1(r13);
619 b .slb_miss_realmode
567 620
568 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) 621 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
569 622
570 .globl instruction_access_slb_iSeries 623 .globl instruction_access_slb_iSeries
571instruction_access_slb_iSeries: 624instruction_access_slb_iSeries:
572 mtspr SPRN_SPRG1,r13 /* save r13 */ 625 mtspr SPRN_SPRG1,r13 /* save r13 */
573 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 626 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
574 std r3,PACA_EXSLB+EX_R3(r13) 627 std r3,PACA_EXSLB+EX_R3(r13)
575 ld r12,PACALPPACA+LPPACASRR1(r13) 628 ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
576 ld r3,PACALPPACA+LPPACASRR0(r13) 629 std r9,PACA_EXSLB+EX_R9(r13)
577 b .do_slb_miss 630 mfcr r9
631#ifdef __DISABLED__
632 cmpdi r3,0
633 bge .slb_miss_user_iseries
634#endif
635 std r10,PACA_EXSLB+EX_R10(r13)
636 std r11,PACA_EXSLB+EX_R11(r13)
637 std r12,PACA_EXSLB+EX_R12(r13)
638 mfspr r10,SPRN_SPRG1
639 std r10,PACA_EXSLB+EX_R13(r13)
640 ld r12,PACALPPACA+LPPACASRR1(r13);
641 b .slb_miss_realmode
642
643#ifdef __DISABLED__
644slb_miss_user_iseries:
645 std r10,PACA_EXGEN+EX_R10(r13)
646 std r11,PACA_EXGEN+EX_R11(r13)
647 std r12,PACA_EXGEN+EX_R12(r13)
648 mfspr r10,SPRG1
649 ld r11,PACA_EXSLB+EX_R9(r13)
650 ld r12,PACA_EXSLB+EX_R3(r13)
651 std r10,PACA_EXGEN+EX_R13(r13)
652 std r11,PACA_EXGEN+EX_R9(r13)
653 std r12,PACA_EXGEN+EX_R3(r13)
654 EXCEPTION_PROLOG_ISERIES_2
655 b slb_miss_user_common
656#endif
578 657
579 MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) 658 MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
580 STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) 659 STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +888,126 @@ instruction_access_common:
809 li r5,0x400 888 li r5,0x400
810 b .do_hash_page /* Try to handle as hpte fault */ 889 b .do_hash_page /* Try to handle as hpte fault */
811 890
891/*
892 * Here is the common SLB miss user that is used when going to virtual
893 * mode for SLB misses, that is currently not used
894 */
895#ifdef __DISABLED__
896 .align 7
897 .globl slb_miss_user_common
898slb_miss_user_common:
899 mflr r10
900 std r3,PACA_EXGEN+EX_DAR(r13)
901 stw r9,PACA_EXGEN+EX_CCR(r13)
902 std r10,PACA_EXGEN+EX_LR(r13)
903 std r11,PACA_EXGEN+EX_SRR0(r13)
904 bl .slb_allocate_user
905
906 ld r10,PACA_EXGEN+EX_LR(r13)
907 ld r3,PACA_EXGEN+EX_R3(r13)
908 lwz r9,PACA_EXGEN+EX_CCR(r13)
909 ld r11,PACA_EXGEN+EX_SRR0(r13)
910 mtlr r10
911 beq- slb_miss_fault
912
913 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
914 beq- unrecov_user_slb
915 mfmsr r10
916
917.machine push
918.machine "power4"
919 mtcrf 0x80,r9
920.machine pop
921
922 clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
923 mtmsrd r10,1
924
925 mtspr SRR0,r11
926 mtspr SRR1,r12
927
928 ld r9,PACA_EXGEN+EX_R9(r13)
929 ld r10,PACA_EXGEN+EX_R10(r13)
930 ld r11,PACA_EXGEN+EX_R11(r13)
931 ld r12,PACA_EXGEN+EX_R12(r13)
932 ld r13,PACA_EXGEN+EX_R13(r13)
933 rfid
934 b .
935
936slb_miss_fault:
937 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
938 ld r4,PACA_EXGEN+EX_DAR(r13)
939 li r5,0
940 std r4,_DAR(r1)
941 std r5,_DSISR(r1)
942 b .handle_page_fault
943
944unrecov_user_slb:
945 EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
946 DISABLE_INTS
947 bl .save_nvgprs
9481: addi r3,r1,STACK_FRAME_OVERHEAD
949 bl .unrecoverable_exception
950 b 1b
951
952#endif /* __DISABLED__ */
953
954
955/*
956 * r13 points to the PACA, r9 contains the saved CR,
957 * r12 contain the saved SRR1, SRR0 is still ready for return
958 * r3 has the faulting address
959 * r9 - r13 are saved in paca->exslb.
960 * r3 is saved in paca->slb_r3
961 * We assume we aren't going to take any exceptions during this procedure.
962 */
963_GLOBAL(slb_miss_realmode)
964 mflr r10
965
966 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
967 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
968
969 bl .slb_allocate_realmode
970
971 /* All done -- return from exception. */
972
973 ld r10,PACA_EXSLB+EX_LR(r13)
974 ld r3,PACA_EXSLB+EX_R3(r13)
975 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
976#ifdef CONFIG_PPC_ISERIES
977 ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
978#endif /* CONFIG_PPC_ISERIES */
979
980 mtlr r10
981
982 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
983 beq- unrecov_slb
984
985.machine push
986.machine "power4"
987 mtcrf 0x80,r9
988 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
989.machine pop
990
991#ifdef CONFIG_PPC_ISERIES
992 mtspr SPRN_SRR0,r11
993 mtspr SPRN_SRR1,r12
994#endif /* CONFIG_PPC_ISERIES */
995 ld r9,PACA_EXSLB+EX_R9(r13)
996 ld r10,PACA_EXSLB+EX_R10(r13)
997 ld r11,PACA_EXSLB+EX_R11(r13)
998 ld r12,PACA_EXSLB+EX_R12(r13)
999 ld r13,PACA_EXSLB+EX_R13(r13)
1000 rfid
1001 b . /* prevent speculative execution */
1002
1003unrecov_slb:
1004 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
1005 DISABLE_INTS
1006 bl .save_nvgprs
10071: addi r3,r1,STACK_FRAME_OVERHEAD
1008 bl .unrecoverable_exception
1009 b 1b
1010
812 .align 7 1011 .align 7
813 .globl hardware_interrupt_common 1012 .globl hardware_interrupt_common
814 .globl hardware_interrupt_entry 1013 .globl hardware_interrupt_entry
@@ -1139,62 +1338,6 @@ _GLOBAL(do_stab_bolted)
1139 b . /* prevent speculative execution */ 1338 b . /* prevent speculative execution */
1140 1339
1141/* 1340/*
1142 * r13 points to the PACA, r9 contains the saved CR,
1143 * r11 and r12 contain the saved SRR0 and SRR1.
1144 * r3 has the faulting address
1145 * r9 - r13 are saved in paca->exslb.
1146 * r3 is saved in paca->slb_r3
1147 * We assume we aren't going to take any exceptions during this procedure.
1148 */
1149_GLOBAL(do_slb_miss)
1150 mflr r10
1151
1152 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
1153 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
1154
1155 bl .slb_allocate /* handle it */
1156
1157 /* All done -- return from exception. */
1158
1159 ld r10,PACA_EXSLB+EX_LR(r13)
1160 ld r3,PACA_EXSLB+EX_R3(r13)
1161 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
1162#ifdef CONFIG_PPC_ISERIES
1163 ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
1164#endif /* CONFIG_PPC_ISERIES */
1165
1166 mtlr r10
1167
1168 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
1169 beq- unrecov_slb
1170
1171.machine push
1172.machine "power4"
1173 mtcrf 0x80,r9
1174 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
1175.machine pop
1176
1177#ifdef CONFIG_PPC_ISERIES
1178 mtspr SPRN_SRR0,r11
1179 mtspr SPRN_SRR1,r12
1180#endif /* CONFIG_PPC_ISERIES */
1181 ld r9,PACA_EXSLB+EX_R9(r13)
1182 ld r10,PACA_EXSLB+EX_R10(r13)
1183 ld r11,PACA_EXSLB+EX_R11(r13)
1184 ld r12,PACA_EXSLB+EX_R12(r13)
1185 ld r13,PACA_EXSLB+EX_R13(r13)
1186 rfid
1187 b . /* prevent speculative execution */
1188
1189unrecov_slb:
1190 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
1191 DISABLE_INTS
1192 bl .save_nvgprs
11931: addi r3,r1,STACK_FRAME_OVERHEAD
1194 bl .unrecoverable_exception
1195 b 1b
1196
1197/*
1198 * Space for CPU0's segment table. 1341 * Space for CPU0's segment table.
1199 * 1342 *
1200 * On iSeries, the hypervisor must fill in at least one entry before 1343 * On iSeries, the hypervisor must fill in at least one entry before
@@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start)
1569#endif 1712#endif
1570 /* Initialize the first segment table (or SLB) entry */ 1713 /* Initialize the first segment table (or SLB) entry */
1571 ld r3,PACASTABVIRT(r13) /* get addr of segment table */ 1714 ld r3,PACASTABVIRT(r13) /* get addr of segment table */
1715BEGIN_FTR_SECTION
1572 bl .stab_initialize 1716 bl .stab_initialize
1717END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
1718 bl .slb_initialize
1573 1719
1574 /* Initialize the kernel stack. Just a repeat for iSeries. */ 1720 /* Initialize the kernel stack. Just a repeat for iSeries. */
1575 LOADADDR(r3,current_set) 1721 LOADADDR(r3,current_set)
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c
index 5e27e5a6a35d..3133c72b28ec 100644
--- a/arch/ppc64/kernel/pacaData.c
+++ b/arch/ppc64/kernel/pacaData.c
@@ -23,7 +23,7 @@
23static union { 23static union {
24 struct systemcfg data; 24 struct systemcfg data;
25 u8 page[PAGE_SIZE]; 25 u8 page[PAGE_SIZE];
26} systemcfg_store __page_aligned; 26} systemcfg_store __attribute__((__section__(".data.page.aligned")));
27struct systemcfg *systemcfg = &systemcfg_store.data; 27struct systemcfg *systemcfg = &systemcfg_store.data;
28EXPORT_SYMBOL(systemcfg); 28EXPORT_SYMBOL(systemcfg);
29 29
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c
index 97bfceb5353b..dece31e58bc4 100644
--- a/arch/ppc64/kernel/prom.c
+++ b/arch/ppc64/kernel/prom.c
@@ -635,10 +635,10 @@ static inline char *find_flat_dt_string(u32 offset)
635 * used to extract the memory informations at boot before we can 635 * used to extract the memory informations at boot before we can
636 * unflatten the tree 636 * unflatten the tree
637 */ 637 */
638static int __init scan_flat_dt(int (*it)(unsigned long node, 638int __init of_scan_flat_dt(int (*it)(unsigned long node,
639 const char *uname, int depth, 639 const char *uname, int depth,
640 void *data), 640 void *data),
641 void *data) 641 void *data)
642{ 642{
643 unsigned long p = ((unsigned long)initial_boot_params) + 643 unsigned long p = ((unsigned long)initial_boot_params) +
644 initial_boot_params->off_dt_struct; 644 initial_boot_params->off_dt_struct;
@@ -695,8 +695,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
695 * This function can be used within scan_flattened_dt callback to get 695 * This function can be used within scan_flattened_dt callback to get
696 * access to properties 696 * access to properties
697 */ 697 */
698static void* __init get_flat_dt_prop(unsigned long node, const char *name, 698void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
699 unsigned long *size) 699 unsigned long *size)
700{ 700{
701 unsigned long p = node; 701 unsigned long p = node;
702 702
@@ -996,7 +996,7 @@ void __init unflatten_device_tree(void)
996static int __init early_init_dt_scan_cpus(unsigned long node, 996static int __init early_init_dt_scan_cpus(unsigned long node,
997 const char *uname, int depth, void *data) 997 const char *uname, int depth, void *data)
998{ 998{
999 char *type = get_flat_dt_prop(node, "device_type", NULL); 999 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
1000 u32 *prop; 1000 u32 *prop;
1001 unsigned long size; 1001 unsigned long size;
1002 1002
@@ -1004,17 +1004,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1004 if (type == NULL || strcmp(type, "cpu") != 0) 1004 if (type == NULL || strcmp(type, "cpu") != 0)
1005 return 0; 1005 return 0;
1006 1006
1007 /* On LPAR, look for the first ibm,pft-size property for the hash table size
1008 */
1009 if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
1010 u32 *pft_size;
1011 pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL);
1012 if (pft_size != NULL) {
1013 /* pft_size[0] is the NUMA CEC cookie */
1014 ppc64_pft_size = pft_size[1];
1015 }
1016 }
1017
1018 if (initial_boot_params && initial_boot_params->version >= 2) { 1007 if (initial_boot_params && initial_boot_params->version >= 2) {
1019 /* version 2 of the kexec param format adds the phys cpuid 1008 /* version 2 of the kexec param format adds the phys cpuid
1020 * of booted proc. 1009 * of booted proc.
@@ -1023,8 +1012,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1023 boot_cpuid = 0; 1012 boot_cpuid = 0;
1024 } else { 1013 } else {
1025 /* Check if it's the boot-cpu, set it's hw index in paca now */ 1014 /* Check if it's the boot-cpu, set it's hw index in paca now */
1026 if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { 1015 if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL)
1027 u32 *prop = get_flat_dt_prop(node, "reg", NULL); 1016 != NULL) {
1017 u32 *prop = of_get_flat_dt_prop(node, "reg", NULL);
1028 set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop); 1018 set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop);
1029 boot_cpuid_phys = get_hard_smp_processor_id(0); 1019 boot_cpuid_phys = get_hard_smp_processor_id(0);
1030 } 1020 }
@@ -1032,14 +1022,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1032 1022
1033#ifdef CONFIG_ALTIVEC 1023#ifdef CONFIG_ALTIVEC
1034 /* Check if we have a VMX and eventually update CPU features */ 1024 /* Check if we have a VMX and eventually update CPU features */
1035 prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL); 1025 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
1036 if (prop && (*prop) > 0) { 1026 if (prop && (*prop) > 0) {
1037 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; 1027 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
1038 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; 1028 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
1039 } 1029 }
1040 1030
1041 /* Same goes for Apple's "altivec" property */ 1031 /* Same goes for Apple's "altivec" property */
1042 prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); 1032 prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
1043 if (prop) { 1033 if (prop) {
1044 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; 1034 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
1045 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; 1035 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1051,7 +1041,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1051 * this by looking at the size of the ibm,ppc-interrupt-server#s 1041 * this by looking at the size of the ibm,ppc-interrupt-server#s
1052 * property 1042 * property
1053 */ 1043 */
1054 prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", 1044 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
1055 &size); 1045 &size);
1056 cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; 1046 cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
1057 if (prop && ((size / sizeof(u32)) > 1)) 1047 if (prop && ((size / sizeof(u32)) > 1))
@@ -1072,26 +1062,26 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1072 return 0; 1062 return 0;
1073 1063
1074 /* get platform type */ 1064 /* get platform type */
1075 prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); 1065 prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
1076 if (prop == NULL) 1066 if (prop == NULL)
1077 return 0; 1067 return 0;
1078 systemcfg->platform = *prop; 1068 systemcfg->platform = *prop;
1079 1069
1080 /* check if iommu is forced on or off */ 1070 /* check if iommu is forced on or off */
1081 if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) 1071 if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
1082 iommu_is_off = 1; 1072 iommu_is_off = 1;
1083 if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) 1073 if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
1084 iommu_force_on = 1; 1074 iommu_force_on = 1;
1085 1075
1086 prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL); 1076 prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
1087 if (prop64) 1077 if (prop64)
1088 memory_limit = *prop64; 1078 memory_limit = *prop64;
1089 1079
1090 prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); 1080 prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-start",NULL);
1091 if (prop64) 1081 if (prop64)
1092 tce_alloc_start = *prop64; 1082 tce_alloc_start = *prop64;
1093 1083
1094 prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); 1084 prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
1095 if (prop64) 1085 if (prop64)
1096 tce_alloc_end = *prop64; 1086 tce_alloc_end = *prop64;
1097 1087
@@ -1102,9 +1092,12 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1102 { 1092 {
1103 u64 *basep, *entryp; 1093 u64 *basep, *entryp;
1104 1094
1105 basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL); 1095 basep = (u64*)of_get_flat_dt_prop(node,
1106 entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL); 1096 "linux,rtas-base", NULL);
1107 prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL); 1097 entryp = (u64*)of_get_flat_dt_prop(node,
1098 "linux,rtas-entry", NULL);
1099 prop = (u32*)of_get_flat_dt_prop(node,
1100 "linux,rtas-size", NULL);
1108 if (basep && entryp && prop) { 1101 if (basep && entryp && prop) {
1109 rtas.base = *basep; 1102 rtas.base = *basep;
1110 rtas.entry = *entryp; 1103 rtas.entry = *entryp;
@@ -1125,11 +1118,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
1125 if (depth != 0) 1118 if (depth != 0)
1126 return 0; 1119 return 0;
1127 1120
1128 prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); 1121 prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL);
1129 dt_root_size_cells = (prop == NULL) ? 1 : *prop; 1122 dt_root_size_cells = (prop == NULL) ? 1 : *prop;
1130 DBG("dt_root_size_cells = %x\n", dt_root_size_cells); 1123 DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
1131 1124
1132 prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); 1125 prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL);
1133 dt_root_addr_cells = (prop == NULL) ? 2 : *prop; 1126 dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
1134 DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); 1127 DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
1135 1128
@@ -1161,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
1161static int __init early_init_dt_scan_memory(unsigned long node, 1154static int __init early_init_dt_scan_memory(unsigned long node,
1162 const char *uname, int depth, void *data) 1155 const char *uname, int depth, void *data)
1163{ 1156{
1164 char *type = get_flat_dt_prop(node, "device_type", NULL); 1157 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
1165 cell_t *reg, *endp; 1158 cell_t *reg, *endp;
1166 unsigned long l; 1159 unsigned long l;
1167 1160
@@ -1169,7 +1162,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
1169 if (type == NULL || strcmp(type, "memory") != 0) 1162 if (type == NULL || strcmp(type, "memory") != 0)
1170 return 0; 1163 return 0;
1171 1164
1172 reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); 1165 reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
1173 if (reg == NULL) 1166 if (reg == NULL)
1174 return 0; 1167 return 0;
1175 1168
@@ -1225,19 +1218,16 @@ void __init early_init_devtree(void *params)
1225 /* Setup flat device-tree pointer */ 1218 /* Setup flat device-tree pointer */
1226 initial_boot_params = params; 1219 initial_boot_params = params;
1227 1220
1228 /* By default, hash size is not set */
1229 ppc64_pft_size = 0;
1230
1231 /* Retreive various informations from the /chosen node of the 1221 /* Retreive various informations from the /chosen node of the
1232 * device-tree, including the platform type, initrd location and 1222 * device-tree, including the platform type, initrd location and
1233 * size, TCE reserve, and more ... 1223 * size, TCE reserve, and more ...
1234 */ 1224 */
1235 scan_flat_dt(early_init_dt_scan_chosen, NULL); 1225 of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
1236 1226
1237 /* Scan memory nodes and rebuild LMBs */ 1227 /* Scan memory nodes and rebuild LMBs */
1238 lmb_init(); 1228 lmb_init();
1239 scan_flat_dt(early_init_dt_scan_root, NULL); 1229 of_scan_flat_dt(early_init_dt_scan_root, NULL);
1240 scan_flat_dt(early_init_dt_scan_memory, NULL); 1230 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
1241 lmb_enforce_memory_limit(memory_limit); 1231 lmb_enforce_memory_limit(memory_limit);
1242 lmb_analyze(); 1232 lmb_analyze();
1243 systemcfg->physicalMemorySize = lmb_phys_mem_size(); 1233 systemcfg->physicalMemorySize = lmb_phys_mem_size();
@@ -1253,26 +1243,8 @@ void __init early_init_devtree(void *params)
1253 /* Retreive hash table size from flattened tree plus other 1243 /* Retreive hash table size from flattened tree plus other
1254 * CPU related informations (altivec support, boot CPU ID, ...) 1244 * CPU related informations (altivec support, boot CPU ID, ...)
1255 */ 1245 */
1256 scan_flat_dt(early_init_dt_scan_cpus, NULL); 1246 of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
1257
1258 /* If hash size wasn't obtained above, we calculate it now based on
1259 * the total RAM size
1260 */
1261 if (ppc64_pft_size == 0) {
1262 unsigned long rnd_mem_size, pteg_count;
1263
1264 /* round mem_size up to next power of 2 */
1265 rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize);
1266 if (rnd_mem_size < systemcfg->physicalMemorySize)
1267 rnd_mem_size <<= 1;
1268
1269 /* # pages / 2 */
1270 pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11);
1271
1272 ppc64_pft_size = __ilog2(pteg_count << 7);
1273 }
1274 1247
1275 DBG("Hash pftSize: %x\n", (int)ppc64_pft_size);
1276 DBG(" <- early_init_devtree()\n"); 1248 DBG(" <- early_init_devtree()\n");
1277} 1249}
1278 1250
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index c019501daceb..79a0556a0ab8 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -101,6 +101,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
101#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000020000000000) 101#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000020000000000)
102#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0000040000000000) 102#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0000040000000000)
103#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) 103#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000)
104#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0000100000000000)
104#else 105#else
105/* ensure on 32b processors the flags are available for compiling but 106/* ensure on 32b processors the flags are available for compiling but
106 * don't do anything */ 107 * don't do anything */
@@ -116,6 +117,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
116#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0) 117#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0)
117#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0) 118#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0)
118#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0) 119#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0)
120#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0)
119#endif 121#endif
120 122
121#ifndef __ASSEMBLY__ 123#ifndef __ASSEMBLY__
@@ -339,6 +341,7 @@ enum {
339#ifdef __powerpc64__ 341#ifdef __powerpc64__
340 CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | 342 CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |
341 CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_CELL | 343 CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_CELL |
344 CPU_FTR_CI_LARGE_PAGE |
342#endif 345#endif
343 0, 346 0,
344 347
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 9d91bdd667ae..6a35e6570ccd 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -74,6 +74,11 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn);
74 74
75/* Creates table for an individual device node */ 75/* Creates table for an individual device node */
76extern void iommu_devnode_init_iSeries(struct device_node *dn); 76extern void iommu_devnode_init_iSeries(struct device_node *dn);
77/* Get table parameters from HV */
78extern void iommu_table_getparms_iSeries(unsigned long busno,
79 unsigned char slotno,
80 unsigned char virtbus,
81 struct iommu_table* tbl);
77 82
78#endif /* CONFIG_PPC_ISERIES */ 83#endif /* CONFIG_PPC_ISERIES */
79 84
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index 629ca964b974..fa03864d06eb 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -47,20 +47,22 @@ struct machdep_calls {
47#ifdef CONFIG_PPC64 47#ifdef CONFIG_PPC64
48 void (*hpte_invalidate)(unsigned long slot, 48 void (*hpte_invalidate)(unsigned long slot,
49 unsigned long va, 49 unsigned long va,
50 int large, 50 int psize,
51 int local); 51 int local);
52 long (*hpte_updatepp)(unsigned long slot, 52 long (*hpte_updatepp)(unsigned long slot,
53 unsigned long newpp, 53 unsigned long newpp,
54 unsigned long va, 54 unsigned long va,
55 int large, 55 int pize,
56 int local); 56 int local);
57 void (*hpte_updateboltedpp)(unsigned long newpp, 57 void (*hpte_updateboltedpp)(unsigned long newpp,
58 unsigned long ea); 58 unsigned long ea,
59 int psize);
59 long (*hpte_insert)(unsigned long hpte_group, 60 long (*hpte_insert)(unsigned long hpte_group,
60 unsigned long va, 61 unsigned long va,
61 unsigned long prpn, 62 unsigned long prpn,
63 unsigned long rflags,
62 unsigned long vflags, 64 unsigned long vflags,
63 unsigned long rflags); 65 int psize);
64 long (*hpte_remove)(unsigned long hpte_group); 66 long (*hpte_remove)(unsigned long hpte_group);
65 void (*flush_hash_range)(unsigned long number, int local); 67 void (*flush_hash_range)(unsigned long number, int local);
66 68
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index 3a0104fa0462..7587bf5f38c6 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -178,6 +178,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
178extern struct device_node *of_node_get(struct device_node *node); 178extern struct device_node *of_node_get(struct device_node *node);
179extern void of_node_put(struct device_node *node); 179extern void of_node_put(struct device_node *node);
180 180
181/* For scanning the flat device-tree at boot time */
182int __init of_scan_flat_dt(int (*it)(unsigned long node,
183 const char *uname, int depth,
184 void *data),
185 void *data);
186void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
187 unsigned long *size);
188
181/* For updating the device tree at runtime */ 189/* For updating the device tree at runtime */
182extern void of_attach_node(struct device_node *); 190extern void of_attach_node(struct device_node *);
183extern void of_detach_node(const struct device_node *); 191extern void of_detach_node(const struct device_node *);
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index b5da0b851e02..3536a5cd7a2d 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -289,7 +289,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
289 289
290#ifdef CONFIG_PPC64 290#ifdef CONFIG_PPC64
291static __inline__ unsigned long 291static __inline__ unsigned long
292__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) 292__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
293{ 293{
294 unsigned long prev; 294 unsigned long prev;
295 295
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index ab17db79f69d..e525f49bd179 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -65,23 +65,27 @@ struct thread_info {
65 65
66/* thread information allocation */ 66/* thread information allocation */
67 67
68#ifdef CONFIG_DEBUG_STACK_USAGE
69#define THREAD_INFO_GFP GFP_KERNEL | __GFP_ZERO
70#else
71#define THREAD_INFO_GFP GFP_KERNEL
72#endif
73
74#if THREAD_SHIFT >= PAGE_SHIFT 68#if THREAD_SHIFT >= PAGE_SHIFT
75 69
76#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT) 70#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT)
77 71
72#ifdef CONFIG_DEBUG_STACK_USAGE
78#define alloc_thread_info(tsk) \ 73#define alloc_thread_info(tsk) \
79 ((struct thread_info *)__get_free_pages(THREAD_INFO_GFP, THREAD_ORDER)) 74 ((struct thread_info *)__get_free_pages(GFP_KERNEL | \
75 __GFP_ZERO, THREAD_ORDER))
76#else
77#define alloc_thread_info(tsk) \
78 ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER))
79#endif
80#define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER) 80#define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER)
81 81
82#else /* THREAD_SHIFT < PAGE_SHIFT */ 82#else /* THREAD_SHIFT < PAGE_SHIFT */
83 83
84#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, THREAD_INFO_GFP) 84#ifdef CONFIG_DEBUG_STACK_USAGE
85#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
86#else
87#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
88#endif
85#define free_thread_info(ti) kfree(ti) 89#define free_thread_info(ti) kfree(ti)
86 90
87#endif /* THREAD_SHIFT < PAGE_SHIFT */ 91#endif /* THREAD_SHIFT < PAGE_SHIFT */
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index ca3655672bbc..a2998eee37bb 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -31,9 +31,9 @@ struct mm_struct;
31struct ppc64_tlb_batch { 31struct ppc64_tlb_batch {
32 unsigned long index; 32 unsigned long index;
33 struct mm_struct *mm; 33 struct mm_struct *mm;
34 pte_t pte[PPC64_TLB_BATCH_NR]; 34 real_pte_t pte[PPC64_TLB_BATCH_NR];
35 unsigned long vaddr[PPC64_TLB_BATCH_NR]; 35 unsigned long vaddr[PPC64_TLB_BATCH_NR];
36 unsigned int large; 36 unsigned int psize;
37}; 37};
38DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); 38DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
39 39
@@ -48,8 +48,9 @@ static inline void flush_tlb_pending(void)
48 put_cpu_var(ppc64_tlb_batch); 48 put_cpu_var(ppc64_tlb_batch);
49} 49}
50 50
51extern void flush_hash_page(unsigned long va, pte_t pte, int local); 51extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
52void flush_hash_range(unsigned long number, int local); 52 int local);
53extern void flush_hash_range(unsigned long number, int local);
53 54
54#else /* CONFIG_PPC64 */ 55#else /* CONFIG_PPC64 */
55 56
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index e0505acb77d9..4c18a5cb69f5 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -48,13 +48,21 @@ extern char initial_stab[];
48 48
49/* Bits in the SLB VSID word */ 49/* Bits in the SLB VSID word */
50#define SLB_VSID_SHIFT 12 50#define SLB_VSID_SHIFT 12
51#define SLB_VSID_B ASM_CONST(0xc000000000000000)
52#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000)
53#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000)
51#define SLB_VSID_KS ASM_CONST(0x0000000000000800) 54#define SLB_VSID_KS ASM_CONST(0x0000000000000800)
52#define SLB_VSID_KP ASM_CONST(0x0000000000000400) 55#define SLB_VSID_KP ASM_CONST(0x0000000000000400)
53#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ 56#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */
54#define SLB_VSID_L ASM_CONST(0x0000000000000100) /* largepage */ 57#define SLB_VSID_L ASM_CONST(0x0000000000000100)
55#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ 58#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */
56#define SLB_VSID_LS ASM_CONST(0x0000000000000070) /* size of largepage */ 59#define SLB_VSID_LP ASM_CONST(0x0000000000000030)
57 60#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000)
61#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010)
62#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020)
63#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030)
64#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP)
65
58#define SLB_VSID_KERNEL (SLB_VSID_KP) 66#define SLB_VSID_KERNEL (SLB_VSID_KP)
59#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) 67#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
60 68
@@ -69,6 +77,7 @@ extern char initial_stab[];
69#define HPTE_V_AVPN_SHIFT 7 77#define HPTE_V_AVPN_SHIFT 7
70#define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80) 78#define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80)
71#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) 79#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
80#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN))
72#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) 81#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
73#define HPTE_V_LOCK ASM_CONST(0x0000000000000008) 82#define HPTE_V_LOCK ASM_CONST(0x0000000000000008)
74#define HPTE_V_LARGE ASM_CONST(0x0000000000000004) 83#define HPTE_V_LARGE ASM_CONST(0x0000000000000004)
@@ -81,6 +90,7 @@ extern char initial_stab[];
81#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000) 90#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000)
82#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff) 91#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff)
83#define HPTE_R_PP ASM_CONST(0x0000000000000003) 92#define HPTE_R_PP ASM_CONST(0x0000000000000003)
93#define HPTE_R_N ASM_CONST(0x0000000000000004)
84 94
85/* Values for PP (assumes Ks=0, Kp=1) */ 95/* Values for PP (assumes Ks=0, Kp=1) */
86/* pp0 will always be 0 for linux */ 96/* pp0 will always be 0 for linux */
@@ -99,100 +109,120 @@ typedef struct {
99extern hpte_t *htab_address; 109extern hpte_t *htab_address;
100extern unsigned long htab_hash_mask; 110extern unsigned long htab_hash_mask;
101 111
102static inline unsigned long hpt_hash(unsigned long vpn, int large) 112/*
113 * Page size definition
114 *
115 * shift : is the "PAGE_SHIFT" value for that page size
116 * sllp : is a bit mask with the value of SLB L || LP to be or'ed
117 * directly to a slbmte "vsid" value
118 * penc : is the HPTE encoding mask for the "LP" field:
119 *
120 */
121struct mmu_psize_def
103{ 122{
104 unsigned long vsid; 123 unsigned int shift; /* number of bits */
105 unsigned long page; 124 unsigned int penc; /* HPTE encoding */
106 125 unsigned int tlbiel; /* tlbiel supported for that page size */
107 if (large) { 126 unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
108 vsid = vpn >> 4; 127 unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
109 page = vpn & 0xf; 128};
110 } else {
111 vsid = vpn >> 16;
112 page = vpn & 0xffff;
113 }
114 129
115 return (vsid & 0x7fffffffffUL) ^ page; 130#endif /* __ASSEMBLY__ */
116}
117
118static inline void __tlbie(unsigned long va, int large)
119{
120 /* clear top 16 bits, non SLS segment */
121 va &= ~(0xffffULL << 48);
122
123 if (large) {
124 va &= HPAGE_MASK;
125 asm volatile("tlbie %0,1" : : "r"(va) : "memory");
126 } else {
127 va &= PAGE_MASK;
128 asm volatile("tlbie %0,0" : : "r"(va) : "memory");
129 }
130}
131 131
132static inline void tlbie(unsigned long va, int large) 132/*
133{ 133 * The kernel use the constants below to index in the page sizes array.
134 asm volatile("ptesync": : :"memory"); 134 * The use of fixed constants for this purpose is better for performances
135 __tlbie(va, large); 135 * of the low level hash refill handlers.
136 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 136 *
137} 137 * A non supported page size has a "shift" field set to 0
138 *
139 * Any new page size being implemented can get a new entry in here. Whether
140 * the kernel will use it or not is a different matter though. The actual page
141 * size used by hugetlbfs is not defined here and may be made variable
142 */
138 143
139static inline void __tlbiel(unsigned long va) 144#define MMU_PAGE_4K 0 /* 4K */
140{ 145#define MMU_PAGE_64K 1 /* 64K */
141 /* clear top 16 bits, non SLS segment */ 146#define MMU_PAGE_64K_AP 2 /* 64K Admixed (in a 4K segment) */
142 va &= ~(0xffffULL << 48); 147#define MMU_PAGE_1M 3 /* 1M */
143 va &= PAGE_MASK; 148#define MMU_PAGE_16M 4 /* 16M */
144 149#define MMU_PAGE_16G 5 /* 16G */
145 /* 150#define MMU_PAGE_COUNT 6
146 * Thanks to Alan Modra we are now able to use machine specific
147 * assembly instructions (like tlbiel) by using the gas -many flag.
148 * However we have to support older toolchains so for the moment
149 * we hardwire it.
150 */
151#if 0
152 asm volatile("tlbiel %0" : : "r"(va) : "memory");
153#else
154 asm volatile(".long 0x7c000224 | (%0 << 11)" : : "r"(va) : "memory");
155#endif
156}
157 151
158static inline void tlbiel(unsigned long va) 152#ifndef __ASSEMBLY__
159{
160 asm volatile("ptesync": : :"memory");
161 __tlbiel(va);
162 asm volatile("ptesync": : :"memory");
163}
164 153
165static inline unsigned long slot2va(unsigned long hpte_v, unsigned long slot) 154/*
166{ 155 * The current system page sizes
167 unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v); 156 */
168 unsigned long va; 157extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
158extern int mmu_linear_psize;
159extern int mmu_virtual_psize;
169 160
170 va = avpn << 23; 161#ifdef CONFIG_HUGETLB_PAGE
162/*
163 * The page size index of the huge pages for use by hugetlbfs
164 */
165extern int mmu_huge_psize;
171 166
172 if (! (hpte_v & HPTE_V_LARGE)) { 167#endif /* CONFIG_HUGETLB_PAGE */
173 unsigned long vpi, pteg;
174 168
175 pteg = slot / HPTES_PER_GROUP; 169/*
176 if (hpte_v & HPTE_V_SECONDARY) 170 * This function sets the AVPN and L fields of the HPTE appropriately
177 pteg = ~pteg; 171 * for the page size
172 */
173static inline unsigned long hpte_encode_v(unsigned long va, int psize)
174{
175 unsigned long v =
176 v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
177 v <<= HPTE_V_AVPN_SHIFT;
178 if (psize != MMU_PAGE_4K)
179 v |= HPTE_V_LARGE;
180 return v;
181}
178 182
179 vpi = ((va >> 28) ^ pteg) & htab_hash_mask; 183/*
184 * This function sets the ARPN, and LP fields of the HPTE appropriately
185 * for the page size. We assume the pa is already "clean" that is properly
186 * aligned for the requested page size
187 */
188static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
189{
190 unsigned long r;
180 191
181 va |= vpi << PAGE_SHIFT; 192 /* A 4K page needs no special encoding */
193 if (psize == MMU_PAGE_4K)
194 return pa & HPTE_R_RPN;
195 else {
196 unsigned int penc = mmu_psize_defs[psize].penc;
197 unsigned int shift = mmu_psize_defs[psize].shift;
198 return (pa & ~((1ul << shift) - 1)) | (penc << 12);
182 } 199 }
183 200 return r;
184 return va;
185} 201}
186 202
187/* 203/*
188 * Handle a fault by adding an HPTE. If the address can't be determined 204 * This hashes a virtual address for a 256Mb segment only for now
189 * to be valid via Linux page tables, return 1. If handled return 0
190 */ 205 */
191extern int __hash_page(unsigned long ea, unsigned long access, 206
192 unsigned long vsid, pte_t *ptep, unsigned long trap, 207static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
193 int local); 208{
209 return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift);
210}
211
212extern int __hash_page_4K(unsigned long ea, unsigned long access,
213 unsigned long vsid, pte_t *ptep, unsigned long trap,
214 unsigned int local);
215extern int __hash_page_64K(unsigned long ea, unsigned long access,
216 unsigned long vsid, pte_t *ptep, unsigned long trap,
217 unsigned int local);
218struct mm_struct;
219extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
220 unsigned long ea, unsigned long vsid, int local);
194 221
195extern void htab_finish_init(void); 222extern void htab_finish_init(void);
223extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
224 unsigned long pstart, unsigned long mode,
225 int psize);
196 226
197extern void hpte_init_native(void); 227extern void hpte_init_native(void);
198extern void hpte_init_lpar(void); 228extern void hpte_init_lpar(void);
@@ -200,17 +230,21 @@ extern void hpte_init_iSeries(void);
200 230
201extern long pSeries_lpar_hpte_insert(unsigned long hpte_group, 231extern long pSeries_lpar_hpte_insert(unsigned long hpte_group,
202 unsigned long va, unsigned long prpn, 232 unsigned long va, unsigned long prpn,
203 unsigned long vflags, 233 unsigned long rflags,
204 unsigned long rflags); 234 unsigned long vflags, int psize);
205extern long native_hpte_insert(unsigned long hpte_group, unsigned long va, 235
206 unsigned long prpn, 236extern long native_hpte_insert(unsigned long hpte_group,
207 unsigned long vflags, unsigned long rflags); 237 unsigned long va, unsigned long prpn,
238 unsigned long rflags,
239 unsigned long vflags, int psize);
208 240
209extern long iSeries_hpte_bolt_or_insert(unsigned long hpte_group, 241extern long iSeries_hpte_insert(unsigned long hpte_group,
210 unsigned long va, unsigned long prpn, 242 unsigned long va, unsigned long prpn,
211 unsigned long vflags, unsigned long rflags); 243 unsigned long rflags,
244 unsigned long vflags, int psize);
212 245
213extern void stabs_alloc(void); 246extern void stabs_alloc(void);
247extern void slb_initialize(void);
214 248
215#endif /* __ASSEMBLY__ */ 249#endif /* __ASSEMBLY__ */
216 250
diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h
index 820dd729b895..4f512e9fa6b8 100644
--- a/include/asm-ppc64/mmu_context.h
+++ b/include/asm-ppc64/mmu_context.h
@@ -16,8 +16,16 @@
16 * 2 of the License, or (at your option) any later version. 16 * 2 of the License, or (at your option) any later version.
17 */ 17 */
18 18
19static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 19/*
20 * Getting into a kernel thread, there is no valid user segment, mark
21 * paca->pgdir NULL so that SLB miss on user addresses will fault
22 */
23static inline void enter_lazy_tlb(struct mm_struct *mm,
24 struct task_struct *tsk)
20{ 25{
26#ifdef CONFIG_PPC_64K_PAGES
27 get_paca()->pgdir = NULL;
28#endif /* CONFIG_PPC_64K_PAGES */
21} 29}
22 30
23#define NO_CONTEXT 0 31#define NO_CONTEXT 0
@@ -40,8 +48,13 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
40 cpu_set(smp_processor_id(), next->cpu_vm_mask); 48 cpu_set(smp_processor_id(), next->cpu_vm_mask);
41 49
42 /* No need to flush userspace segments if the mm doesnt change */ 50 /* No need to flush userspace segments if the mm doesnt change */
51#ifdef CONFIG_PPC_64K_PAGES
52 if (prev == next && get_paca()->pgdir == next->pgd)
53 return;
54#else
43 if (prev == next) 55 if (prev == next)
44 return; 56 return;
57#endif /* CONFIG_PPC_64K_PAGES */
45 58
46#ifdef CONFIG_ALTIVEC 59#ifdef CONFIG_ALTIVEC
47 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 60 if (cpu_has_feature(CPU_FTR_ALTIVEC))
diff --git a/include/asm-ppc64/paca.h b/include/asm-ppc64/paca.h
index f68fe91debaf..bccacd6aa93a 100644
--- a/include/asm-ppc64/paca.h
+++ b/include/asm-ppc64/paca.h
@@ -72,10 +72,15 @@ struct paca_struct {
72 /* 72 /*
73 * Now, starting in cacheline 2, the exception save areas 73 * Now, starting in cacheline 2, the exception save areas
74 */ 74 */
75 u64 exgen[8] __attribute__((aligned(0x80))); /* used for most interrupts/exceptions */ 75 /* used for most interrupts/exceptions */
76 u64 exmc[8]; /* used for machine checks */ 76 u64 exgen[10] __attribute__((aligned(0x80)));
77 u64 exslb[8]; /* used for SLB/segment table misses 77 u64 exmc[10]; /* used for machine checks */
78 * on the linear mapping */ 78 u64 exslb[10]; /* used for SLB/segment table misses
79 * on the linear mapping */
80#ifdef CONFIG_PPC_64K_PAGES
81 pgd_t *pgdir;
82#endif /* CONFIG_PPC_64K_PAGES */
83
79 mm_context_t context; 84 mm_context_t context;
80 u16 slb_cache[SLB_CACHE_ENTRIES]; 85 u16 slb_cache[SLB_CACHE_ENTRIES];
81 u16 slb_cache_ptr; 86 u16 slb_cache_ptr;
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index d404431f0a9a..82ce187e5be8 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -13,32 +13,59 @@
13#include <linux/config.h> 13#include <linux/config.h>
14#include <asm/ppc_asm.h> /* for ASM_CONST */ 14#include <asm/ppc_asm.h> /* for ASM_CONST */
15 15
16/* PAGE_SHIFT determines the page size */ 16/*
17#define PAGE_SHIFT 12 17 * We support either 4k or 64k software page size. When using 64k pages
18#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) 18 * however, wether we are really supporting 64k pages in HW or not is
19#define PAGE_MASK (~(PAGE_SIZE-1)) 19 * irrelevant to those definitions. We always define HW_PAGE_SHIFT to 12
20 * as use of 64k pages remains a linux kernel specific, every notion of
21 * page number shared with the firmware, TCEs, iommu, etc... still assumes
22 * a page size of 4096.
23 */
24#ifdef CONFIG_PPC_64K_PAGES
25#define PAGE_SHIFT 16
26#else
27#define PAGE_SHIFT 12
28#endif
20 29
21#define SID_SHIFT 28 30#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
22#define SID_MASK 0xfffffffffUL 31#define PAGE_MASK (~(PAGE_SIZE-1))
23#define ESID_MASK 0xfffffffff0000000UL
24#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
25 32
26#define HPAGE_SHIFT 24 33/* HW_PAGE_SHIFT is always 4k pages */
27#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) 34#define HW_PAGE_SHIFT 12
28#define HPAGE_MASK (~(HPAGE_SIZE - 1)) 35#define HW_PAGE_SIZE (ASM_CONST(1) << HW_PAGE_SHIFT)
36#define HW_PAGE_MASK (~(HW_PAGE_SIZE-1))
29 37
30#ifdef CONFIG_HUGETLB_PAGE 38/* PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and
39 * HW_PAGE_SHIFT, that is 4k pages
40 */
41#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT)
42
43/* Segment size */
44#define SID_SHIFT 28
45#define SID_MASK 0xfffffffffUL
46#define ESID_MASK 0xfffffffff0000000UL
47#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
31 48
49/* Large pages size */
50
51#ifndef __ASSEMBLY__
52extern unsigned int HPAGE_SHIFT;
53#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
54#define HPAGE_MASK (~(HPAGE_SIZE - 1))
32#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 55#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
56#endif /* __ASSEMBLY__ */
57
58#ifdef CONFIG_HUGETLB_PAGE
59
33 60
34#define HTLB_AREA_SHIFT 40 61#define HTLB_AREA_SHIFT 40
35#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) 62#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT)
36#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) 63#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT)
37 64
38#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ 65#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \
39 - (1U << GET_ESID(addr))) & 0xffff) 66 - (1U << GET_ESID(addr))) & 0xffff)
40#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ 67#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
41 - (1U << GET_HTLB_AREA(addr))) & 0xffff) 68 - (1U << GET_HTLB_AREA(addr))) & 0xffff)
42 69
43#define ARCH_HAS_HUGEPAGE_ONLY_RANGE 70#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
44#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE 71#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
@@ -114,7 +141,25 @@ static __inline__ void clear_page(void *addr)
114 : "ctr", "memory"); 141 : "ctr", "memory");
115} 142}
116 143
117extern void copy_page(void *to, void *from); 144extern void copy_4K_page(void *to, void *from);
145
146#ifdef CONFIG_PPC_64K_PAGES
147static inline void copy_page(void *to, void *from)
148{
149 unsigned int i;
150 for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
151 copy_4K_page(to, from);
152 to += 4096;
153 from += 4096;
154 }
155}
156#else /* CONFIG_PPC_64K_PAGES */
157static inline void copy_page(void *to, void *from)
158{
159 copy_4K_page(to, from);
160}
161#endif /* CONFIG_PPC_64K_PAGES */
162
118struct page; 163struct page;
119extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); 164extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
120extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p); 165extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p);
@@ -124,43 +169,75 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
124 * These are used to make use of C type-checking. 169 * These are used to make use of C type-checking.
125 * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. 170 * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
126 */ 171 */
127typedef struct { unsigned long pte; } pte_t;
128typedef struct { unsigned long pmd; } pmd_t;
129typedef struct { unsigned long pud; } pud_t;
130typedef struct { unsigned long pgd; } pgd_t;
131typedef struct { unsigned long pgprot; } pgprot_t;
132 172
173/* PTE level */
174typedef struct { unsigned long pte; } pte_t;
133#define pte_val(x) ((x).pte) 175#define pte_val(x) ((x).pte)
134#define pmd_val(x) ((x).pmd)
135#define pud_val(x) ((x).pud)
136#define pgd_val(x) ((x).pgd)
137#define pgprot_val(x) ((x).pgprot)
138
139#define __pte(x) ((pte_t) { (x) }) 176#define __pte(x) ((pte_t) { (x) })
177
178/* 64k pages additionally define a bigger "real PTE" type that gathers
179 * the "second half" part of the PTE for pseudo 64k pages
180 */
181#ifdef CONFIG_PPC_64K_PAGES
182typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
183#else
184typedef struct { pte_t pte; } real_pte_t;
185#endif
186
187/* PMD level */
188typedef struct { unsigned long pmd; } pmd_t;
189#define pmd_val(x) ((x).pmd)
140#define __pmd(x) ((pmd_t) { (x) }) 190#define __pmd(x) ((pmd_t) { (x) })
191
192/* PUD level exusts only on 4k pages */
193#ifndef CONFIG_PPC_64K_PAGES
194typedef struct { unsigned long pud; } pud_t;
195#define pud_val(x) ((x).pud)
141#define __pud(x) ((pud_t) { (x) }) 196#define __pud(x) ((pud_t) { (x) })
197#endif
198
199/* PGD level */
200typedef struct { unsigned long pgd; } pgd_t;
201#define pgd_val(x) ((x).pgd)
142#define __pgd(x) ((pgd_t) { (x) }) 202#define __pgd(x) ((pgd_t) { (x) })
203
204/* Page protection bits */
205typedef struct { unsigned long pgprot; } pgprot_t;
206#define pgprot_val(x) ((x).pgprot)
143#define __pgprot(x) ((pgprot_t) { (x) }) 207#define __pgprot(x) ((pgprot_t) { (x) })
144 208
145#else 209#else
210
146/* 211/*
147 * .. while these make it easier on the compiler 212 * .. while these make it easier on the compiler
148 */ 213 */
149typedef unsigned long pte_t;
150typedef unsigned long pmd_t;
151typedef unsigned long pud_t;
152typedef unsigned long pgd_t;
153typedef unsigned long pgprot_t;
154 214
215typedef unsigned long pte_t;
155#define pte_val(x) (x) 216#define pte_val(x) (x)
217#define __pte(x) (x)
218
219#ifdef CONFIG_PPC_64K_PAGES
220typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
221#else
222typedef unsigned long real_pte_t;
223#endif
224
225
226typedef unsigned long pmd_t;
156#define pmd_val(x) (x) 227#define pmd_val(x) (x)
228#define __pmd(x) (x)
229
230#ifndef CONFIG_PPC_64K_PAGES
231typedef unsigned long pud_t;
157#define pud_val(x) (x) 232#define pud_val(x) (x)
233#define __pud(x) (x)
234#endif
235
236typedef unsigned long pgd_t;
158#define pgd_val(x) (x) 237#define pgd_val(x) (x)
159#define pgprot_val(x) (x) 238#define pgprot_val(x) (x)
160 239
161#define __pte(x) (x) 240typedef unsigned long pgprot_t;
162#define __pmd(x) (x)
163#define __pud(x) (x)
164#define __pgd(x) (x) 241#define __pgd(x) (x)
165#define __pgprot(x) (x) 242#define __pgprot(x) (x)
166 243
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h
index 26bc49c1108d..98da0e4262bd 100644
--- a/include/asm-ppc64/pgalloc.h
+++ b/include/asm-ppc64/pgalloc.h
@@ -8,10 +8,16 @@
8 8
9extern kmem_cache_t *pgtable_cache[]; 9extern kmem_cache_t *pgtable_cache[];
10 10
11#ifdef CONFIG_PPC_64K_PAGES
12#define PTE_CACHE_NUM 0
13#define PMD_CACHE_NUM 0
14#define PGD_CACHE_NUM 1
15#else
11#define PTE_CACHE_NUM 0 16#define PTE_CACHE_NUM 0
12#define PMD_CACHE_NUM 1 17#define PMD_CACHE_NUM 1
13#define PUD_CACHE_NUM 1 18#define PUD_CACHE_NUM 1
14#define PGD_CACHE_NUM 0 19#define PGD_CACHE_NUM 0
20#endif
15 21
16/* 22/*
17 * This program is free software; you can redistribute it and/or 23 * This program is free software; you can redistribute it and/or
@@ -30,6 +36,8 @@ static inline void pgd_free(pgd_t *pgd)
30 kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); 36 kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
31} 37}
32 38
39#ifndef CONFIG_PPC_64K_PAGES
40
33#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) 41#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
34 42
35static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 43static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -43,7 +51,30 @@ static inline void pud_free(pud_t *pud)
43 kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); 51 kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
44} 52}
45 53
46#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) 54static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
55{
56 pud_set(pud, (unsigned long)pmd);
57}
58
59#define pmd_populate(mm, pmd, pte_page) \
60 pmd_populate_kernel(mm, pmd, page_address(pte_page))
61#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
62
63
64#else /* CONFIG_PPC_64K_PAGES */
65
66#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
67
68static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
69 pte_t *pte)
70{
71 pmd_set(pmd, (unsigned long)pte);
72}
73
74#define pmd_populate(mm, pmd, pte_page) \
75 pmd_populate_kernel(mm, pmd, page_address(pte_page))
76
77#endif /* CONFIG_PPC_64K_PAGES */
47 78
48static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 79static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
49{ 80{
@@ -56,17 +87,15 @@ static inline void pmd_free(pmd_t *pmd)
56 kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); 87 kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
57} 88}
58 89
59#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) 90static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
60#define pmd_populate(mm, pmd, pte_page) \ 91 unsigned long address)
61 pmd_populate_kernel(mm, pmd, page_address(pte_page))
62
63static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
64{ 92{
65 return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], 93 return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
66 GFP_KERNEL|__GFP_REPEAT); 94 GFP_KERNEL|__GFP_REPEAT);
67} 95}
68 96
69static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) 97static inline struct page *pte_alloc_one(struct mm_struct *mm,
98 unsigned long address)
70{ 99{
71 return virt_to_page(pte_alloc_one_kernel(mm, address)); 100 return virt_to_page(pte_alloc_one_kernel(mm, address));
72} 101}
@@ -103,7 +132,7 @@ static inline void pgtable_free(pgtable_free_t pgf)
103 kmem_cache_free(pgtable_cache[cachenum], p); 132 kmem_cache_free(pgtable_cache[cachenum], p);
104} 133}
105 134
106void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); 135extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
107 136
108#define __pte_free_tlb(tlb, ptepage) \ 137#define __pte_free_tlb(tlb, ptepage) \
109 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ 138 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
@@ -111,9 +140,11 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
111#define __pmd_free_tlb(tlb, pmd) \ 140#define __pmd_free_tlb(tlb, pmd) \
112 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ 141 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
113 PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) 142 PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
143#ifndef CONFIG_PPC_64K_PAGES
114#define __pud_free_tlb(tlb, pmd) \ 144#define __pud_free_tlb(tlb, pmd) \
115 pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ 145 pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
116 PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) 146 PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
147#endif /* CONFIG_PPC_64K_PAGES */
117 148
118#define check_pgt_cache() do { } while (0) 149#define check_pgt_cache() do { } while (0)
119 150
diff --git a/include/asm-ppc64/pgtable-4k.h b/include/asm-ppc64/pgtable-4k.h
new file mode 100644
index 000000000000..c883a2748558
--- /dev/null
+++ b/include/asm-ppc64/pgtable-4k.h
@@ -0,0 +1,88 @@
1/*
2 * Entries per page directory level. The PTE level must use a 64b record
3 * for each page table entry. The PMD and PGD level use a 32b record for
4 * each entry by assuming that each entry is page aligned.
5 */
6#define PTE_INDEX_SIZE 9
7#define PMD_INDEX_SIZE 7
8#define PUD_INDEX_SIZE 7
9#define PGD_INDEX_SIZE 9
10
11#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
12#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
13#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
14#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
15
16#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
17#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
18#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
19#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
20
21/* PMD_SHIFT determines what a second-level page table entry can map */
22#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
23#define PMD_SIZE (1UL << PMD_SHIFT)
24#define PMD_MASK (~(PMD_SIZE-1))
25
26/* PUD_SHIFT determines what a third-level page table entry can map */
27#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
28#define PUD_SIZE (1UL << PUD_SHIFT)
29#define PUD_MASK (~(PUD_SIZE-1))
30
31/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
32#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
33#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
34#define PGDIR_MASK (~(PGDIR_SIZE-1))
35
36/* PTE bits */
37#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
38#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
39#define _PAGE_F_SECOND _PAGE_SECONDARY
40#define _PAGE_F_GIX _PAGE_GROUP_IX
41
42/* PTE flags to conserve for HPTE identification */
43#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
44 _PAGE_SECONDARY | _PAGE_GROUP_IX)
45
46/* PAGE_MASK gives the right answer below, but only by accident */
47/* It should be preserving the high 48 bits and then specifically */
48/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
49#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
50 _PAGE_HPTEFLAGS)
51
52/* Bits to mask out from a PMD to get to the PTE page */
53#define PMD_MASKED_BITS 0
54/* Bits to mask out from a PUD to get to the PMD page */
55#define PUD_MASKED_BITS 0
56/* Bits to mask out from a PGD to get to the PUD page */
57#define PGD_MASKED_BITS 0
58
59/* shift to put page number into pte */
60#define PTE_RPN_SHIFT (17)
61
62#define __real_pte(e,p) ((real_pte_t)(e))
63#define __rpte_to_pte(r) (r)
64#define __rpte_to_hidx(r,index) (pte_val((r)) >> 12)
65
66#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
67 do { \
68 index = 0; \
69 shift = mmu_psize_defs[psize].shift; \
70
71#define pte_iterate_hashed_end() } while(0)
72
73/*
74 * 4-level page tables related bits
75 */
76
77#define pgd_none(pgd) (!pgd_val(pgd))
78#define pgd_bad(pgd) (pgd_val(pgd) == 0)
79#define pgd_present(pgd) (pgd_val(pgd) != 0)
80#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
81#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
82
83#define pud_offset(pgdp, addr) \
84 (((pud_t *) pgd_page(*(pgdp))) + \
85 (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
86
87#define pud_ERROR(e) \
88 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
diff --git a/include/asm-ppc64/pgtable-64k.h b/include/asm-ppc64/pgtable-64k.h
new file mode 100644
index 000000000000..c5f437c86b3c
--- /dev/null
+++ b/include/asm-ppc64/pgtable-64k.h
@@ -0,0 +1,87 @@
1#include <asm-generic/pgtable-nopud.h>
2
3
4#define PTE_INDEX_SIZE 12
5#define PMD_INDEX_SIZE 12
6#define PUD_INDEX_SIZE 0
7#define PGD_INDEX_SIZE 4
8
9#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE)
10#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
11#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
12
13#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
14#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
15#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
16
17/* PMD_SHIFT determines what a second-level page table entry can map */
18#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
19#define PMD_SIZE (1UL << PMD_SHIFT)
20#define PMD_MASK (~(PMD_SIZE-1))
21
22/* PGDIR_SHIFT determines what a third-level page table entry can map */
23#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
24#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
25#define PGDIR_MASK (~(PGDIR_SIZE-1))
26
27/* Additional PTE bits (don't change without checking asm in hash_low.S) */
28#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */
29#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
30#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
31#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
32#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
33
34/* PTE flags to conserve for HPTE identification */
35#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_HPTE_SUB |\
36 _PAGE_COMBO)
37
38/* Shift to put page number into pte.
39 *
40 * That gives us a max RPN of 32 bits, which means a max of 48 bits
41 * of addressable physical space.
42 * We could get 3 more bits here by setting PTE_RPN_SHIFT to 29 but
43 * 32 makes PTEs more readable for debugging for now :)
44 */
45#define PTE_RPN_SHIFT (32)
46#define PTE_RPN_MAX (1UL << (64 - PTE_RPN_SHIFT))
47#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
48
49/* _PAGE_CHG_MASK masks of bits that are to be preserved accross
50 * pgprot changes
51 */
52#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
53 _PAGE_ACCESSED)
54
55/* Bits to mask out from a PMD to get to the PTE page */
56#define PMD_MASKED_BITS 0x1ff
57/* Bits to mask out from a PGD/PUD to get to the PMD page */
58#define PUD_MASKED_BITS 0x1ff
59
60#ifndef __ASSEMBLY__
61
62/* Manipulate "rpte" values */
63#define __real_pte(e,p) ((real_pte_t) { \
64 (e), pte_val(*((p) + PTRS_PER_PTE)) })
65#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
66 (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
67#define __rpte_to_pte(r) ((r).pte)
68#define __rpte_sub_valid(rpte, index) \
69 (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
70
71
72/* Trick: we set __end to va + 64k, which happens works for
73 * a 16M page as well as we want only one iteration
74 */
75#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
76 do { \
77 unsigned long __end = va + PAGE_SIZE; \
78 unsigned __split = (psize == MMU_PAGE_4K || \
79 psize == MMU_PAGE_64K_AP); \
80 shift = mmu_psize_defs[psize].shift; \
81 for (index = 0; va < __end; index++, va += (1 << shift)) { \
82 if (!__split || __rpte_sub_valid(rpte, index)) do { \
83
84#define pte_iterate_hashed_end() } while(0); } } while(0)
85
86
87#endif /* __ASSEMBLY__ */
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 8c3f574046b6..fde93ec36abc 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -15,40 +15,11 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#endif /* __ASSEMBLY__ */ 16#endif /* __ASSEMBLY__ */
17 17
18/* 18#ifdef CONFIG_PPC_64K_PAGES
19 * Entries per page directory level. The PTE level must use a 64b record 19#include <asm/pgtable-64k.h>
20 * for each page table entry. The PMD and PGD level use a 32b record for 20#else
21 * each entry by assuming that each entry is page aligned. 21#include <asm/pgtable-4k.h>
22 */ 22#endif
23#define PTE_INDEX_SIZE 9
24#define PMD_INDEX_SIZE 7
25#define PUD_INDEX_SIZE 7
26#define PGD_INDEX_SIZE 9
27
28#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
29#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
30#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
31#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
32
33#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
34#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
35#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
36#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
37
38/* PMD_SHIFT determines what a second-level page table entry can map */
39#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
40#define PMD_SIZE (1UL << PMD_SHIFT)
41#define PMD_MASK (~(PMD_SIZE-1))
42
43/* PUD_SHIFT determines what a third-level page table entry can map */
44#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
45#define PUD_SIZE (1UL << PUD_SHIFT)
46#define PUD_MASK (~(PUD_SIZE-1))
47
48/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
49#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
50#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
51#define PGDIR_MASK (~(PGDIR_SIZE-1))
52 23
53#define FIRST_USER_ADDRESS 0 24#define FIRST_USER_ADDRESS 0
54 25
@@ -75,8 +46,9 @@
75#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) 46#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
76 47
77/* 48/*
78 * Bits in a linux-style PTE. These match the bits in the 49 * Common bits in a linux-style PTE. These match the bits in the
79 * (hardware-defined) PowerPC PTE as closely as possible. 50 * (hardware-defined) PowerPC PTE as closely as possible. Additional
51 * bits may be defined in pgtable-*.h
80 */ 52 */
81#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ 53#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
82#define _PAGE_USER 0x0002 /* matches one of the PP bits */ 54#define _PAGE_USER 0x0002 /* matches one of the PP bits */
@@ -91,15 +63,6 @@
91#define _PAGE_RW 0x0200 /* software: user write access allowed */ 63#define _PAGE_RW 0x0200 /* software: user write access allowed */
92#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */ 64#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
93#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ 65#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
94#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
95#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
96#define _PAGE_HUGE 0x10000 /* 16MB page */
97/* Bits 0x7000 identify the index within an HPT Group */
98#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
99/* PAGE_MASK gives the right answer below, but only by accident */
100/* It should be preserving the high 48 bits and then specifically */
101/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
102#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS)
103 66
104#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT) 67#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)
105 68
@@ -122,10 +85,10 @@
122#define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE) 85#define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE)
123#define HAVE_PAGE_AGP 86#define HAVE_PAGE_AGP
124 87
125/* 88/* PTEIDX nibble */
126 * This bit in a hardware PTE indicates that the page is *not* executable. 89#define _PTEIDX_SECONDARY 0x8
127 */ 90#define _PTEIDX_GROUP_IX 0x7
128#define HW_NO_EXEC _PAGE_EXEC 91
129 92
130/* 93/*
131 * POWER4 and newer have per page execute protection, older chips can only 94 * POWER4 and newer have per page execute protection, older chips can only
@@ -164,21 +127,10 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
164#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) 127#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
165#endif /* __ASSEMBLY__ */ 128#endif /* __ASSEMBLY__ */
166 129
167/* shift to put page number into pte */
168#define PTE_SHIFT (17)
169
170#ifdef CONFIG_HUGETLB_PAGE 130#ifdef CONFIG_HUGETLB_PAGE
171 131
172#ifndef __ASSEMBLY__
173int hash_huge_page(struct mm_struct *mm, unsigned long access,
174 unsigned long ea, unsigned long vsid, int local);
175#endif /* __ASSEMBLY__ */
176
177#define HAVE_ARCH_UNMAPPED_AREA 132#define HAVE_ARCH_UNMAPPED_AREA
178#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN 133#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
179#else
180
181#define hash_huge_page(mm,a,ea,vsid,local) -1
182 134
183#endif 135#endif
184 136
@@ -197,7 +149,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
197 pte_t pte; 149 pte_t pte;
198 150
199 151
200 pte_val(pte) = (pfn << PTE_SHIFT) | pgprot_val(pgprot); 152 pte_val(pte) = (pfn << PTE_RPN_SHIFT) | pgprot_val(pgprot);
201 return pte; 153 return pte;
202} 154}
203 155
@@ -209,30 +161,25 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
209 161
210/* pte_clear moved to later in this file */ 162/* pte_clear moved to later in this file */
211 163
212#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) 164#define pte_pfn(x) ((unsigned long)((pte_val(x)>>PTE_RPN_SHIFT)))
213#define pte_page(x) pfn_to_page(pte_pfn(x)) 165#define pte_page(x) pfn_to_page(pte_pfn(x))
214 166
215#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) 167#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval))
216#define pmd_none(pmd) (!pmd_val(pmd)) 168#define pmd_none(pmd) (!pmd_val(pmd))
217#define pmd_bad(pmd) (pmd_val(pmd) == 0) 169#define pmd_bad(pmd) (pmd_val(pmd) == 0)
218#define pmd_present(pmd) (pmd_val(pmd) != 0) 170#define pmd_present(pmd) (pmd_val(pmd) != 0)
219#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) 171#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
220#define pmd_page_kernel(pmd) (pmd_val(pmd)) 172#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
221#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) 173#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
222 174
223#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) 175#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
224#define pud_none(pud) (!pud_val(pud)) 176#define pud_none(pud) (!pud_val(pud))
225#define pud_bad(pud) ((pud_val(pud)) == 0) 177#define pud_bad(pud) ((pud_val(pud)) == 0)
226#define pud_present(pud) (pud_val(pud) != 0) 178#define pud_present(pud) (pud_val(pud) != 0)
227#define pud_clear(pudp) (pud_val(*(pudp)) = 0) 179#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
228#define pud_page(pud) (pud_val(pud)) 180#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
229 181
230#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) 182#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
231#define pgd_none(pgd) (!pgd_val(pgd))
232#define pgd_bad(pgd) (pgd_val(pgd) == 0)
233#define pgd_present(pgd) (pgd_val(pgd) != 0)
234#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
235#define pgd_page(pgd) (pgd_val(pgd))
236 183
237/* 184/*
238 * Find an entry in a page-table-directory. We combine the address region 185 * Find an entry in a page-table-directory. We combine the address region
@@ -243,9 +190,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
243 190
244#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) 191#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
245 192
246#define pud_offset(pgdp, addr) \
247 (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
248
249#define pmd_offset(pudp,addr) \ 193#define pmd_offset(pudp,addr) \
250 (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) 194 (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
251 195
@@ -271,7 +215,6 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;}
271static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;} 215static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;}
272static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;} 216static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;}
273static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;} 217static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
274static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE;}
275 218
276static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } 219static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
277static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } 220static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -286,7 +229,6 @@ static inline pte_t pte_mkclean(pte_t pte) {
286 pte_val(pte) &= ~(_PAGE_DIRTY); return pte; } 229 pte_val(pte) &= ~(_PAGE_DIRTY); return pte; }
287static inline pte_t pte_mkold(pte_t pte) { 230static inline pte_t pte_mkold(pte_t pte) {
288 pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } 231 pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
289
290static inline pte_t pte_mkread(pte_t pte) { 232static inline pte_t pte_mkread(pte_t pte) {
291 pte_val(pte) |= _PAGE_USER; return pte; } 233 pte_val(pte) |= _PAGE_USER; return pte; }
292static inline pte_t pte_mkexec(pte_t pte) { 234static inline pte_t pte_mkexec(pte_t pte) {
@@ -298,7 +240,7 @@ static inline pte_t pte_mkdirty(pte_t pte) {
298static inline pte_t pte_mkyoung(pte_t pte) { 240static inline pte_t pte_mkyoung(pte_t pte) {
299 pte_val(pte) |= _PAGE_ACCESSED; return pte; } 241 pte_val(pte) |= _PAGE_ACCESSED; return pte; }
300static inline pte_t pte_mkhuge(pte_t pte) { 242static inline pte_t pte_mkhuge(pte_t pte) {
301 pte_val(pte) |= _PAGE_HUGE; return pte; } 243 return pte; }
302 244
303/* Atomic PTE updates */ 245/* Atomic PTE updates */
304static inline unsigned long pte_update(pte_t *p, unsigned long clr) 246static inline unsigned long pte_update(pte_t *p, unsigned long clr)
@@ -321,11 +263,13 @@ static inline unsigned long pte_update(pte_t *p, unsigned long clr)
321/* PTE updating functions, this function puts the PTE in the 263/* PTE updating functions, this function puts the PTE in the
322 * batch, doesn't actually triggers the hash flush immediately, 264 * batch, doesn't actually triggers the hash flush immediately,
323 * you need to call flush_tlb_pending() to do that. 265 * you need to call flush_tlb_pending() to do that.
266 * Pass -1 for "normal" size (4K or 64K)
324 */ 267 */
325extern void hpte_update(struct mm_struct *mm, unsigned long addr, unsigned long pte, 268extern void hpte_update(struct mm_struct *mm, unsigned long addr,
326 int wrprot); 269 pte_t *ptep, unsigned long pte, int huge);
327 270
328static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 271static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
272 unsigned long addr, pte_t *ptep)
329{ 273{
330 unsigned long old; 274 unsigned long old;
331 275
@@ -333,7 +277,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon
333 return 0; 277 return 0;
334 old = pte_update(ptep, _PAGE_ACCESSED); 278 old = pte_update(ptep, _PAGE_ACCESSED);
335 if (old & _PAGE_HASHPTE) { 279 if (old & _PAGE_HASHPTE) {
336 hpte_update(mm, addr, old, 0); 280 hpte_update(mm, addr, ptep, old, 0);
337 flush_tlb_pending(); 281 flush_tlb_pending();
338 } 282 }
339 return (old & _PAGE_ACCESSED) != 0; 283 return (old & _PAGE_ACCESSED) != 0;
@@ -351,7 +295,8 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon
351 * moment we always flush but we need to fix hpte_update and test if the 295 * moment we always flush but we need to fix hpte_update and test if the
352 * optimisation is worth it. 296 * optimisation is worth it.
353 */ 297 */
354static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 298static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm,
299 unsigned long addr, pte_t *ptep)
355{ 300{
356 unsigned long old; 301 unsigned long old;
357 302
@@ -359,7 +304,7 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon
359 return 0; 304 return 0;
360 old = pte_update(ptep, _PAGE_DIRTY); 305 old = pte_update(ptep, _PAGE_DIRTY);
361 if (old & _PAGE_HASHPTE) 306 if (old & _PAGE_HASHPTE)
362 hpte_update(mm, addr, old, 0); 307 hpte_update(mm, addr, ptep, old, 0);
363 return (old & _PAGE_DIRTY) != 0; 308 return (old & _PAGE_DIRTY) != 0;
364} 309}
365#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY 310#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
@@ -371,7 +316,8 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon
371}) 316})
372 317
373#define __HAVE_ARCH_PTEP_SET_WRPROTECT 318#define __HAVE_ARCH_PTEP_SET_WRPROTECT
374static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 319static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
320 pte_t *ptep)
375{ 321{
376 unsigned long old; 322 unsigned long old;
377 323
@@ -379,7 +325,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
379 return; 325 return;
380 old = pte_update(ptep, _PAGE_RW); 326 old = pte_update(ptep, _PAGE_RW);
381 if (old & _PAGE_HASHPTE) 327 if (old & _PAGE_HASHPTE)
382 hpte_update(mm, addr, old, 0); 328 hpte_update(mm, addr, ptep, old, 0);
383} 329}
384 330
385/* 331/*
@@ -408,21 +354,23 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
408}) 354})
409 355
410#define __HAVE_ARCH_PTEP_GET_AND_CLEAR 356#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
411static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 357static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
358 unsigned long addr, pte_t *ptep)
412{ 359{
413 unsigned long old = pte_update(ptep, ~0UL); 360 unsigned long old = pte_update(ptep, ~0UL);
414 361
415 if (old & _PAGE_HASHPTE) 362 if (old & _PAGE_HASHPTE)
416 hpte_update(mm, addr, old, 0); 363 hpte_update(mm, addr, ptep, old, 0);
417 return __pte(old); 364 return __pte(old);
418} 365}
419 366
420static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t * ptep) 367static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
368 pte_t * ptep)
421{ 369{
422 unsigned long old = pte_update(ptep, ~0UL); 370 unsigned long old = pte_update(ptep, ~0UL);
423 371
424 if (old & _PAGE_HASHPTE) 372 if (old & _PAGE_HASHPTE)
425 hpte_update(mm, addr, old, 0); 373 hpte_update(mm, addr, ptep, old, 0);
426} 374}
427 375
428/* 376/*
@@ -435,7 +383,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
435 pte_clear(mm, addr, ptep); 383 pte_clear(mm, addr, ptep);
436 flush_tlb_pending(); 384 flush_tlb_pending();
437 } 385 }
438 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 386 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
387
388#ifdef CONFIG_PPC_64K_PAGES
389 if (mmu_virtual_psize != MMU_PAGE_64K)
390 pte = __pte(pte_val(pte) | _PAGE_COMBO);
391#endif /* CONFIG_PPC_64K_PAGES */
392
393 *ptep = pte;
439} 394}
440 395
441/* Set the dirty and/or accessed bits atomically in a linux PTE, this 396/* Set the dirty and/or accessed bits atomically in a linux PTE, this
@@ -482,8 +437,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
482 printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) 437 printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
483#define pmd_ERROR(e) \ 438#define pmd_ERROR(e) \
484 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) 439 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
485#define pud_ERROR(e) \
486 printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
487#define pgd_ERROR(e) \ 440#define pgd_ERROR(e) \
488 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) 441 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
489 442
@@ -509,12 +462,12 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
509/* Encode and de-code a swap entry */ 462/* Encode and de-code a swap entry */
510#define __swp_type(entry) (((entry).val >> 1) & 0x3f) 463#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
511#define __swp_offset(entry) ((entry).val >> 8) 464#define __swp_offset(entry) ((entry).val >> 8)
512#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) 465#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
513#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> PTE_SHIFT }) 466#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
514#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_SHIFT }) 467#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
515#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT) 468#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
516#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_SHIFT)|_PAGE_FILE}) 469#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
517#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_SHIFT) 470#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
518 471
519/* 472/*
520 * kern_addr_valid is intended to indicate whether an address is a valid 473 * kern_addr_valid is intended to indicate whether an address is a valid
@@ -532,29 +485,22 @@ void pgtable_cache_init(void);
532/* 485/*
533 * find_linux_pte returns the address of a linux pte for a given 486 * find_linux_pte returns the address of a linux pte for a given
534 * effective address and directory. If not found, it returns zero. 487 * effective address and directory. If not found, it returns zero.
535 */ 488 */static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
536static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
537{ 489{
538 pgd_t *pg; 490 pgd_t *pg;
539 pud_t *pu; 491 pud_t *pu;
540 pmd_t *pm; 492 pmd_t *pm;
541 pte_t *pt = NULL; 493 pte_t *pt = NULL;
542 pte_t pte;
543 494
544 pg = pgdir + pgd_index(ea); 495 pg = pgdir + pgd_index(ea);
545 if (!pgd_none(*pg)) { 496 if (!pgd_none(*pg)) {
546 pu = pud_offset(pg, ea); 497 pu = pud_offset(pg, ea);
547 if (!pud_none(*pu)) { 498 if (!pud_none(*pu)) {
548 pm = pmd_offset(pu, ea); 499 pm = pmd_offset(pu, ea);
549 if (pmd_present(*pm)) { 500 if (pmd_present(*pm))
550 pt = pte_offset_kernel(pm, ea); 501 pt = pte_offset_kernel(pm, ea);
551 pte = *pt;
552 if (!pte_present(pte))
553 pt = NULL;
554 }
555 } 502 }
556 } 503 }
557
558 return pt; 504 return pt;
559} 505}
560 506
diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h
index e8d0d2ab4c0f..bdb47174ff0e 100644
--- a/include/asm-ppc64/prom.h
+++ b/include/asm-ppc64/prom.h
@@ -188,6 +188,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
188extern struct device_node *of_node_get(struct device_node *node); 188extern struct device_node *of_node_get(struct device_node *node);
189extern void of_node_put(struct device_node *node); 189extern void of_node_put(struct device_node *node);
190 190
191/* For scanning the flat device-tree at boot time */
192int __init of_scan_flat_dt(int (*it)(unsigned long node,
193 const char *uname, int depth,
194 void *data),
195 void *data);
196void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
197 unsigned long *size);
198
191/* For updating the device tree at runtime */ 199/* For updating the device tree at runtime */
192extern void of_attach_node(struct device_node *); 200extern void of_attach_node(struct device_node *);
193extern void of_detach_node(const struct device_node *); 201extern void of_detach_node(const struct device_node *);
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h
index 99b8ca52f101..0cdd66c9f4b7 100644
--- a/include/asm-ppc64/system.h
+++ b/include/asm-ppc64/system.h
@@ -248,7 +248,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
248} 248}
249 249
250static __inline__ unsigned long 250static __inline__ unsigned long
251__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) 251__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
252{ 252{
253 unsigned long prev; 253 unsigned long prev;
254 254
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c9b43360fd33..9a565808da3f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -103,6 +103,9 @@ static int __init hugetlb_init(void)
103 unsigned long i; 103 unsigned long i;
104 struct page *page; 104 struct page *page;
105 105
106 if (HPAGE_SHIFT == 0)
107 return 0;
108
106 for (i = 0; i < MAX_NUMNODES; ++i) 109 for (i = 0; i < MAX_NUMNODES; ++i)
107 INIT_LIST_HEAD(&hugepage_freelists[i]); 110 INIT_LIST_HEAD(&hugepage_freelists[i]);
108 111