author     Paul Mackerras <paulus@samba.org>    2005-11-06 22:42:09 -0500
committer  Paul Mackerras <paulus@samba.org>    2005-11-06 22:42:09 -0500
commit     c6135234550ed89a6fd0e8cb229633967e41d649 (patch)
tree       22cef33e314839c4fb30d6fc888c0caa2a0f6602 /arch/powerpc
parent     76032de898f34db55b5048349db56557828a1390 (diff)
parent     0b154bb7d0cce80e9c0bcf11d4f9e71b59409d26 (diff)
Merge ../linux-2.6
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig                        9
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c           3
-rw-r--r--  arch/powerpc/kernel/cputable.c              4
-rw-r--r--  arch/powerpc/kernel/head_64.S               300
-rw-r--r--  arch/powerpc/kernel/lparmap.c               2
-rw-r--r--  arch/powerpc/kernel/process.c               6
-rw-r--r--  arch/powerpc/kernel/prom.c                  76
-rw-r--r--  arch/powerpc/kernel/setup_64.c              31
-rw-r--r--  arch/powerpc/lib/copypage_64.S              2
-rw-r--r--  arch/powerpc/lib/copyuser_64.S              4
-rw-r--r--  arch/powerpc/mm/hash_low_64.S               613
-rw-r--r--  arch/powerpc/mm/hash_native_64.c            377
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c             532
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c               134
-rw-r--r--  arch/powerpc/mm/init_64.c                   18
-rw-r--r--  arch/powerpc/mm/mem.c                       56
-rw-r--r--  arch/powerpc/mm/pgtable_64.c                22
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c                15
-rw-r--r--  arch/powerpc/mm/slb.c                       102
-rw-r--r--  arch/powerpc/mm/slb_low.S                   220
-rw-r--r--  arch/powerpc/mm/stab.c                      30
-rw-r--r--  arch/powerpc/mm/tlb_64.c                    32
-rw-r--r--  arch/powerpc/platforms/iseries/htab.c       65
-rw-r--r--  arch/powerpc/platforms/iseries/hvlog.c      4
-rw-r--r--  arch/powerpc/platforms/iseries/iommu.c      74
-rw-r--r--  arch/powerpc/platforms/iseries/setup.c      13
-rw-r--r--  arch/powerpc/platforms/iseries/vio.c        39
-rw-r--r--  arch/powerpc/platforms/iseries/viopath.c    16
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c       115
29 files changed, 2101 insertions, 813 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f4e25c648fbb..ca7acb0c79f0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -603,6 +603,15 @@ config NODES_SPAN_OTHER_NODES
603 def_bool y 603 def_bool y
604 depends on NEED_MULTIPLE_NODES 604 depends on NEED_MULTIPLE_NODES
605 605
606config PPC_64K_PAGES
607 bool "64k page size"
608 help
609 This option changes the kernel logical page size to 64k. On machines
610 without processor support for 64k pages, the kernel will simulate
611 them by loading each individual 4k page on demand transparently,
612 while on hardware with such support, it will be used to map
613 normal application pages.
614
606config SCHED_SMT 615config SCHED_SMT
607 bool "SMT (Hyperthreading) scheduler support" 616 bool "SMT (Hyperthreading) scheduler support"
608 depends on PPC64 && SMP 617 depends on PPC64 && SMP
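For illustration only, not part of this patch: when CONFIG_PPC_64K_PAGES is enabled on hardware that only supports 4k pages, each 64k logical page is backed by sixteen 4k hardware sub-pages, and bits 12-15 of the effective address select the sub-page (the same index the __hash_page_4K code further down extracts with rldicl r25,r3,64-12,60). A minimal C sketch of that selection follows; the macro names and the 16/12 shift values are assumptions of the sketch, not symbols defined by this patch.

/* Illustrative sketch only: sub-page selection when a 64k logical
 * page is emulated with 4k hardware pages.  Assumed shift values:
 * 16 for the logical page, 12 for the hardware page. */
#define LOGICAL_PAGE_SHIFT  16  /* 64k logical page */
#define HW_PAGE_SHIFT_4K    12  /* 4k hardware page */

static inline unsigned int subpage_index(unsigned long ea)
{
        /* bits 12..15 of the effective address pick one of the
         * sixteen 4k hardware pages inside a 64k logical page */
        return (ea >> HW_PAGE_SHIFT_4K) &
               ((1u << (LOGICAL_PAGE_SHIFT - HW_PAGE_SHIFT_4K)) - 1);
}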
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index bc5a3689cc05..b75757251994 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -125,6 +125,9 @@ int main(void)
125 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); 125 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
126 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); 126 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
127 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 127 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
128#ifdef CONFIG_PPC_64K_PAGES
129 DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
130#endif
128#ifdef CONFIG_HUGETLB_PAGE 131#ifdef CONFIG_HUGETLB_PAGE
129 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); 132 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
130 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); 133 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b91345fa0805..33c63bcf69f8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -240,7 +240,7 @@ struct cpu_spec cpu_specs[] = {
240 .oprofile_model = &op_model_power4, 240 .oprofile_model = &op_model_power4,
241#endif 241#endif
242 }, 242 },
243 { /* Power5 */ 243 { /* Power5 GR */
244 .pvr_mask = 0xffff0000, 244 .pvr_mask = 0xffff0000,
245 .pvr_value = 0x003a0000, 245 .pvr_value = 0x003a0000,
246 .cpu_name = "POWER5 (gr)", 246 .cpu_name = "POWER5 (gr)",
@@ -255,7 +255,7 @@ struct cpu_spec cpu_specs[] = {
255 .oprofile_model = &op_model_power4, 255 .oprofile_model = &op_model_power4,
256#endif 256#endif
257 }, 257 },
258 { /* Power5 */ 258 { /* Power5 GS */
259 .pvr_mask = 0xffff0000, 259 .pvr_mask = 0xffff0000,
260 .pvr_value = 0x003b0000, 260 .pvr_value = 0x003b0000,
261 .cpu_name = "POWER5 (gs)", 261 .cpu_name = "POWER5 (gs)",
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 45d81976987f..16ab40daa738 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -195,11 +195,11 @@ exception_marker:
195#define EX_R12 24 195#define EX_R12 24
196#define EX_R13 32 196#define EX_R13 32
197#define EX_SRR0 40 197#define EX_SRR0 40
198#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
199#define EX_DAR 48 198#define EX_DAR 48
200#define EX_LR 48 /* SLB miss saves LR, but not DAR */
201#define EX_DSISR 56 199#define EX_DSISR 56
202#define EX_CCR 60 200#define EX_CCR 60
201#define EX_R3 64
202#define EX_LR 72
203 203
204#define EXCEPTION_PROLOG_PSERIES(area, label) \ 204#define EXCEPTION_PROLOG_PSERIES(area, label) \
205 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ 205 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +419,22 @@ data_access_slb_pSeries:
419 mtspr SPRN_SPRG1,r13 419 mtspr SPRN_SPRG1,r13
420 RUNLATCH_ON(r13) 420 RUNLATCH_ON(r13)
421 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ 421 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
422 std r3,PACA_EXSLB+EX_R3(r13)
423 mfspr r3,SPRN_DAR
422 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 424 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
425 mfcr r9
426#ifdef __DISABLED__
427 /* Keep that around for when we re-implement dynamic VSIDs */
428 cmpdi r3,0
429 bge slb_miss_user_pseries
430#endif /* __DISABLED__ */
423 std r10,PACA_EXSLB+EX_R10(r13) 431 std r10,PACA_EXSLB+EX_R10(r13)
424 std r11,PACA_EXSLB+EX_R11(r13) 432 std r11,PACA_EXSLB+EX_R11(r13)
425 std r12,PACA_EXSLB+EX_R12(r13) 433 std r12,PACA_EXSLB+EX_R12(r13)
426 std r3,PACA_EXSLB+EX_R3(r13) 434 mfspr r10,SPRN_SPRG1
427 mfspr r9,SPRN_SPRG1 435 std r10,PACA_EXSLB+EX_R13(r13)
428 std r9,PACA_EXSLB+EX_R13(r13)
429 mfcr r9
430 mfspr r12,SPRN_SRR1 /* and SRR1 */ 436 mfspr r12,SPRN_SRR1 /* and SRR1 */
431 mfspr r3,SPRN_DAR 437 b .slb_miss_realmode /* Rel. branch works in real mode */
432 b .do_slb_miss /* Rel. branch works in real mode */
433 438
434 STD_EXCEPTION_PSERIES(0x400, instruction_access) 439 STD_EXCEPTION_PSERIES(0x400, instruction_access)
435 440
@@ -440,17 +445,22 @@ instruction_access_slb_pSeries:
440 mtspr SPRN_SPRG1,r13 445 mtspr SPRN_SPRG1,r13
441 RUNLATCH_ON(r13) 446 RUNLATCH_ON(r13)
442 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ 447 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
448 std r3,PACA_EXSLB+EX_R3(r13)
449 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
443 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 450 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
451 mfcr r9
452#ifdef __DISABLED__
453 /* Keep that around for when we re-implement dynamic VSIDs */
454 cmpdi r3,0
455 bge slb_miss_user_pseries
456#endif /* __DISABLED__ */
444 std r10,PACA_EXSLB+EX_R10(r13) 457 std r10,PACA_EXSLB+EX_R10(r13)
445 std r11,PACA_EXSLB+EX_R11(r13) 458 std r11,PACA_EXSLB+EX_R11(r13)
446 std r12,PACA_EXSLB+EX_R12(r13) 459 std r12,PACA_EXSLB+EX_R12(r13)
447 std r3,PACA_EXSLB+EX_R3(r13) 460 mfspr r10,SPRN_SPRG1
448 mfspr r9,SPRN_SPRG1 461 std r10,PACA_EXSLB+EX_R13(r13)
449 std r9,PACA_EXSLB+EX_R13(r13)
450 mfcr r9
451 mfspr r12,SPRN_SRR1 /* and SRR1 */ 462 mfspr r12,SPRN_SRR1 /* and SRR1 */
452 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ 463 b .slb_miss_realmode /* Rel. branch works in real mode */
453 b .do_slb_miss /* Rel. branch works in real mode */
454 464
455 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) 465 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
456 STD_EXCEPTION_PSERIES(0x600, alignment) 466 STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -509,6 +519,38 @@ _GLOBAL(do_stab_bolted_pSeries)
509 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) 519 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
510 520
511/* 521/*
522 * We have some room here we use that to put
523 * the peries slb miss user trampoline code so it's reasonably
524 * away from slb_miss_user_common to avoid problems with rfid
525 *
526 * This is used for when the SLB miss handler has to go virtual,
527 * which doesn't happen for now anymore but will once we re-implement
528 * dynamic VSIDs for shared page tables
529 */
530#ifdef __DISABLED__
531slb_miss_user_pseries:
532 std r10,PACA_EXGEN+EX_R10(r13)
533 std r11,PACA_EXGEN+EX_R11(r13)
534 std r12,PACA_EXGEN+EX_R12(r13)
535 mfspr r10,SPRG1
536 ld r11,PACA_EXSLB+EX_R9(r13)
537 ld r12,PACA_EXSLB+EX_R3(r13)
538 std r10,PACA_EXGEN+EX_R13(r13)
539 std r11,PACA_EXGEN+EX_R9(r13)
540 std r12,PACA_EXGEN+EX_R3(r13)
541 clrrdi r12,r13,32
542 mfmsr r10
543 mfspr r11,SRR0 /* save SRR0 */
544 ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
545 ori r10,r10,MSR_IR|MSR_DR|MSR_RI
546 mtspr SRR0,r12
547 mfspr r12,SRR1 /* and SRR1 */
548 mtspr SRR1,r10
549 rfid
550 b . /* prevent spec. execution */
551#endif /* __DISABLED__ */
552
553/*
512 * Vectors for the FWNMI option. Share common code. 554 * Vectors for the FWNMI option. Share common code.
513 */ 555 */
514 .globl system_reset_fwnmi 556 .globl system_reset_fwnmi
@@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
559 .globl data_access_slb_iSeries 601 .globl data_access_slb_iSeries
560data_access_slb_iSeries: 602data_access_slb_iSeries:
561 mtspr SPRN_SPRG1,r13 /* save r13 */ 603 mtspr SPRN_SPRG1,r13 /* save r13 */
562 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 604 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
563 std r3,PACA_EXSLB+EX_R3(r13) 605 std r3,PACA_EXSLB+EX_R3(r13)
564 ld r12,PACALPPACA+LPPACASRR1(r13)
565 mfspr r3,SPRN_DAR 606 mfspr r3,SPRN_DAR
566 b .do_slb_miss 607 std r9,PACA_EXSLB+EX_R9(r13)
608 mfcr r9
609#ifdef __DISABLED__
610 cmpdi r3,0
611 bge slb_miss_user_iseries
612#endif
613 std r10,PACA_EXSLB+EX_R10(r13)
614 std r11,PACA_EXSLB+EX_R11(r13)
615 std r12,PACA_EXSLB+EX_R12(r13)
616 mfspr r10,SPRN_SPRG1
617 std r10,PACA_EXSLB+EX_R13(r13)
618 ld r12,PACALPPACA+LPPACASRR1(r13);
619 b .slb_miss_realmode
567 620
568 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) 621 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
569 622
570 .globl instruction_access_slb_iSeries 623 .globl instruction_access_slb_iSeries
571instruction_access_slb_iSeries: 624instruction_access_slb_iSeries:
572 mtspr SPRN_SPRG1,r13 /* save r13 */ 625 mtspr SPRN_SPRG1,r13 /* save r13 */
573 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 626 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
574 std r3,PACA_EXSLB+EX_R3(r13) 627 std r3,PACA_EXSLB+EX_R3(r13)
575 ld r12,PACALPPACA+LPPACASRR1(r13) 628 ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
576 ld r3,PACALPPACA+LPPACASRR0(r13) 629 std r9,PACA_EXSLB+EX_R9(r13)
577 b .do_slb_miss 630 mfcr r9
631#ifdef __DISABLED__
632 cmpdi r3,0
633 bge .slb_miss_user_iseries
634#endif
635 std r10,PACA_EXSLB+EX_R10(r13)
636 std r11,PACA_EXSLB+EX_R11(r13)
637 std r12,PACA_EXSLB+EX_R12(r13)
638 mfspr r10,SPRN_SPRG1
639 std r10,PACA_EXSLB+EX_R13(r13)
640 ld r12,PACALPPACA+LPPACASRR1(r13);
641 b .slb_miss_realmode
642
643#ifdef __DISABLED__
644slb_miss_user_iseries:
645 std r10,PACA_EXGEN+EX_R10(r13)
646 std r11,PACA_EXGEN+EX_R11(r13)
647 std r12,PACA_EXGEN+EX_R12(r13)
648 mfspr r10,SPRG1
649 ld r11,PACA_EXSLB+EX_R9(r13)
650 ld r12,PACA_EXSLB+EX_R3(r13)
651 std r10,PACA_EXGEN+EX_R13(r13)
652 std r11,PACA_EXGEN+EX_R9(r13)
653 std r12,PACA_EXGEN+EX_R3(r13)
654 EXCEPTION_PROLOG_ISERIES_2
655 b slb_miss_user_common
656#endif
578 657
579 MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) 658 MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
580 STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) 659 STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +888,126 @@ instruction_access_common:
809 li r5,0x400 888 li r5,0x400
810 b .do_hash_page /* Try to handle as hpte fault */ 889 b .do_hash_page /* Try to handle as hpte fault */
811 890
891/*
892 * Here is the common SLB miss user that is used when going to virtual
893 * mode for SLB misses, that is currently not used
894 */
895#ifdef __DISABLED__
896 .align 7
897 .globl slb_miss_user_common
898slb_miss_user_common:
899 mflr r10
900 std r3,PACA_EXGEN+EX_DAR(r13)
901 stw r9,PACA_EXGEN+EX_CCR(r13)
902 std r10,PACA_EXGEN+EX_LR(r13)
903 std r11,PACA_EXGEN+EX_SRR0(r13)
904 bl .slb_allocate_user
905
906 ld r10,PACA_EXGEN+EX_LR(r13)
907 ld r3,PACA_EXGEN+EX_R3(r13)
908 lwz r9,PACA_EXGEN+EX_CCR(r13)
909 ld r11,PACA_EXGEN+EX_SRR0(r13)
910 mtlr r10
911 beq- slb_miss_fault
912
913 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
914 beq- unrecov_user_slb
915 mfmsr r10
916
917.machine push
918.machine "power4"
919 mtcrf 0x80,r9
920.machine pop
921
922 clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
923 mtmsrd r10,1
924
925 mtspr SRR0,r11
926 mtspr SRR1,r12
927
928 ld r9,PACA_EXGEN+EX_R9(r13)
929 ld r10,PACA_EXGEN+EX_R10(r13)
930 ld r11,PACA_EXGEN+EX_R11(r13)
931 ld r12,PACA_EXGEN+EX_R12(r13)
932 ld r13,PACA_EXGEN+EX_R13(r13)
933 rfid
934 b .
935
936slb_miss_fault:
937 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
938 ld r4,PACA_EXGEN+EX_DAR(r13)
939 li r5,0
940 std r4,_DAR(r1)
941 std r5,_DSISR(r1)
942 b .handle_page_fault
943
944unrecov_user_slb:
945 EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
946 DISABLE_INTS
947 bl .save_nvgprs
9481: addi r3,r1,STACK_FRAME_OVERHEAD
949 bl .unrecoverable_exception
950 b 1b
951
952#endif /* __DISABLED__ */
953
954
955/*
956 * r13 points to the PACA, r9 contains the saved CR,
957 * r12 contain the saved SRR1, SRR0 is still ready for return
958 * r3 has the faulting address
959 * r9 - r13 are saved in paca->exslb.
960 * r3 is saved in paca->slb_r3
961 * We assume we aren't going to take any exceptions during this procedure.
962 */
963_GLOBAL(slb_miss_realmode)
964 mflr r10
965
966 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
967 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
968
969 bl .slb_allocate_realmode
970
971 /* All done -- return from exception. */
972
973 ld r10,PACA_EXSLB+EX_LR(r13)
974 ld r3,PACA_EXSLB+EX_R3(r13)
975 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
976#ifdef CONFIG_PPC_ISERIES
977 ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
978#endif /* CONFIG_PPC_ISERIES */
979
980 mtlr r10
981
982 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
983 beq- unrecov_slb
984
985.machine push
986.machine "power4"
987 mtcrf 0x80,r9
988 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
989.machine pop
990
991#ifdef CONFIG_PPC_ISERIES
992 mtspr SPRN_SRR0,r11
993 mtspr SPRN_SRR1,r12
994#endif /* CONFIG_PPC_ISERIES */
995 ld r9,PACA_EXSLB+EX_R9(r13)
996 ld r10,PACA_EXSLB+EX_R10(r13)
997 ld r11,PACA_EXSLB+EX_R11(r13)
998 ld r12,PACA_EXSLB+EX_R12(r13)
999 ld r13,PACA_EXSLB+EX_R13(r13)
1000 rfid
1001 b . /* prevent speculative execution */
1002
1003unrecov_slb:
1004 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
1005 DISABLE_INTS
1006 bl .save_nvgprs
10071: addi r3,r1,STACK_FRAME_OVERHEAD
1008 bl .unrecoverable_exception
1009 b 1b
1010
812 .align 7 1011 .align 7
813 .globl hardware_interrupt_common 1012 .globl hardware_interrupt_common
814 .globl hardware_interrupt_entry 1013 .globl hardware_interrupt_entry
@@ -1139,62 +1338,6 @@ _GLOBAL(do_stab_bolted)
1139 b . /* prevent speculative execution */ 1338 b . /* prevent speculative execution */
1140 1339
1141/* 1340/*
1142 * r13 points to the PACA, r9 contains the saved CR,
1143 * r11 and r12 contain the saved SRR0 and SRR1.
1144 * r3 has the faulting address
1145 * r9 - r13 are saved in paca->exslb.
1146 * r3 is saved in paca->slb_r3
1147 * We assume we aren't going to take any exceptions during this procedure.
1148 */
1149_GLOBAL(do_slb_miss)
1150 mflr r10
1151
1152 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
1153 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
1154
1155 bl .slb_allocate /* handle it */
1156
1157 /* All done -- return from exception. */
1158
1159 ld r10,PACA_EXSLB+EX_LR(r13)
1160 ld r3,PACA_EXSLB+EX_R3(r13)
1161 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
1162#ifdef CONFIG_PPC_ISERIES
1163 ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
1164#endif /* CONFIG_PPC_ISERIES */
1165
1166 mtlr r10
1167
1168 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
1169 beq- unrecov_slb
1170
1171.machine push
1172.machine "power4"
1173 mtcrf 0x80,r9
1174 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
1175.machine pop
1176
1177#ifdef CONFIG_PPC_ISERIES
1178 mtspr SPRN_SRR0,r11
1179 mtspr SPRN_SRR1,r12
1180#endif /* CONFIG_PPC_ISERIES */
1181 ld r9,PACA_EXSLB+EX_R9(r13)
1182 ld r10,PACA_EXSLB+EX_R10(r13)
1183 ld r11,PACA_EXSLB+EX_R11(r13)
1184 ld r12,PACA_EXSLB+EX_R12(r13)
1185 ld r13,PACA_EXSLB+EX_R13(r13)
1186 rfid
1187 b . /* prevent speculative execution */
1188
1189unrecov_slb:
1190 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
1191 DISABLE_INTS
1192 bl .save_nvgprs
11931: addi r3,r1,STACK_FRAME_OVERHEAD
1194 bl .unrecoverable_exception
1195 b 1b
1196
1197/*
1198 * Space for CPU0's segment table. 1341 * Space for CPU0's segment table.
1199 * 1342 *
1200 * On iSeries, the hypervisor must fill in at least one entry before 1343 * On iSeries, the hypervisor must fill in at least one entry before
@@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start)
1569#endif 1712#endif
1570 /* Initialize the first segment table (or SLB) entry */ 1713 /* Initialize the first segment table (or SLB) entry */
1571 ld r3,PACASTABVIRT(r13) /* get addr of segment table */ 1714 ld r3,PACASTABVIRT(r13) /* get addr of segment table */
1715BEGIN_FTR_SECTION
1572 bl .stab_initialize 1716 bl .stab_initialize
1717END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
1718 bl .slb_initialize
1573 1719
1574 /* Initialize the kernel stack. Just a repeat for iSeries. */ 1720 /* Initialize the kernel stack. Just a repeat for iSeries. */
1575 LOADADDR(r3,current_set) 1721 LOADADDR(r3,current_set)
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index eded971d1bf9..5a05a797485f 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
25 .xRanges = { 25 .xRanges = {
26 { .xPages = HvPagesToMap, 26 { .xPages = HvPagesToMap,
27 .xOffset = 0, 27 .xOffset = 0,
28 .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT), 28 .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT),
29 }, 29 },
30 }, 30 },
31}; 31};
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 29f6e875cf1c..de69fb37c731 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -552,12 +552,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
552#ifdef CONFIG_PPC64 552#ifdef CONFIG_PPC64
553 if (cpu_has_feature(CPU_FTR_SLB)) { 553 if (cpu_has_feature(CPU_FTR_SLB)) {
554 unsigned long sp_vsid = get_kernel_vsid(sp); 554 unsigned long sp_vsid = get_kernel_vsid(sp);
555 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
555 556
556 sp_vsid <<= SLB_VSID_SHIFT; 557 sp_vsid <<= SLB_VSID_SHIFT;
557 sp_vsid |= SLB_VSID_KERNEL; 558 sp_vsid |= SLB_VSID_KERNEL | llp;
558 if (cpu_has_feature(CPU_FTR_16M_PAGE))
559 sp_vsid |= SLB_VSID_L;
560
561 p->thread.ksp_vsid = sp_vsid; 559 p->thread.ksp_vsid = sp_vsid;
562 } 560 }
563 561
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index eec2da695508..3675ef4bac90 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -724,10 +724,10 @@ static inline char *find_flat_dt_string(u32 offset)
724 * used to extract the memory informations at boot before we can 724 * used to extract the memory informations at boot before we can
725 * unflatten the tree 725 * unflatten the tree
726 */ 726 */
727static int __init scan_flat_dt(int (*it)(unsigned long node, 727int __init of_scan_flat_dt(int (*it)(unsigned long node,
728 const char *uname, int depth, 728 const char *uname, int depth,
729 void *data), 729 void *data),
730 void *data) 730 void *data)
731{ 731{
732 unsigned long p = ((unsigned long)initial_boot_params) + 732 unsigned long p = ((unsigned long)initial_boot_params) +
733 initial_boot_params->off_dt_struct; 733 initial_boot_params->off_dt_struct;
@@ -784,8 +784,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
784 * This function can be used within scan_flattened_dt callback to get 784 * This function can be used within scan_flattened_dt callback to get
785 * access to properties 785 * access to properties
786 */ 786 */
787static void* __init get_flat_dt_prop(unsigned long node, const char *name, 787void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
788 unsigned long *size) 788 unsigned long *size)
789{ 789{
790 unsigned long p = node; 790 unsigned long p = node;
791 791
@@ -1087,7 +1087,7 @@ void __init unflatten_device_tree(void)
1087static int __init early_init_dt_scan_cpus(unsigned long node, 1087static int __init early_init_dt_scan_cpus(unsigned long node,
1088 const char *uname, int depth, void *data) 1088 const char *uname, int depth, void *data)
1089{ 1089{
1090 char *type = get_flat_dt_prop(node, "device_type", NULL); 1090 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
1091 u32 *prop; 1091 u32 *prop;
1092 unsigned long size = 0; 1092 unsigned long size = 0;
1093 1093
@@ -1095,19 +1095,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1095 if (type == NULL || strcmp(type, "cpu") != 0) 1095 if (type == NULL || strcmp(type, "cpu") != 0)
1096 return 0; 1096 return 0;
1097 1097
1098#ifdef CONFIG_PPC_PSERIES
1099 /* On LPAR, look for the first ibm,pft-size property for the hash table size
1100 */
1101 if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
1102 u32 *pft_size;
1103 pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL);
1104 if (pft_size != NULL) {
1105 /* pft_size[0] is the NUMA CEC cookie */
1106 ppc64_pft_size = pft_size[1];
1107 }
1108 }
1109#endif
1110
1111 boot_cpuid = 0; 1098 boot_cpuid = 0;
1112 boot_cpuid_phys = 0; 1099 boot_cpuid_phys = 0;
1113 if (initial_boot_params && initial_boot_params->version >= 2) { 1100 if (initial_boot_params && initial_boot_params->version >= 2) {
@@ -1117,8 +1104,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1117 boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; 1104 boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
1118 } else { 1105 } else {
1119 /* Check if it's the boot-cpu, set it's hw index now */ 1106 /* Check if it's the boot-cpu, set it's hw index now */
1120 if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { 1107 if (of_get_flat_dt_prop(node,
1121 prop = get_flat_dt_prop(node, "reg", NULL); 1108 "linux,boot-cpu", NULL) != NULL) {
1109 prop = of_get_flat_dt_prop(node, "reg", NULL);
1122 if (prop != NULL) 1110 if (prop != NULL)
1123 boot_cpuid_phys = *prop; 1111 boot_cpuid_phys = *prop;
1124 } 1112 }
@@ -1127,14 +1115,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1127 1115
1128#ifdef CONFIG_ALTIVEC 1116#ifdef CONFIG_ALTIVEC
1129 /* Check if we have a VMX and eventually update CPU features */ 1117 /* Check if we have a VMX and eventually update CPU features */
1130 prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size); 1118 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size);
1131 if (prop && (*prop) > 0) { 1119 if (prop && (*prop) > 0) {
1132 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; 1120 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
1133 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; 1121 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
1134 } 1122 }
1135 1123
1136 /* Same goes for Apple's "altivec" property */ 1124 /* Same goes for Apple's "altivec" property */
1137 prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); 1125 prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
1138 if (prop) { 1126 if (prop) {
1139 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; 1127 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
1140 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; 1128 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1147,7 +1135,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
1147 * this by looking at the size of the ibm,ppc-interrupt-server#s 1135 * this by looking at the size of the ibm,ppc-interrupt-server#s
1148 * property 1136 * property
1149 */ 1137 */
1150 prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", 1138 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
1151 &size); 1139 &size);
1152 cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; 1140 cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
1153 if (prop && ((size / sizeof(u32)) > 1)) 1141 if (prop && ((size / sizeof(u32)) > 1))
@@ -1170,7 +1158,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1170 return 0; 1158 return 0;
1171 1159
1172 /* get platform type */ 1160 /* get platform type */
1173 prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); 1161 prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
1174 if (prop == NULL) 1162 if (prop == NULL)
1175 return 0; 1163 return 0;
1176#ifdef CONFIG_PPC64 1164#ifdef CONFIG_PPC64
@@ -1183,21 +1171,21 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1183 1171
1184#ifdef CONFIG_PPC64 1172#ifdef CONFIG_PPC64
1185 /* check if iommu is forced on or off */ 1173 /* check if iommu is forced on or off */
1186 if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) 1174 if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
1187 iommu_is_off = 1; 1175 iommu_is_off = 1;
1188 if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) 1176 if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
1189 iommu_force_on = 1; 1177 iommu_force_on = 1;
1190#endif 1178#endif
1191 1179
1192 lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL); 1180 lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
1193 if (lprop) 1181 if (lprop)
1194 memory_limit = *lprop; 1182 memory_limit = *lprop;
1195 1183
1196#ifdef CONFIG_PPC64 1184#ifdef CONFIG_PPC64
1197 lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); 1185 lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
1198 if (lprop) 1186 if (lprop)
1199 tce_alloc_start = *lprop; 1187 tce_alloc_start = *lprop;
1200 lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); 1188 lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
1201 if (lprop) 1189 if (lprop)
1202 tce_alloc_end = *lprop; 1190 tce_alloc_end = *lprop;
1203#endif 1191#endif
@@ -1209,9 +1197,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1209 { 1197 {
1210 u64 *basep, *entryp; 1198 u64 *basep, *entryp;
1211 1199
1212 basep = get_flat_dt_prop(node, "linux,rtas-base", NULL); 1200 basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
1213 entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL); 1201 entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
1214 prop = get_flat_dt_prop(node, "linux,rtas-size", NULL); 1202 prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
1215 if (basep && entryp && prop) { 1203 if (basep && entryp && prop) {
1216 rtas.base = *basep; 1204 rtas.base = *basep;
1217 rtas.entry = *entryp; 1205 rtas.entry = *entryp;
@@ -1232,11 +1220,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
1232 if (depth != 0) 1220 if (depth != 0)
1233 return 0; 1221 return 0;
1234 1222
1235 prop = get_flat_dt_prop(node, "#size-cells", NULL); 1223 prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
1236 dt_root_size_cells = (prop == NULL) ? 1 : *prop; 1224 dt_root_size_cells = (prop == NULL) ? 1 : *prop;
1237 DBG("dt_root_size_cells = %x\n", dt_root_size_cells); 1225 DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
1238 1226
1239 prop = get_flat_dt_prop(node, "#address-cells", NULL); 1227 prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
1240 dt_root_addr_cells = (prop == NULL) ? 2 : *prop; 1228 dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
1241 DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); 1229 DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
1242 1230
@@ -1271,7 +1259,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
1271static int __init early_init_dt_scan_memory(unsigned long node, 1259static int __init early_init_dt_scan_memory(unsigned long node,
1272 const char *uname, int depth, void *data) 1260 const char *uname, int depth, void *data)
1273{ 1261{
1274 char *type = get_flat_dt_prop(node, "device_type", NULL); 1262 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
1275 cell_t *reg, *endp; 1263 cell_t *reg, *endp;
1276 unsigned long l; 1264 unsigned long l;
1277 1265
@@ -1279,7 +1267,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
1279 if (type == NULL || strcmp(type, "memory") != 0) 1267 if (type == NULL || strcmp(type, "memory") != 0)
1280 return 0; 1268 return 0;
1281 1269
1282 reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); 1270 reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
1283 if (reg == NULL) 1271 if (reg == NULL)
1284 return 0; 1272 return 0;
1285 1273
@@ -1343,12 +1331,12 @@ void __init early_init_devtree(void *params)
1343 * device-tree, including the platform type, initrd location and 1331 * device-tree, including the platform type, initrd location and
1344 * size, TCE reserve, and more ... 1332 * size, TCE reserve, and more ...
1345 */ 1333 */
1346 scan_flat_dt(early_init_dt_scan_chosen, NULL); 1334 of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
1347 1335
1348 /* Scan memory nodes and rebuild LMBs */ 1336 /* Scan memory nodes and rebuild LMBs */
1349 lmb_init(); 1337 lmb_init();
1350 scan_flat_dt(early_init_dt_scan_root, NULL); 1338 of_scan_flat_dt(early_init_dt_scan_root, NULL);
1351 scan_flat_dt(early_init_dt_scan_memory, NULL); 1339 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
1352 lmb_enforce_memory_limit(memory_limit); 1340 lmb_enforce_memory_limit(memory_limit);
1353 lmb_analyze(); 1341 lmb_analyze();
1354#ifdef CONFIG_PPC64 1342#ifdef CONFIG_PPC64
@@ -1363,10 +1351,10 @@ void __init early_init_devtree(void *params)
1363 1351
1364 DBG("Scanning CPUs ...\n"); 1352 DBG("Scanning CPUs ...\n");
1365 1353
1366 /* Retreive hash table size from flattened tree plus other 1354 /* Retreive CPU related informations from the flat tree
1367 * CPU related informations (altivec support, boot CPU ID, ...) 1355 * (altivec support, boot CPU ID, ...)
1368 */ 1356 */
1369 scan_flat_dt(early_init_dt_scan_cpus, NULL); 1357 of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
1370 1358
1371 DBG(" <- early_init_devtree()\n"); 1359 DBG(" <- early_init_devtree()\n");
1372} 1360}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5f8154f95f96..785fd9d7b386 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -271,16 +271,21 @@ void __init early_setup(unsigned long dt_ptr)
271 DBG("Found, Initializing memory management...\n"); 271 DBG("Found, Initializing memory management...\n");
272 272
273 /* 273 /*
274 * Initialize stab / SLB management 274 * Initialize the MMU Hash table and create the linear mapping
275 * of memory. Has to be done before stab/slb initialization as
276 * this is currently where the page size encoding is obtained
275 */ 277 */
276 if (!firmware_has_feature(FW_FEATURE_ISERIES)) 278 htab_initialize();
277 stab_initialize(lpaca->stab_real);
278 279
279 /* 280 /*
280 * Initialize the MMU Hash table and create the linear mapping 281 * Initialize stab / SLB management except on iSeries
281 * of memory
282 */ 282 */
283 htab_initialize(); 283 if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
284 if (cpu_has_feature(CPU_FTR_SLB))
285 slb_initialize();
286 else
287 stab_initialize(lpaca->stab_real);
288 }
284 289
285 DBG(" <- early_setup()\n"); 290 DBG(" <- early_setup()\n");
286} 291}
@@ -545,10 +550,12 @@ static void __init irqstack_early_init(void)
545 * SLB misses on them. 550 * SLB misses on them.
546 */ 551 */
547 for_each_cpu(i) { 552 for_each_cpu(i) {
548 softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, 553 softirq_ctx[i] = (struct thread_info *)
549 THREAD_SIZE, 0x10000000)); 554 __va(lmb_alloc_base(THREAD_SIZE,
550 hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, 555 THREAD_SIZE, 0x10000000));
551 THREAD_SIZE, 0x10000000)); 556 hardirq_ctx[i] = (struct thread_info *)
557 __va(lmb_alloc_base(THREAD_SIZE,
558 THREAD_SIZE, 0x10000000));
552 } 559 }
553} 560}
554#else 561#else
@@ -576,8 +583,8 @@ static void __init emergency_stack_init(void)
576 limit = min(0x10000000UL, lmb.rmo_size); 583 limit = min(0x10000000UL, lmb.rmo_size);
577 584
578 for_each_cpu(i) 585 for_each_cpu(i)
579 paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, 586 paca[i].emergency_sp =
580 limit)) + PAGE_SIZE; 587 __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
581} 588}
582 589
583/* 590/*
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 733d61618bbf..40523b140109 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -11,7 +11,7 @@
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/ppc_asm.h> 12#include <asm/ppc_asm.h>
13 13
14_GLOBAL(copy_page) 14_GLOBAL(copy_4K_page)
15 std r31,-8(1) 15 std r31,-8(1)
16 std r30,-16(1) 16 std r30,-16(1)
17 std r29,-24(1) 17 std r29,-24(1)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a0b3fbbd6fb1..6d69ef39b7df 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
24 std r4,-16(r1) 24 std r4,-16(r1)
25 std r5,-8(r1) 25 std r5,-8(r1)
26 dcbt 0,r4 26 dcbt 0,r4
27 beq .Lcopy_page 27 beq .Lcopy_page_4K
28 andi. r6,r6,7 28 andi. r6,r6,7
29 mtcrf 0x01,r5 29 mtcrf 0x01,r5
30 blt cr1,.Lshort_copy 30 blt cr1,.Lshort_copy
@@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user)
366 * above (following the .Ldst_aligned label) but it runs slightly 366 * above (following the .Ldst_aligned label) but it runs slightly
367 * slower on POWER3. 367 * slower on POWER3.
368 */ 368 */
369.Lcopy_page: 369.Lcopy_page_4K:
370 std r31,-32(1) 370 std r31,-32(1)
371 std r30,-40(1) 371 std r30,-40(1)
372 std r29,-48(1) 372 std r29,-48(1)
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eeea..e0d02c4a2615 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
1/* 1/*
2 * ppc64 MMU hashtable management routines 2 * ppc64 MMU hashtable management routines
3 * 3 *
4 * (c) Copyright IBM Corp. 2003 4 * (c) Copyright IBM Corp. 2003, 2005
5 * 5 *
6 * Maintained by: Benjamin Herrenschmidt 6 * Maintained by: Benjamin Herrenschmidt
7 * <benh@kernel.crashing.org> 7 * <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
10 * described in the kernel's COPYING file. 10 * described in the kernel's COPYING file.
11 */ 11 */
12 12
13#include <linux/config.h>
13#include <asm/reg.h> 14#include <asm/reg.h>
14#include <asm/pgtable.h> 15#include <asm/pgtable.h>
15#include <asm/mmu.h> 16#include <asm/mmu.h>
@@ -42,14 +43,24 @@
42/* Save non-volatile offsets */ 43/* Save non-volatile offsets */
43#define STK_REG(i) (112 + ((i)-14)*8) 44#define STK_REG(i) (112 + ((i)-14)*8)
44 45
46
47#ifndef CONFIG_PPC_64K_PAGES
48
49/*****************************************************************************
50 * *
51 * 4K SW & 4K HW pages implementation *
52 * *
53 *****************************************************************************/
54
55
45/* 56/*
46 * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, 57 * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
47 * pte_t *ptep, unsigned long trap, int local) 58 * pte_t *ptep, unsigned long trap, int local)
48 * 59 *
49 * Adds a page to the hash table. This is the non-LPAR version for now 60 * Adds a 4K page to the hash table in a segment of 4K pages only
50 */ 61 */
51 62
52_GLOBAL(__hash_page) 63_GLOBAL(__hash_page_4K)
53 mflr r0 64 mflr r0
54 std r0,16(r1) 65 std r0,16(r1)
55 stdu r1,-STACKFRAMESIZE(r1) 66 stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
88 /* If so, just bail out and refault if needed. Someone else 99 /* If so, just bail out and refault if needed. Someone else
89 * is changing this PTE anyway and might hash it. 100 * is changing this PTE anyway and might hash it.
90 */ 101 */
91 bne- bail_ok 102 bne- htab_bail_ok
103
92 /* Prepare new PTE value (turn access RW into DIRTY, then 104 /* Prepare new PTE value (turn access RW into DIRTY, then
93 * add BUSY,HASHPTE and ACCESSED) 105 * add BUSY,HASHPTE and ACCESSED)
94 */ 106 */
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
118 130
119 /* Convert linux PTE bits into HW equivalents */ 131 /* Convert linux PTE bits into HW equivalents */
120 andi. r3,r30,0x1fe /* Get basic set of flags */ 132 andi. r3,r30,0x1fe /* Get basic set of flags */
121 xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ 133 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
122 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ 134 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
123 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ 135 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
124 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ 136 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
125 andc r0,r30,r0 /* r0 = pte & ~r0 */ 137 andc r0,r30,r0 /* r0 = pte & ~r0 */
126 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ 138 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
127 139
@@ -158,19 +170,21 @@ htab_insert_pte:
158 andc r30,r30,r0 170 andc r30,r30,r0
159 ori r30,r30,_PAGE_HASHPTE 171 ori r30,r30,_PAGE_HASHPTE
160 172
161 /* page number in r5 */ 173 /* physical address r5 */
162 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 174 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
175 sldi r5,r5,PAGE_SHIFT
163 176
164 /* Calculate primary group hash */ 177 /* Calculate primary group hash */
165 and r0,r28,r27 178 and r0,r28,r27
166 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 179 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
167 180
168 /* Call ppc_md.hpte_insert */ 181 /* Call ppc_md.hpte_insert */
169 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 182 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
170 mr r4,r29 /* Retreive va */ 183 mr r4,r29 /* Retreive va */
171 li r6,0 /* no vflags */ 184 li r7,0 /* !bolted, !secondary */
185 li r8,MMU_PAGE_4K /* page size */
172_GLOBAL(htab_call_hpte_insert1) 186_GLOBAL(htab_call_hpte_insert1)
173 bl . /* Will be patched by htab_finish_init() */ 187 bl . /* Patched by htab_finish_init() */
174 cmpdi 0,r3,0 188 cmpdi 0,r3,0
175 bge htab_pte_insert_ok /* Insertion successful */ 189 bge htab_pte_insert_ok /* Insertion successful */
176 cmpdi 0,r3,-2 /* Critical failure */ 190 cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
178 192
179 /* Now try secondary slot */ 193 /* Now try secondary slot */
180 194
181 /* page number in r5 */ 195 /* physical address r5 */
182 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT 196 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
197 sldi r5,r5,PAGE_SHIFT
183 198
184 /* Calculate secondary group hash */ 199 /* Calculate secondary group hash */
185 andc r0,r27,r28 200 andc r0,r27,r28
186 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ 201 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
187 202
188 /* Call ppc_md.hpte_insert */ 203 /* Call ppc_md.hpte_insert */
189 ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ 204 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
190 mr r4,r29 /* Retreive va */ 205 mr r4,r29 /* Retreive va */
191 li r6,HPTE_V_SECONDARY@l /* secondary slot */ 206 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
207 li r8,MMU_PAGE_4K /* page size */
192_GLOBAL(htab_call_hpte_insert2) 208_GLOBAL(htab_call_hpte_insert2)
193 bl . /* Will be patched by htab_finish_init() */ 209 bl . /* Patched by htab_finish_init() */
194 cmpdi 0,r3,0 210 cmpdi 0,r3,0
195 bge+ htab_pte_insert_ok /* Insertion successful */ 211 bge+ htab_pte_insert_ok /* Insertion successful */
196 cmpdi 0,r3,-2 /* Critical failure */ 212 cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
207 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 223 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
208 /* Call ppc_md.hpte_remove */ 224 /* Call ppc_md.hpte_remove */
209_GLOBAL(htab_call_hpte_remove) 225_GLOBAL(htab_call_hpte_remove)
210 bl . /* Will be patched by htab_finish_init() */ 226 bl . /* Patched by htab_finish_init() */
211 227
212 /* Try all again */ 228 /* Try all again */
213 b htab_insert_pte 229 b htab_insert_pte
214 230
215bail_ok: 231htab_bail_ok:
216 li r3,0 232 li r3,0
217 b bail 233 b htab_bail
218 234
219htab_pte_insert_ok: 235htab_pte_insert_ok:
220 /* Insert slot number & secondary bit in PTE */ 236 /* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
227 ld r6,STK_PARM(r6)(r1) 243 ld r6,STK_PARM(r6)(r1)
228 std r30,0(r6) 244 std r30,0(r6)
229 li r3, 0 245 li r3, 0
230bail: 246htab_bail:
231 ld r27,STK_REG(r27)(r1) 247 ld r27,STK_REG(r27)(r1)
232 ld r28,STK_REG(r28)(r1) 248 ld r28,STK_REG(r28)(r1)
233 ld r29,STK_REG(r29)(r1) 249 ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
256 272
257 /* Call ppc_md.hpte_updatepp */ 273 /* Call ppc_md.hpte_updatepp */
258 mr r5,r29 /* va */ 274 mr r5,r29 /* va */
259 li r6,0 /* large is 0 */ 275 li r6,MMU_PAGE_4K /* page size */
260 ld r7,STK_PARM(r8)(r1) /* get "local" param */ 276 ld r7,STK_PARM(r8)(r1) /* get "local" param */
261_GLOBAL(htab_call_hpte_updatepp) 277_GLOBAL(htab_call_hpte_updatepp)
262 bl . /* Will be patched by htab_finish_init() */ 278 bl . /* Patched by htab_finish_init() */
263 279
264 /* if we failed because typically the HPTE wasn't really here 280 /* if we failed because typically the HPTE wasn't really here
265 * we try an insertion. 281 * we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
276 /* Bail out clearing reservation */ 292 /* Bail out clearing reservation */
277 stdcx. r31,0,r6 293 stdcx. r31,0,r6
278 li r3,1 294 li r3,1
279 b bail 295 b htab_bail
296
297htab_pte_insert_failure:
298 /* Bail out restoring old PTE */
299 ld r6,STK_PARM(r6)(r1)
300 std r31,0(r6)
301 li r3,-1
302 b htab_bail
303
304
305#else /* CONFIG_PPC_64K_PAGES */
306
307
308/*****************************************************************************
309 * *
310 * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
311 * *
312 *****************************************************************************/
313
314/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
315 * pte_t *ptep, unsigned long trap, int local)
316 */
317
318/*
319 * For now, we do NOT implement Admixed pages
320 */
321_GLOBAL(__hash_page_4K)
322 mflr r0
323 std r0,16(r1)
324 stdu r1,-STACKFRAMESIZE(r1)
325 /* Save all params that we need after a function call */
326 std r6,STK_PARM(r6)(r1)
327 std r8,STK_PARM(r8)(r1)
328
329 /* Add _PAGE_PRESENT to access */
330 ori r4,r4,_PAGE_PRESENT
331
332 /* Save non-volatile registers.
333 * r31 will hold "old PTE"
334 * r30 is "new PTE"
335 * r29 is "va"
336 * r28 is a hash value
337 * r27 is hashtab mask (maybe dynamic patched instead ?)
338 * r26 is the hidx mask
339 * r25 is the index in combo page
340 */
341 std r25,STK_REG(r25)(r1)
342 std r26,STK_REG(r26)(r1)
343 std r27,STK_REG(r27)(r1)
344 std r28,STK_REG(r28)(r1)
345 std r29,STK_REG(r29)(r1)
346 std r30,STK_REG(r30)(r1)
347 std r31,STK_REG(r31)(r1)
348
349 /* Step 1:
350 *
351 * Check permissions, atomically mark the linux PTE busy
352 * and hashed.
353 */
3541:
355 ldarx r31,0,r6
356 /* Check access rights (access & ~(pte_val(*ptep))) */
357 andc. r0,r4,r31
358 bne- htab_wrong_access
359 /* Check if PTE is busy */
360 andi. r0,r31,_PAGE_BUSY
361 /* If so, just bail out and refault if needed. Someone else
362 * is changing this PTE anyway and might hash it.
363 */
364 bne- htab_bail_ok
365 /* Prepare new PTE value (turn access RW into DIRTY, then
366 * add BUSY and ACCESSED)
367 */
368 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
369 or r30,r30,r31
370 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
371 /* Write the linux PTE atomically (setting busy) */
372 stdcx. r30,0,r6
373 bne- 1b
374 isync
375
376 /* Step 2:
377 *
378 * Insert/Update the HPTE in the hash table. At this point,
379 * r4 (access) is re-useable, we use it for the new HPTE flags
380 */
381
382 /* Load the hidx index */
383 rldicl r25,r3,64-12,60
384
385 /* Calc va and put it in r29 */
386 rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
387 rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
388 or r29,r3,r29 /* r29 = va
389
390 /* Calculate hash value for primary slot and store it in r28 */
391 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
392 rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
393 xor r28,r5,r0
394
395 /* Convert linux PTE bits into HW equivalents */
396 andi. r3,r30,0x1fe /* Get basic set of flags */
397 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
398 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
399 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
400 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
401 andc r0,r30,r0 /* r0 = pte & ~r0 */
402 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
403
404 /* We eventually do the icache sync here (maybe inline that
405 * code rather than call a C function...)
406 */
407BEGIN_FTR_SECTION
408 mr r4,r30
409 mr r5,r7
410 bl .hash_page_do_lazy_icache
411END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
412
413 /* At this point, r3 contains new PP bits, save them in
414 * place of "access" in the param area (sic)
415 */
416 std r3,STK_PARM(r4)(r1)
417
418 /* Get htab_hash_mask */
419 ld r4,htab_hash_mask@got(2)
420 ld r27,0(r4) /* htab_hash_mask -> r27 */
421
422 /* Check if we may already be in the hashtable, in this case, we
423 * go to out-of-line code to try to modify the HPTE. We look for
424 * the bit at (1 >> (index + 32))
425 */
426 andi. r0,r31,_PAGE_HASHPTE
427 li r26,0 /* Default hidx */
428 beq htab_insert_pte
429 ld r6,STK_PARM(r6)(r1)
430 ori r26,r6,0x8000 /* Load the hidx mask */
431 ld r26,0(r26)
432 addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
433 rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
434 bne htab_modify_pte
435
436htab_insert_pte:
437 /* real page number in r5, PTE RPN value + index */
438 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
439 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
440 add r5,r5,r25
441 sldi r5,r5,HW_PAGE_SHIFT
442
443 /* Calculate primary group hash */
444 and r0,r28,r27
445 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
446
447 /* Call ppc_md.hpte_insert */
448 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
449 mr r4,r29 /* Retreive va */
450 li r7,0 /* !bolted, !secondary */
451 li r8,MMU_PAGE_4K /* page size */
452_GLOBAL(htab_call_hpte_insert1)
453 bl . /* patched by htab_finish_init() */
454 cmpdi 0,r3,0
455 bge htab_pte_insert_ok /* Insertion successful */
456 cmpdi 0,r3,-2 /* Critical failure */
457 beq- htab_pte_insert_failure
458
459 /* Now try secondary slot */
460
461 /* real page number in r5, PTE RPN value + index */
462 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
463 sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
464 add r5,r5,r25
465 sldi r5,r5,HW_PAGE_SHIFT
466
467 /* Calculate secondary group hash */
468 andc r0,r27,r28
469 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
470
471 /* Call ppc_md.hpte_insert */
472 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
473 mr r4,r29 /* Retreive va */
474 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
475 li r8,MMU_PAGE_4K /* page size */
476_GLOBAL(htab_call_hpte_insert2)
477 bl . /* patched by htab_finish_init() */
478 cmpdi 0,r3,0
479 bge+ htab_pte_insert_ok /* Insertion successful */
480 cmpdi 0,r3,-2 /* Critical failure */
481 beq- htab_pte_insert_failure
482
483 /* Both are full, we need to evict something */
484 mftb r0
485 /* Pick a random group based on TB */
486 andi. r0,r0,1
487 mr r5,r28
488 bne 2f
489 not r5,r5
4902: and r0,r5,r27
491 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
492 /* Call ppc_md.hpte_remove */
493_GLOBAL(htab_call_hpte_remove)
494 bl . /* patched by htab_finish_init() */
495
496 /* Try all again */
497 b htab_insert_pte
498
499htab_bail_ok:
500 li r3,0
501 b htab_bail
502
503htab_pte_insert_ok:
504 /* Insert slot number & secondary bit in PTE second half,
505 * clear _PAGE_BUSY and set approriate HPTE slot bit
506 */
507 ld r6,STK_PARM(r6)(r1)
508 li r0,_PAGE_BUSY
509 andc r30,r30,r0
510 /* HPTE SUB bit */
511 li r0,1
512 subfic r5,r25,27 /* Must match bit position in */
513 sld r0,r0,r5 /* pgtable.h */
514 or r30,r30,r0
515 /* hindx */
516 sldi r5,r25,2
517 sld r3,r3,r5
518 li r4,0xf
519 sld r4,r4,r5
520 andc r26,r26,r4
521 or r26,r26,r3
522 ori r5,r6,0x8000
523 std r26,0(r5)
524 lwsync
525 std r30,0(r6)
526 li r3, 0
527htab_bail:
528 ld r25,STK_REG(r25)(r1)
529 ld r26,STK_REG(r26)(r1)
530 ld r27,STK_REG(r27)(r1)
531 ld r28,STK_REG(r28)(r1)
532 ld r29,STK_REG(r29)(r1)
533 ld r30,STK_REG(r30)(r1)
534 ld r31,STK_REG(r31)(r1)
535 addi r1,r1,STACKFRAMESIZE
536 ld r0,16(r1)
537 mtlr r0
538 blr
539
540htab_modify_pte:
541 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
542 mr r4,r3
543 sldi r5,r25,2
544 srd r3,r26,r5
545
546 /* Secondary group ? if yes, get a inverted hash value */
547 mr r5,r28
548 andi. r0,r3,0x8 /* page secondary ? */
549 beq 1f
550 not r5,r5
5511: andi. r3,r3,0x7 /* extract idx alone */
552
553 /* Calculate proper slot value for ppc_md.hpte_updatepp */
554 and r0,r5,r27
555 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
556 add r3,r0,r3 /* add slot idx */
557
558 /* Call ppc_md.hpte_updatepp */
559 mr r5,r29 /* va */
560 li r6,MMU_PAGE_4K /* page size */
561 ld r7,STK_PARM(r8)(r1) /* get "local" param */
562_GLOBAL(htab_call_hpte_updatepp)
563 bl . /* patched by htab_finish_init() */
564
565 /* if we failed because typically the HPTE wasn't really here
566 * we try an insertion.
567 */
568 cmpdi 0,r3,-1
569 beq- htab_insert_pte
570
571 /* Clear the BUSY bit and Write out the PTE */
572 li r0,_PAGE_BUSY
573 andc r30,r30,r0
574 ld r6,STK_PARM(r6)(r1)
575 std r30,0(r6)
576 li r3,0
577 b htab_bail
578
579htab_wrong_access:
580 /* Bail out clearing reservation */
581 stdcx. r31,0,r6
582 li r3,1
583 b htab_bail
280 584
281htab_pte_insert_failure: 585htab_pte_insert_failure:
282 /* Bail out restoring old PTE */ 586 /* Bail out restoring old PTE */
283 ld r6,STK_PARM(r6)(r1) 587 ld r6,STK_PARM(r6)(r1)
284 std r31,0(r6) 588 std r31,0(r6)
285 li r3,-1 589 li r3,-1
286 b bail 590 b htab_bail
591
592
593/*****************************************************************************
594 * *
595 * 64K SW & 64K HW in a 64K segment pages implementation *
596 * *
597 *****************************************************************************/
598
599_GLOBAL(__hash_page_64K)
600 mflr r0
601 std r0,16(r1)
602 stdu r1,-STACKFRAMESIZE(r1)
603 /* Save all params that we need after a function call */
604 std r6,STK_PARM(r6)(r1)
605 std r8,STK_PARM(r8)(r1)
606
607 /* Add _PAGE_PRESENT to access */
608 ori r4,r4,_PAGE_PRESENT
609
610 /* Save non-volatile registers.
611 * r31 will hold "old PTE"
612 * r30 is "new PTE"
613 * r29 is "va"
614 * r28 is a hash value
615 * r27 is hashtab mask (maybe dynamic patched instead ?)
616 */
617 std r27,STK_REG(r27)(r1)
618 std r28,STK_REG(r28)(r1)
619 std r29,STK_REG(r29)(r1)
620 std r30,STK_REG(r30)(r1)
621 std r31,STK_REG(r31)(r1)
622
623 /* Step 1:
624 *
625 * Check permissions, atomically mark the linux PTE busy
626 * and hashed.
627 */
6281:
629 ldarx r31,0,r6
630 /* Check access rights (access & ~(pte_val(*ptep))) */
631 andc. r0,r4,r31
632 bne- ht64_wrong_access
633 /* Check if PTE is busy */
634 andi. r0,r31,_PAGE_BUSY
635 /* If so, just bail out and refault if needed. Someone else
636 * is changing this PTE anyway and might hash it.
637 */
638 bne- ht64_bail_ok
639 /* Prepare new PTE value (turn access RW into DIRTY, then
640 * add BUSY,HASHPTE and ACCESSED)
641 */
642 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
643 or r30,r30,r31
644 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
645 /* Write the linux PTE atomically (setting busy) */
646 stdcx. r30,0,r6
647 bne- 1b
648 isync
649
650 /* Step 2:
651 *
652 * Insert/Update the HPTE in the hash table. At this point,
653 * r4 (access) is re-useable, we use it for the new HPTE flags
654 */
655
656 /* Calc va and put it in r29 */
657 rldicr r29,r5,28,63-28
658 rldicl r3,r3,0,36
659 or r29,r3,r29
660
661 /* Calculate hash value for primary slot and store it in r28 */
662 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
663 rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
664 xor r28,r5,r0
665
666 /* Convert linux PTE bits into HW equivalents */
667 andi. r3,r30,0x1fe /* Get basic set of flags */
668 xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
669 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
670 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
671 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
672 andc r0,r30,r0 /* r0 = pte & ~r0 */
673 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
674
675 /* We eventually do the icache sync here (maybe inline that
676 * code rather than call a C function...)
677 */
678BEGIN_FTR_SECTION
679 mr r4,r30
680 mr r5,r7
681 bl .hash_page_do_lazy_icache
682END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
683
684 /* At this point, r3 contains new PP bits, save them in
685 * place of "access" in the param area (sic)
686 */
687 std r3,STK_PARM(r4)(r1)
688
689 /* Get htab_hash_mask */
690 ld r4,htab_hash_mask@got(2)
691 ld r27,0(r4) /* htab_hash_mask -> r27 */
692
693 /* Check if we may already be in the hashtable, in this case, we
694 * go to out-of-line code to try to modify the HPTE
695 */
696 andi. r0,r31,_PAGE_HASHPTE
697 bne ht64_modify_pte
698
699ht64_insert_pte:
700 /* Clear hpte bits in new pte (we also clear BUSY btw) and
701 * add _PAGE_HASHPTE
702 */
703 lis r0,_PAGE_HPTEFLAGS@h
704 ori r0,r0,_PAGE_HPTEFLAGS@l
705 andc r30,r30,r0
706 ori r30,r30,_PAGE_HASHPTE
707
708 /* Phyical address in r5 */
709 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
710 sldi r5,r5,PAGE_SHIFT
711
712 /* Calculate primary group hash */
713 and r0,r28,r27
714 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
715
716 /* Call ppc_md.hpte_insert */
717 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
718 mr r4,r29 /* Retreive va */
719 li r7,0 /* !bolted, !secondary */
720 li r8,MMU_PAGE_64K
721_GLOBAL(ht64_call_hpte_insert1)
722 bl . /* patched by htab_finish_init() */
723 cmpdi 0,r3,0
724 bge ht64_pte_insert_ok /* Insertion successful */
725 cmpdi 0,r3,-2 /* Critical failure */
726 beq- ht64_pte_insert_failure
727
728 /* Now try secondary slot */
729
730 /* Phyical address in r5 */
731 rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
732 sldi r5,r5,PAGE_SHIFT
733
734 /* Calculate secondary group hash */
735 andc r0,r27,r28
736 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
737
738 /* Call ppc_md.hpte_insert */
739 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
740 mr r4,r29 /* Retreive va */
741 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
742 li r8,MMU_PAGE_64K
743_GLOBAL(ht64_call_hpte_insert2)
744 bl . /* patched by htab_finish_init() */
745 cmpdi 0,r3,0
746 bge+ ht64_pte_insert_ok /* Insertion successful */
747 cmpdi 0,r3,-2 /* Critical failure */
748 beq- ht64_pte_insert_failure
749
750 /* Both are full, we need to evict something */
751 mftb r0
752 /* Pick a random group based on TB */
753 andi. r0,r0,1
754 mr r5,r28
755 bne 2f
756 not r5,r5
7572: and r0,r5,r27
758 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
759 /* Call ppc_md.hpte_remove */
760_GLOBAL(ht64_call_hpte_remove)
761 bl . /* patched by htab_finish_init() */
762
763 /* Try all again */
764 b ht64_insert_pte
765
766ht64_bail_ok:
767 li r3,0
768 b ht64_bail
769
770ht64_pte_insert_ok:
771 /* Insert slot number & secondary bit in PTE */
772 rldimi r30,r3,12,63-15
773
774 /* Write out the PTE with a normal write
775 * (maybe adding an eieio would still be good?)
776 */
777ht64_write_out_pte:
778 ld r6,STK_PARM(r6)(r1)
779 std r30,0(r6)
780 li r3, 0
781ht64_bail:
782 ld r27,STK_REG(r27)(r1)
783 ld r28,STK_REG(r28)(r1)
784 ld r29,STK_REG(r29)(r1)
785 ld r30,STK_REG(r30)(r1)
786 ld r31,STK_REG(r31)(r1)
787 addi r1,r1,STACKFRAMESIZE
788 ld r0,16(r1)
789 mtlr r0
790 blr
791
792ht64_modify_pte:
793 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
794 mr r4,r3
795 rlwinm r3,r31,32-12,29,31
796
797 /* Secondary group? If yes, get an inverted hash value */
798 mr r5,r28
799 andi. r0,r31,_PAGE_F_SECOND
800 beq 1f
801 not r5,r5
8021:
803 /* Calculate proper slot value for ppc_md.hpte_updatepp */
804 and r0,r5,r27
805 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
806 add r3,r0,r3 /* add slot idx */
807
808 /* Call ppc_md.hpte_updatepp */
809 mr r5,r29 /* va */
810 li r6,MMU_PAGE_64K
811 ld r7,STK_PARM(r8)(r1) /* get "local" param */
812_GLOBAL(ht64_call_hpte_updatepp)
813 bl . /* patched by htab_finish_init() */
814
815 /* If we failed, typically because the HPTE wasn't really here,
816 * we try an insertion.
817 */
818 cmpdi 0,r3,-1
819 beq- ht64_insert_pte
820
821 /* Clear the BUSY bit and Write out the PTE */
822 li r0,_PAGE_BUSY
823 andc r30,r30,r0
824 b ht64_write_out_pte
825
826ht64_wrong_access:
827 /* Bail out clearing reservation */
828 stdcx. r31,0,r6
829 li r3,1
830 b ht64_bail
831
832ht64_pte_insert_failure:
833 /* Bail out restoring old PTE */
834 ld r6,STK_PARM(r6)(r1)
835 std r31,0(r6)
836 li r3,-1
837 b ht64_bail
838
839
840#endif /* CONFIG_PPC_64K_PAGES */
287 841
288 842
843/*****************************************************************************
844 * *
845 * Huge pages implementation is in hugetlbpage.c *
846 * *
847 *****************************************************************************/
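
As a rough illustration (not part of the patch itself), the retry policy that ht64_insert_pte implements above looks like this in C: try the primary PTE group, then the secondary one, and if both are full evict an entry from a pseudo-randomly chosen group (timebase low bit) and start over. The toy hash table and the hpte_try_insert()/hpte_evict() helpers below are invented stand-ins for ppc_md.hpte_insert/ppc_md.hpte_remove.

    #include <stdio.h>

    #define HPTES_PER_GROUP 8
    #define NGROUPS         16                         /* toy hash table: 16 PTE groups */

    static int hpte_valid[NGROUPS * HPTES_PER_GROUP];  /* 1 = slot occupied */

    /* Find a free slot in one group; returns the slot index or -1 if full. */
    static long hpte_try_insert(unsigned long group)
    {
        for (int i = 0; i < HPTES_PER_GROUP; i++) {
            if (!hpte_valid[group + i]) {
                hpte_valid[group + i] = 1;
                return (long)(group + i);
            }
        }
        return -1;
    }

    /* Evict one entry (here simply the first) of a group, like hpte_remove(). */
    static void hpte_evict(unsigned long group)
    {
        hpte_valid[group] = 0;
    }

    /* Primary, then secondary, then evict-and-retry: the ht64_insert_pte flow. */
    static long hpte_insert_with_retry(unsigned long hash, unsigned long tb)
    {
        unsigned long mask      = NGROUPS - 1;
        unsigned long primary   = (hash & mask) * HPTES_PER_GROUP;
        unsigned long secondary = (~hash & mask) * HPTES_PER_GROUP;

        for (;;) {
            long slot = hpte_try_insert(primary);
            if (slot >= 0)
                return slot;
            slot = hpte_try_insert(secondary);
            if (slot >= 0)
                return slot;
            /* Both groups full: pick one from the timebase low bit,
             * evict an entry there, and retry the whole sequence. */
            hpte_evict((tb & 1) ? secondary : primary);
        }
    }

    int main(void)
    {
        for (unsigned long i = 0; i < 20; i++)
            printf("inserted at slot %ld\n", hpte_insert_with_retry(5, i));
        return 0;
    }
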
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c28..d96bcfe4c6f6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 */ 11 */
12
13#undef DEBUG_LOW
14
12#include <linux/spinlock.h> 15#include <linux/spinlock.h>
13#include <linux/bitops.h> 16#include <linux/bitops.h>
14#include <linux/threads.h> 17#include <linux/threads.h>
@@ -22,11 +25,84 @@
22#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
23#include <asm/tlb.h> 26#include <asm/tlb.h>
24#include <asm/cputable.h> 27#include <asm/cputable.h>
28#include <asm/udbg.h>
29
30#ifdef DEBUG_LOW
31#define DBG_LOW(fmt...) udbg_printf(fmt)
32#else
33#define DBG_LOW(fmt...)
34#endif
25 35
26#define HPTE_LOCK_BIT 3 36#define HPTE_LOCK_BIT 3
27 37
28static DEFINE_SPINLOCK(native_tlbie_lock); 38static DEFINE_SPINLOCK(native_tlbie_lock);
29 39
40static inline void __tlbie(unsigned long va, unsigned int psize)
41{
42 unsigned int penc;
43
44 /* clear top 16 bits, non SLS segment */
45 va &= ~(0xffffULL << 48);
46
47 switch (psize) {
48 case MMU_PAGE_4K:
49 va &= ~0xffful;
50 asm volatile("tlbie %0,0" : : "r" (va) : "memory");
51 break;
52 default:
53 penc = mmu_psize_defs[psize].penc;
54 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
55 va |= (0x7f >> (8 - penc)) << 12;
56 asm volatile("tlbie %0,1" : : "r" (va) : "memory");
57 break;
58 }
59}
60
61static inline void __tlbiel(unsigned long va, unsigned int psize)
62{
63 unsigned int penc;
64
65 /* clear top 16 bits, non SLS segment */
66 va &= ~(0xffffULL << 48);
67
68 switch (psize) {
69 case MMU_PAGE_4K:
70 va &= ~0xffful;
71 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
72 : : "r"(va) : "memory");
73 break;
74 default:
75 penc = mmu_psize_defs[psize].penc;
76 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
77 va |= (0x7f >> (8 - penc)) << 12;
78 asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
79 : : "r"(va) : "memory");
80 break;
81 }
82
83}
84
85static inline void tlbie(unsigned long va, int psize, int local)
86{
87 unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
88 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
89
90 if (use_local)
91 use_local = mmu_psize_defs[psize].tlbiel;
92 if (lock_tlbie && !use_local)
93 spin_lock(&native_tlbie_lock);
94 asm volatile("ptesync": : :"memory");
95 if (use_local) {
96 __tlbiel(va, psize);
97 asm volatile("ptesync": : :"memory");
98 } else {
99 __tlbie(va, psize);
100 asm volatile("eieio; tlbsync; ptesync": : :"memory");
101 }
102 if (lock_tlbie && !use_local)
103 spin_unlock(&native_tlbie_lock);
104}
105
30static inline void native_lock_hpte(hpte_t *hptep) 106static inline void native_lock_hpte(hpte_t *hptep)
31{ 107{
32 unsigned long *word = &hptep->v; 108 unsigned long *word = &hptep->v;
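
The virtual-address operand that __tlbie()/__tlbiel() build above can be isolated as a small helper for reference. This is only a sketch transcribing the bit manipulation shown in this hunk; the penc value of 0 used in the example matches the 16M entry of mmu_psize_defaults_gp added later in this patch.

    #include <stdio.h>

    static unsigned long tlbie_operand(unsigned long va, unsigned int shift,
                                       unsigned int penc)
    {
        va &= ~(0xffffUL << 48);                  /* clear top 16 bits */
        if (shift == 12)                          /* 4K page: drop byte offset only */
            return va & ~0xfffUL;

        va &= ~((1UL << shift) - 1);              /* drop the in-page offset */
        va |= (unsigned long)(0x7f >> (8 - penc)) << 12;  /* fold in page-size encoding */
        return va;
    }

    int main(void)
    {
        /* A 16M page (shift 24, penc 0) somewhere in the kernel linear mapping */
        printf("%016lx\n", tlbie_operand(0xc000000001234567UL, 24, 0));
        return 0;
    }
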
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
48} 124}
49 125
50long native_hpte_insert(unsigned long hpte_group, unsigned long va, 126long native_hpte_insert(unsigned long hpte_group, unsigned long va,
51 unsigned long prpn, unsigned long vflags, 127 unsigned long pa, unsigned long rflags,
52 unsigned long rflags) 128 unsigned long vflags, int psize)
53{ 129{
54 hpte_t *hptep = htab_address + hpte_group; 130 hpte_t *hptep = htab_address + hpte_group;
55 unsigned long hpte_v, hpte_r; 131 unsigned long hpte_v, hpte_r;
56 int i; 132 int i;
57 133
134 if (!(vflags & HPTE_V_BOLTED)) {
135 DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
136 " rflags=%lx, vflags=%lx, psize=%d)\n",
137 hpte_group, va, pa, rflags, vflags, psize);
138 }
139
58 for (i = 0; i < HPTES_PER_GROUP; i++) { 140 for (i = 0; i < HPTES_PER_GROUP; i++) {
59 if (! (hptep->v & HPTE_V_VALID)) { 141 if (! (hptep->v & HPTE_V_VALID)) {
60 /* retry with lock held */ 142 /* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
70 if (i == HPTES_PER_GROUP) 152 if (i == HPTES_PER_GROUP)
71 return -1; 153 return -1;
72 154
73 hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; 155 hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
74 if (vflags & HPTE_V_LARGE) 156 hpte_r = hpte_encode_r(pa, psize) | rflags;
75 va &= ~(1UL << HPTE_V_AVPN_SHIFT); 157
76 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; 158 if (!(vflags & HPTE_V_BOLTED)) {
159 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
160 i, hpte_v, hpte_r);
161 }
77 162
78 hptep->r = hpte_r; 163 hptep->r = hpte_r;
79 /* Guarantee the second dword is visible before the valid bit */ 164 /* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
96 int slot_offset; 181 int slot_offset;
97 unsigned long hpte_v; 182 unsigned long hpte_v;
98 183
184 DBG_LOW(" remove(group=%lx)\n", hpte_group);
185
99 /* pick a random entry to start at */ 186 /* pick a random entry to start at */
100 slot_offset = mftb() & 0x7; 187 slot_offset = mftb() & 0x7;
101 188
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
126 return i; 213 return i;
127} 214}
128 215
129static inline void set_pp_bit(unsigned long pp, hpte_t *addr) 216static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
217 unsigned long va, int psize, int local)
130{ 218{
131 unsigned long old; 219 hpte_t *hptep = htab_address + slot;
132 unsigned long *p = &addr->r; 220 unsigned long hpte_v, want_v;
133 221 int ret = 0;
134 __asm__ __volatile__( 222
135 "1: ldarx %0,0,%3\n\ 223 want_v = hpte_encode_v(va, psize);
136 rldimi %0,%2,0,61\n\ 224
137 stdcx. %0,0,%3\n\ 225 DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
138 bne 1b" 226 va, want_v & HPTE_V_AVPN, slot, newpp);
139 : "=&r" (old), "=m" (*p) 227
140 : "r" (pp), "r" (p), "m" (*p) 228 native_lock_hpte(hptep);
141 : "cc"); 229
230 hpte_v = hptep->v;
231
232 /* Even if we miss, we need to invalidate the TLB */
233 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
234 DBG_LOW(" -> miss\n");
235 native_unlock_hpte(hptep);
236 ret = -1;
237 } else {
238 DBG_LOW(" -> hit\n");
239 /* Update the HPTE */
240 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
241 (newpp & (HPTE_R_PP | HPTE_R_N));
242 native_unlock_hpte(hptep);
243 }
244
245 /* Ensure it is out of the tlb too. */
246 tlbie(va, psize, local);
247
248 return ret;
142} 249}
143 250
144/* 251static long native_hpte_find(unsigned long va, int psize)
145 * Only works on small pages. Yes its ugly to have to check each slot in
146 * the group but we only use this during bootup.
147 */
148static long native_hpte_find(unsigned long vpn)
149{ 252{
150 hpte_t *hptep; 253 hpte_t *hptep;
151 unsigned long hash; 254 unsigned long hash;
152 unsigned long i, j; 255 unsigned long i, j;
153 long slot; 256 long slot;
154 unsigned long hpte_v; 257 unsigned long want_v, hpte_v;
155 258
156 hash = hpt_hash(vpn, 0); 259 hash = hpt_hash(va, mmu_psize_defs[psize].shift);
260 want_v = hpte_encode_v(va, psize);
157 261
158 for (j = 0; j < 2; j++) { 262 for (j = 0; j < 2; j++) {
159 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 263 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
161 hptep = htab_address + slot; 265 hptep = htab_address + slot;
162 hpte_v = hptep->v; 266 hpte_v = hptep->v;
163 267
164 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) 268 if (HPTE_V_COMPARE(hpte_v, want_v)
165 && (hpte_v & HPTE_V_VALID) 269 && (hpte_v & HPTE_V_VALID)
166 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { 270 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
167 /* HPTE matches */ 271 /* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
177 return -1; 281 return -1;
178} 282}
179 283
180static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
181 unsigned long va, int large, int local)
182{
183 hpte_t *hptep = htab_address + slot;
184 unsigned long hpte_v;
185 unsigned long avpn = va >> 23;
186 int ret = 0;
187
188 if (large)
189 avpn &= ~1;
190
191 native_lock_hpte(hptep);
192
193 hpte_v = hptep->v;
194
195 /* Even if we miss, we need to invalidate the TLB */
196 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
197 || !(hpte_v & HPTE_V_VALID)) {
198 native_unlock_hpte(hptep);
199 ret = -1;
200 } else {
201 set_pp_bit(newpp, hptep);
202 native_unlock_hpte(hptep);
203 }
204
205 /* Ensure it is out of the tlb too */
206 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
207 tlbiel(va);
208 } else {
209 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
210
211 if (lock_tlbie)
212 spin_lock(&native_tlbie_lock);
213 tlbie(va, large);
214 if (lock_tlbie)
215 spin_unlock(&native_tlbie_lock);
216 }
217
218 return ret;
219}
220
221/* 284/*
222 * Update the page protection bits. Intended to be used to create 285 * Update the page protection bits. Intended to be used to create
223 * guard pages for kernel data structures on pages which are bolted 286 * guard pages for kernel data structures on pages which are bolted
224 * in the HPT. Assumes pages being operated on will not be stolen. 287 * in the HPT. Assumes pages being operated on will not be stolen.
225 * Does not work on large pages.
226 * 288 *
227 * No need to lock here because we should be the only user. 289 * No need to lock here because we should be the only user.
228 */ 290 */
229static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) 291static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
292 int psize)
230{ 293{
231 unsigned long vsid, va, vpn, flags = 0; 294 unsigned long vsid, va;
232 long slot; 295 long slot;
233 hpte_t *hptep; 296 hpte_t *hptep;
234 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
235 297
236 vsid = get_kernel_vsid(ea); 298 vsid = get_kernel_vsid(ea);
237 va = (vsid << 28) | (ea & 0x0fffffff); 299 va = (vsid << 28) | (ea & 0x0fffffff);
238 vpn = va >> PAGE_SHIFT;
239 300
240 slot = native_hpte_find(vpn); 301 slot = native_hpte_find(va, psize);
241 if (slot == -1) 302 if (slot == -1)
242 panic("could not find page to bolt\n"); 303 panic("could not find page to bolt\n");
243 hptep = htab_address + slot; 304 hptep = htab_address + slot;
244 305
245 set_pp_bit(newpp, hptep); 306 /* Update the HPTE */
307 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
308 (newpp & (HPTE_R_PP | HPTE_R_N));
246 309
247 /* Ensure it is out of the tlb too */ 310 /* Ensure it is out of the tlb too. */
248 if (lock_tlbie) 311 tlbie(va, psize, 0);
249 spin_lock_irqsave(&native_tlbie_lock, flags);
250 tlbie(va, 0);
251 if (lock_tlbie)
252 spin_unlock_irqrestore(&native_tlbie_lock, flags);
253} 312}
254 313
255static void native_hpte_invalidate(unsigned long slot, unsigned long va, 314static void native_hpte_invalidate(unsigned long slot, unsigned long va,
256 int large, int local) 315 int psize, int local)
257{ 316{
258 hpte_t *hptep = htab_address + slot; 317 hpte_t *hptep = htab_address + slot;
259 unsigned long hpte_v; 318 unsigned long hpte_v;
260 unsigned long avpn = va >> 23; 319 unsigned long want_v;
261 unsigned long flags; 320 unsigned long flags;
262 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
263
264 if (large)
265 avpn &= ~1;
266 321
267 local_irq_save(flags); 322 local_irq_save(flags);
268 native_lock_hpte(hptep);
269 323
324 DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
325
326 want_v = hpte_encode_v(va, psize);
327 native_lock_hpte(hptep);
270 hpte_v = hptep->v; 328 hpte_v = hptep->v;
271 329
272 /* Even if we miss, we need to invalidate the TLB */ 330 /* Even if we miss, we need to invalidate the TLB */
273 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) 331 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
274 || !(hpte_v & HPTE_V_VALID)) {
275 native_unlock_hpte(hptep); 332 native_unlock_hpte(hptep);
276 } else { 333 else
277 /* Invalidate the hpte. NOTE: this also unlocks it */ 334 /* Invalidate the hpte. NOTE: this also unlocks it */
278 hptep->v = 0; 335 hptep->v = 0;
279 }
280 336
281 /* Invalidate the tlb */ 337 /* Invalidate the TLB */
282 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 338 tlbie(va, psize, local);
283 tlbiel(va); 339
284 } else {
285 if (lock_tlbie)
286 spin_lock(&native_tlbie_lock);
287 tlbie(va, large);
288 if (lock_tlbie)
289 spin_unlock(&native_tlbie_lock);
290 }
291 local_irq_restore(flags); 340 local_irq_restore(flags);
292} 341}
293 342
294/* 343/*
344 * XXX This need fixing based on page size. It's only used by
345 * native_hpte_clear() for now which needs fixing too so they
346 * make a good pair...
347 */
348static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
349{
350 unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
351 unsigned long va;
352
353 va = avpn << 23;
354
355 if (! (hpte_v & HPTE_V_LARGE)) {
356 unsigned long vpi, pteg;
357
358 pteg = slot / HPTES_PER_GROUP;
359 if (hpte_v & HPTE_V_SECONDARY)
360 pteg = ~pteg;
361
362 vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
363
364 va |= vpi << PAGE_SHIFT;
365 }
366
367 return va;
368}
369
370/*
295 * clear all mappings on kexec. All cpus are in real mode (or they will 371 * clear all mappings on kexec. All cpus are in real mode (or they will
296 * be when they isi), and we are the only one left. We rely on our kernel 372 * be when they isi), and we are the only one left. We rely on our kernel
297 * mapping being 0xC0's and the hardware ignoring those two real bits. 373 * mapping being 0xC0's and the hardware ignoring those two real bits.
298 * 374 *
299 * TODO: add batching support when enabled. Remember, no dynamic memory here, 375 * TODO: add batching support when enabled. Remember, no dynamic memory here,
300 * although there is the control page available... 376 * although there is the control page available...
377 *
378 * XXX FIXME: 4k only for now !
301 */ 379 */
302static void native_hpte_clear(void) 380static void native_hpte_clear(void)
303{ 381{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
327 405
328 if (hpte_v & HPTE_V_VALID) { 406 if (hpte_v & HPTE_V_VALID) {
329 hptep->v = 0; 407 hptep->v = 0;
330 tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); 408 tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
331 } 409 }
332 } 410 }
333 411
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
335 local_irq_restore(flags); 413 local_irq_restore(flags);
336} 414}
337 415
416/*
417 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
418 * the lock all the time
419 */
338static void native_flush_hash_range(unsigned long number, int local) 420static void native_flush_hash_range(unsigned long number, int local)
339{ 421{
340 unsigned long va, vpn, hash, secondary, slot, flags, avpn; 422 unsigned long va, hash, index, hidx, shift, slot;
341 int i, j;
342 hpte_t *hptep; 423 hpte_t *hptep;
343 unsigned long hpte_v; 424 unsigned long hpte_v;
425 unsigned long want_v;
426 unsigned long flags;
427 real_pte_t pte;
344 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 428 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
345 unsigned long large = batch->large; 429 unsigned long psize = batch->psize;
430 int i;
346 431
347 local_irq_save(flags); 432 local_irq_save(flags);
348 433
349 j = 0;
350 for (i = 0; i < number; i++) { 434 for (i = 0; i < number; i++) {
351 va = batch->vaddr[j]; 435 va = batch->vaddr[i];
352 if (large) 436 pte = batch->pte[i];
353 vpn = va >> HPAGE_SHIFT; 437
354 else 438 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
355 vpn = va >> PAGE_SHIFT; 439 hash = hpt_hash(va, shift);
356 hash = hpt_hash(vpn, large); 440 hidx = __rpte_to_hidx(pte, index);
357 secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; 441 if (hidx & _PTEIDX_SECONDARY)
358 if (secondary) 442 hash = ~hash;
359 hash = ~hash; 443 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
360 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 444 slot += hidx & _PTEIDX_GROUP_IX;
361 slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; 445 hptep = htab_address + slot;
362 446 want_v = hpte_encode_v(va, psize);
363 hptep = htab_address + slot; 447 native_lock_hpte(hptep);
364 448 hpte_v = hptep->v;
365 avpn = va >> 23; 449 if (!HPTE_V_COMPARE(hpte_v, want_v) ||
366 if (large) 450 !(hpte_v & HPTE_V_VALID))
367 avpn &= ~0x1UL; 451 native_unlock_hpte(hptep);
368 452 else
369 native_lock_hpte(hptep); 453 hptep->v = 0;
370 454 } pte_iterate_hashed_end();
371 hpte_v = hptep->v;
372
373 /* Even if we miss, we need to invalidate the TLB */
374 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
375 || !(hpte_v & HPTE_V_VALID)) {
376 native_unlock_hpte(hptep);
377 } else {
378 /* Invalidate the hpte. NOTE: this also unlocks it */
379 hptep->v = 0;
380 }
381
382 j++;
383 } 455 }
384 456
385 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { 457 if (cpu_has_feature(CPU_FTR_TLBIEL) &&
458 mmu_psize_defs[psize].tlbiel && local) {
386 asm volatile("ptesync":::"memory"); 459 asm volatile("ptesync":::"memory");
387 460 for (i = 0; i < number; i++) {
388 for (i = 0; i < j; i++) 461 va = batch->vaddr[i];
389 __tlbiel(batch->vaddr[i]); 462 pte = batch->pte[i];
390 463
464 pte_iterate_hashed_subpages(pte, psize, va, index,
465 shift) {
466 __tlbiel(va, psize);
467 } pte_iterate_hashed_end();
468 }
391 asm volatile("ptesync":::"memory"); 469 asm volatile("ptesync":::"memory");
392 } else { 470 } else {
393 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); 471 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
396 spin_lock(&native_tlbie_lock); 474 spin_lock(&native_tlbie_lock);
397 475
398 asm volatile("ptesync":::"memory"); 476 asm volatile("ptesync":::"memory");
399 477 for (i = 0; i < number; i++) {
400 for (i = 0; i < j; i++) 478 va = batch->vaddr[i];
401 __tlbie(batch->vaddr[i], large); 479 pte = batch->pte[i];
402 480
481 pte_iterate_hashed_subpages(pte, psize, va, index,
482 shift) {
483 __tlbie(va, psize);
484 } pte_iterate_hashed_end();
485 }
403 asm volatile("eieio; tlbsync; ptesync":::"memory"); 486 asm volatile("eieio; tlbsync; ptesync":::"memory");
404 487
405 if (lock_tlbie) 488 if (lock_tlbie)
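
Both native_flush_hash_range() above and flush_hash_page() later in this patch recover the HPTE slot from the hash value plus the per-subpage "hidx" bits kept in the Linux PTE. A sketch of that slot arithmetic, assuming the usual values of _PTEIDX_SECONDARY (0x8) and _PTEIDX_GROUP_IX (0x7):

    #include <stdio.h>

    #define HPTES_PER_GROUP  8
    #define PTEIDX_GROUP_IX  0x7   /* slot within the group */
    #define PTEIDX_SECONDARY 0x8   /* entry lives in the secondary group */

    static unsigned long hidx_to_slot(unsigned long hash, unsigned long hash_mask,
                                      unsigned long hidx)
    {
        if (hidx & PTEIDX_SECONDARY)
            hash = ~hash;          /* secondary group uses the inverted hash */
        return (hash & hash_mask) * HPTES_PER_GROUP + (hidx & PTEIDX_GROUP_IX);
    }

    int main(void)
    {
        printf("slot = %lu\n",
               hidx_to_slot(0x1234, 0xfff, PTEIDX_SECONDARY | 3));
        return 0;
    }
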
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b6970c96d96f..37273f518a35 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#undef DEBUG 21#undef DEBUG
22#undef DEBUG_LOW
22 23
23#include <linux/config.h> 24#include <linux/config.h>
24#include <linux/spinlock.h> 25#include <linux/spinlock.h>
@@ -58,6 +59,15 @@
58#define DBG(fmt...) 59#define DBG(fmt...)
59#endif 60#endif
60 61
62#ifdef DEBUG_LOW
63#define DBG_LOW(fmt...) udbg_printf(fmt)
64#else
65#define DBG_LOW(fmt...)
66#endif
67
68#define KB (1024)
69#define MB (1024*KB)
70
61/* 71/*
62 * Note: pte --> Linux PTE 72 * Note: pte --> Linux PTE
63 * HPTE --> PowerPC Hashed Page Table Entry 73 * HPTE --> PowerPC Hashed Page Table Entry
@@ -76,91 +86,290 @@ extern unsigned long dart_tablebase;
76 86
77hpte_t *htab_address; 87hpte_t *htab_address;
78unsigned long htab_hash_mask; 88unsigned long htab_hash_mask;
79
80unsigned long _SDR1; 89unsigned long _SDR1;
90struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
91int mmu_linear_psize = MMU_PAGE_4K;
92int mmu_virtual_psize = MMU_PAGE_4K;
93#ifdef CONFIG_HUGETLB_PAGE
94int mmu_huge_psize = MMU_PAGE_16M;
95unsigned int HPAGE_SHIFT;
96#endif
81 97
82 #define KB (1024) 98/* There are definitions of page size arrays to be used when none
83#define MB (1024*KB) 99 * is provided by the firmware.
84 100 */
85static inline void loop_forever(void)
86{
87 volatile unsigned long x = 1;
88 for(;x;x|=1)
89 ;
90}
91 101
92static inline void create_pte_mapping(unsigned long start, unsigned long end, 102/* Pre-POWER4 CPUs (4k pages only)
93 unsigned long mode, int large) 103 */
104struct mmu_psize_def mmu_psize_defaults_old[] = {
105 [MMU_PAGE_4K] = {
106 .shift = 12,
107 .sllp = 0,
108 .penc = 0,
109 .avpnm = 0,
110 .tlbiel = 0,
111 },
112};
113
114/* POWER4, GPUL, POWER5
115 *
116 * Support for 16Mb large pages
117 */
118struct mmu_psize_def mmu_psize_defaults_gp[] = {
119 [MMU_PAGE_4K] = {
120 .shift = 12,
121 .sllp = 0,
122 .penc = 0,
123 .avpnm = 0,
124 .tlbiel = 1,
125 },
126 [MMU_PAGE_16M] = {
127 .shift = 24,
128 .sllp = SLB_VSID_L,
129 .penc = 0,
130 .avpnm = 0x1UL,
131 .tlbiel = 0,
132 },
133};
134
135
136int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
137 unsigned long pstart, unsigned long mode, int psize)
94{ 138{
95 unsigned long addr; 139 unsigned long vaddr, paddr;
96 unsigned int step; 140 unsigned int step, shift;
97 unsigned long tmp_mode; 141 unsigned long tmp_mode;
98 unsigned long vflags; 142 int ret = 0;
99 143
100 if (large) { 144 shift = mmu_psize_defs[psize].shift;
101 step = 16*MB; 145 step = 1 << shift;
102 vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
103 } else {
104 step = 4*KB;
105 vflags = HPTE_V_BOLTED;
106 }
107 146
108 for (addr = start; addr < end; addr += step) { 147 for (vaddr = vstart, paddr = pstart; vaddr < vend;
148 vaddr += step, paddr += step) {
109 unsigned long vpn, hash, hpteg; 149 unsigned long vpn, hash, hpteg;
110 unsigned long vsid = get_kernel_vsid(addr); 150 unsigned long vsid = get_kernel_vsid(vaddr);
111 unsigned long va = (vsid << 28) | (addr & 0xfffffff); 151 unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
112 int ret = -1;
113
114 if (large)
115 vpn = va >> HPAGE_SHIFT;
116 else
117 vpn = va >> PAGE_SHIFT;
118
119 152
153 vpn = va >> shift;
120 tmp_mode = mode; 154 tmp_mode = mode;
121 155
122 /* Make non-kernel text non-executable */ 156 /* Make non-kernel text non-executable */
123 if (!in_kernel_text(addr)) 157 if (!in_kernel_text(vaddr))
124 tmp_mode = mode | HW_NO_EXEC; 158 tmp_mode = mode | HPTE_R_N;
125
126 hash = hpt_hash(vpn, large);
127 159
160 hash = hpt_hash(va, shift);
128 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 161 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
129 162
163 /* The crap below can be cleaned up once ppc_md.probe() can
164 * set up the hash callbacks, thus we can just use the
165 * normal insert callback here.
166 */
130#ifdef CONFIG_PPC_ISERIES 167#ifdef CONFIG_PPC_ISERIES
131 if (systemcfg->platform & PLATFORM_ISERIES_LPAR) 168 if (systemcfg->platform == PLATFORM_ISERIES_LPAR)
132 ret = iSeries_hpte_bolt_or_insert(hpteg, va, 169 ret = iSeries_hpte_insert(hpteg, va,
133 virt_to_abs(addr) >> PAGE_SHIFT, 170 virt_to_abs(paddr),
134 vflags, tmp_mode); 171 tmp_mode,
172 HPTE_V_BOLTED,
173 psize);
135 else 174 else
136#endif 175#endif
137#ifdef CONFIG_PPC_PSERIES 176#ifdef CONFIG_PPC_PSERIES
138 if (systemcfg->platform & PLATFORM_LPAR) 177 if (systemcfg->platform & PLATFORM_LPAR)
139 ret = pSeries_lpar_hpte_insert(hpteg, va, 178 ret = pSeries_lpar_hpte_insert(hpteg, va,
140 virt_to_abs(addr) >> PAGE_SHIFT, 179 virt_to_abs(paddr),
141 vflags, tmp_mode); 180 tmp_mode,
181 HPTE_V_BOLTED,
182 psize);
142 else 183 else
143#endif 184#endif
144#ifdef CONFIG_PPC_MULTIPLATFORM 185#ifdef CONFIG_PPC_MULTIPLATFORM
145 ret = native_hpte_insert(hpteg, va, 186 ret = native_hpte_insert(hpteg, va,
146 virt_to_abs(addr) >> PAGE_SHIFT, 187 virt_to_abs(paddr),
147 vflags, tmp_mode); 188 tmp_mode, HPTE_V_BOLTED,
189 psize);
148#endif 190#endif
191 if (ret < 0)
192 break;
193 }
194 return ret < 0 ? ret : 0;
195}
149 196
150 if (ret == -1) { 197static int __init htab_dt_scan_page_sizes(unsigned long node,
151 ppc64_terminate_msg(0x20, "create_pte_mapping"); 198 const char *uname, int depth,
152 loop_forever(); 199 void *data)
200{
201 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
202 u32 *prop;
203 unsigned long size = 0;
204
205 /* We are scanning "cpu" nodes only */
206 if (type == NULL || strcmp(type, "cpu") != 0)
207 return 0;
208
209 prop = (u32 *)of_get_flat_dt_prop(node,
210 "ibm,segment-page-sizes", &size);
211 if (prop != NULL) {
212 DBG("Page sizes from device-tree:\n");
213 size /= 4;
214 cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
215 while(size > 0) {
216 unsigned int shift = prop[0];
217 unsigned int slbenc = prop[1];
218 unsigned int lpnum = prop[2];
219 unsigned int lpenc = 0;
220 struct mmu_psize_def *def;
221 int idx = -1;
222
223 size -= 3; prop += 3;
224 while(size > 0 && lpnum) {
225 if (prop[0] == shift)
226 lpenc = prop[1];
227 prop += 2; size -= 2;
228 lpnum--;
229 }
230 switch(shift) {
231 case 0xc:
232 idx = MMU_PAGE_4K;
233 break;
234 case 0x10:
235 idx = MMU_PAGE_64K;
236 break;
237 case 0x14:
238 idx = MMU_PAGE_1M;
239 break;
240 case 0x18:
241 idx = MMU_PAGE_16M;
242 cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
243 break;
244 case 0x22:
245 idx = MMU_PAGE_16G;
246 break;
247 }
248 if (idx < 0)
249 continue;
250 def = &mmu_psize_defs[idx];
251 def->shift = shift;
252 if (shift <= 23)
253 def->avpnm = 0;
254 else
255 def->avpnm = (1 << (shift - 23)) - 1;
256 def->sllp = slbenc;
257 def->penc = lpenc;
258 /* We don't know for sure what's up with tlbiel, so
259 * for now we only set it for 4K and 64K pages
260 */
261 if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
262 def->tlbiel = 1;
263 else
264 def->tlbiel = 0;
265
266 DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
267 "tlbiel=%d, penc=%d\n",
268 idx, shift, def->sllp, def->avpnm, def->tlbiel,
269 def->penc);
153 } 270 }
271 return 1;
272 }
273 return 0;
274}
275
276
277static void __init htab_init_page_sizes(void)
278{
279 int rc;
280
281 /* Default to 4K pages only */
282 memcpy(mmu_psize_defs, mmu_psize_defaults_old,
283 sizeof(mmu_psize_defaults_old));
284
285 /*
286 * Try to find the available page sizes in the device-tree
287 */
288 rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
289 if (rc != 0) /* Found */
290 goto found;
291
292 /*
293 * Not in the device-tree, let's fallback on known size
294 * list for 16M capable GP & GR
295 */
296 if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) &&
297 cpu_has_feature(CPU_FTR_16M_PAGE))
298 memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
299 sizeof(mmu_psize_defaults_gp));
300 found:
301 /*
302 * Pick a size for the linear mapping. Currently, we only support
303 * 16M, 1M and 4K, which is the default
304 */
305 if (mmu_psize_defs[MMU_PAGE_16M].shift)
306 mmu_linear_psize = MMU_PAGE_16M;
307 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
308 mmu_linear_psize = MMU_PAGE_1M;
309
310 /*
311 * Pick a size for the ordinary pages. Default is 4K; we support
312 * 64K if cache inhibited large pages are supported by the
313 * processor
314 */
315#ifdef CONFIG_PPC_64K_PAGES
316 if (mmu_psize_defs[MMU_PAGE_64K].shift &&
317 cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
318 mmu_virtual_psize = MMU_PAGE_64K;
319#endif
320
321 printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
322 mmu_psize_defs[mmu_linear_psize].shift,
323 mmu_psize_defs[mmu_virtual_psize].shift);
324
325#ifdef CONFIG_HUGETLB_PAGE
326 /* Init large page size. Currently, we pick 16M or 1M depending
327 * on what is available
328 */
329 if (mmu_psize_defs[MMU_PAGE_16M].shift)
330 mmu_huge_psize = MMU_PAGE_16M;
331 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
332 mmu_huge_psize = MMU_PAGE_1M;
333
334 /* Calculate HPAGE_SHIFT and sanity check it */
335 if (mmu_psize_defs[mmu_huge_psize].shift > 16 &&
336 mmu_psize_defs[mmu_huge_psize].shift < 28)
337 HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
338 else
339 HPAGE_SHIFT = 0; /* No huge pages dude ! */
340#endif /* CONFIG_HUGETLB_PAGE */
341}
342
343static int __init htab_dt_scan_pftsize(unsigned long node,
344 const char *uname, int depth,
345 void *data)
346{
347 char *type = of_get_flat_dt_prop(node, "device_type", NULL);
348 u32 *prop;
349
350 /* We are scanning "cpu" nodes only */
351 if (type == NULL || strcmp(type, "cpu") != 0)
352 return 0;
353
354 prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
355 if (prop != NULL) {
356 /* pft_size[0] is the NUMA CEC cookie */
357 ppc64_pft_size = prop[1];
358 return 1;
154 } 359 }
360 return 0;
155} 361}
156 362
157static unsigned long get_hashtable_size(void) 363static unsigned long __init htab_get_table_size(void)
158{ 364{
159 unsigned long rnd_mem_size, pteg_count; 365 unsigned long rnd_mem_size, pteg_count;
160 366
161 /* If hash size wasn't obtained in prom.c, we calculate it now based on 367 /* If hash size isn't already provided by the platform, we try to
162 * the total RAM size 368 * retrieve it from the device-tree. If it's not there either, we
369 * calculate it now based on the total RAM size
163 */ 370 */
371 if (ppc64_pft_size == 0)
372 of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
164 if (ppc64_pft_size) 373 if (ppc64_pft_size)
165 return 1UL << ppc64_pft_size; 374 return 1UL << ppc64_pft_size;
166 375
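
htab_dt_scan_page_sizes() above walks the "ibm,segment-page-sizes" property as a flat list of cells: base page shift, SLB encoding, a count N, then N pairs of (actual page shift, hash-PTE encoding). A self-contained sketch of that walk; the sample property at the bottom is invented purely for illustration.

    #include <stdio.h>
    #include <stdint.h>

    static void scan_segment_page_sizes(const uint32_t *prop, unsigned long cells)
    {
        while (cells > 0) {
            uint32_t shift  = prop[0];   /* base (segment) page shift */
            uint32_t slbenc = prop[1];   /* SLB encoding for that base size */
            uint32_t lpnum  = prop[2];   /* number of (shift, penc) pairs */
            prop += 3; cells -= 3;

            while (cells > 0 && lpnum) {
                /* keep the encoding whose page shift matches the base shift */
                if (prop[0] == shift)
                    printf("shift=%u sllp=0x%x penc=%u\n",
                           (unsigned)shift, (unsigned)slbenc, (unsigned)prop[1]);
                prop += 2; cells -= 2;
                lpnum--;
            }
        }
    }

    int main(void)
    {
        /* hypothetical property: 4K pages (penc 0) and 16M pages (penc 0) */
        const uint32_t prop[] = { 0x0c, 0x000, 1, 0x0c, 0,
                                  0x18, 0x100, 1, 0x18, 0 };
        scan_segment_page_sizes(prop, sizeof(prop) / sizeof(prop[0]));
        return 0;
    }
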
@@ -180,17 +389,21 @@ void __init htab_initialize(void)
180 unsigned long table, htab_size_bytes; 389 unsigned long table, htab_size_bytes;
181 unsigned long pteg_count; 390 unsigned long pteg_count;
182 unsigned long mode_rw; 391 unsigned long mode_rw;
183 int i, use_largepages = 0;
184 unsigned long base = 0, size = 0; 392 unsigned long base = 0, size = 0;
393 int i;
394
185 extern unsigned long tce_alloc_start, tce_alloc_end; 395 extern unsigned long tce_alloc_start, tce_alloc_end;
186 396
187 DBG(" -> htab_initialize()\n"); 397 DBG(" -> htab_initialize()\n");
188 398
399 /* Initialize page sizes */
400 htab_init_page_sizes();
401
189 /* 402 /*
190 * Calculate the required size of the htab. We want the number of 403 * Calculate the required size of the htab. We want the number of
191 * PTEGs to equal one half the number of real pages. 404 * PTEGs to equal one half the number of real pages.
192 */ 405 */
193 htab_size_bytes = get_hashtable_size(); 406 htab_size_bytes = htab_get_table_size();
194 pteg_count = htab_size_bytes >> 7; 407 pteg_count = htab_size_bytes >> 7;
195 408
196 htab_hash_mask = pteg_count - 1; 409 htab_hash_mask = pteg_count - 1;
@@ -204,14 +417,11 @@ void __init htab_initialize(void)
204 * the absolute address space. 417 * the absolute address space.
205 */ 418 */
206 table = lmb_alloc(htab_size_bytes, htab_size_bytes); 419 table = lmb_alloc(htab_size_bytes, htab_size_bytes);
420 BUG_ON(table == 0);
207 421
208 DBG("Hash table allocated at %lx, size: %lx\n", table, 422 DBG("Hash table allocated at %lx, size: %lx\n", table,
209 htab_size_bytes); 423 htab_size_bytes);
210 424
211 if ( !table ) {
212 ppc64_terminate_msg(0x20, "hpt space");
213 loop_forever();
214 }
215 htab_address = abs_to_virt(table); 425 htab_address = abs_to_virt(table);
216 426
217 /* htab absolute addr + encoded htabsize */ 427 /* htab absolute addr + encoded htabsize */
@@ -227,8 +437,6 @@ void __init htab_initialize(void)
227 * _NOT_ map it to avoid cache paradoxes as it's remapped non 437 * _NOT_ map it to avoid cache paradoxes as it's remapped non
228 * cacheable later on 438 * cacheable later on
229 */ 439 */
230 if (cpu_has_feature(CPU_FTR_16M_PAGE))
231 use_largepages = 1;
232 440
233 /* create bolted the linear mapping in the hash table */ 441 /* create bolted the linear mapping in the hash table */
234 for (i=0; i < lmb.memory.cnt; i++) { 442 for (i=0; i < lmb.memory.cnt; i++) {
@@ -239,27 +447,32 @@ void __init htab_initialize(void)
239 447
240#ifdef CONFIG_U3_DART 448#ifdef CONFIG_U3_DART
241 /* Do not map the DART space. Fortunately, it will be aligned 449 /* Do not map the DART space. Fortunately, it will be aligned
242 * in such a way that it will not cross two lmb regions and will 450 * in such a way that it will not cross two lmb regions and
243 * fit within a single 16Mb page. 451 * will fit within a single 16Mb page.
244 * The DART space is assumed to be a full 16Mb region even if we 452 * The DART space is assumed to be a full 16Mb region even if
245 * only use 2Mb of that space. We will use more of it later for 453 * we only use 2Mb of that space. We will use more of it later
246 * AGP GART. We have to use a full 16Mb large page. 454 * for AGP GART. We have to use a full 16Mb large page.
247 */ 455 */
248 DBG("DART base: %lx\n", dart_tablebase); 456 DBG("DART base: %lx\n", dart_tablebase);
249 457
250 if (dart_tablebase != 0 && dart_tablebase >= base 458 if (dart_tablebase != 0 && dart_tablebase >= base
251 && dart_tablebase < (base + size)) { 459 && dart_tablebase < (base + size)) {
252 if (base != dart_tablebase) 460 if (base != dart_tablebase)
253 create_pte_mapping(base, dart_tablebase, mode_rw, 461 BUG_ON(htab_bolt_mapping(base, dart_tablebase,
254 use_largepages); 462 base, mode_rw,
463 mmu_linear_psize));
255 if ((base + size) > (dart_tablebase + 16*MB)) 464 if ((base + size) > (dart_tablebase + 16*MB))
256 create_pte_mapping(dart_tablebase + 16*MB, base + size, 465 BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
257 mode_rw, use_largepages); 466 base + size,
467 dart_tablebase+16*MB,
468 mode_rw,
469 mmu_linear_psize));
258 continue; 470 continue;
259 } 471 }
260#endif /* CONFIG_U3_DART */ 472#endif /* CONFIG_U3_DART */
261 create_pte_mapping(base, base + size, mode_rw, use_largepages); 473 BUG_ON(htab_bolt_mapping(base, base + size, base,
262 } 474 mode_rw, mmu_linear_psize));
475 }
263 476
264 /* 477 /*
265 * If we have a memory_limit and we've allocated TCEs then we need to 478 * If we have a memory_limit and we've allocated TCEs then we need to
@@ -275,8 +488,9 @@ void __init htab_initialize(void)
275 if (base + size >= tce_alloc_start) 488 if (base + size >= tce_alloc_start)
276 tce_alloc_start = base + size + 1; 489 tce_alloc_start = base + size + 1;
277 490
278 create_pte_mapping(tce_alloc_start, tce_alloc_end, 491 BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
279 mode_rw, use_largepages); 492 tce_alloc_start, mode_rw,
493 mmu_linear_psize));
280 } 494 }
281 495
282 DBG(" <- htab_initialize()\n"); 496 DBG(" <- htab_initialize()\n");
@@ -291,9 +505,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
291{ 505{
292 struct page *page; 506 struct page *page;
293 507
294 if (!pfn_valid(pte_pfn(pte)))
295 return pp;
296
297 page = pte_page(pte); 508 page = pte_page(pte);
298 509
299 /* page is dirty */ 510 /* page is dirty */
@@ -302,7 +513,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
302 __flush_dcache_icache(page_address(page)); 513 __flush_dcache_icache(page_address(page));
303 set_bit(PG_arch_1, &page->flags); 514 set_bit(PG_arch_1, &page->flags);
304 } else 515 } else
305 pp |= HW_NO_EXEC; 516 pp |= HPTE_R_N;
306 } 517 }
307 return pp; 518 return pp;
308} 519}
@@ -318,94 +529,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
318 unsigned long vsid; 529 unsigned long vsid;
319 struct mm_struct *mm; 530 struct mm_struct *mm;
320 pte_t *ptep; 531 pte_t *ptep;
321 int ret;
322 int user_region = 0;
323 int local = 0;
324 cpumask_t tmp; 532 cpumask_t tmp;
533 int rc, user_region = 0, local = 0;
325 534
326 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) 535 DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx)\n",
327 return 1; 536 ea, access, trap);
328 537
538 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
539 DBG_LOW(" out of pgtable range !\n");
540 return 1;
541 }
542
543 /* Get region & vsid */
329 switch (REGION_ID(ea)) { 544 switch (REGION_ID(ea)) {
330 case USER_REGION_ID: 545 case USER_REGION_ID:
331 user_region = 1; 546 user_region = 1;
332 mm = current->mm; 547 mm = current->mm;
333 if (! mm) 548 if (! mm) {
549 DBG_LOW(" user region with no mm !\n");
334 return 1; 550 return 1;
335 551 }
336 vsid = get_vsid(mm->context.id, ea); 552 vsid = get_vsid(mm->context.id, ea);
337 break; 553 break;
338 case VMALLOC_REGION_ID: 554 case VMALLOC_REGION_ID:
339 mm = &init_mm; 555 mm = &init_mm;
340 vsid = get_kernel_vsid(ea); 556 vsid = get_kernel_vsid(ea);
341 break; 557 break;
342#if 0
343 case KERNEL_REGION_ID:
344 /*
345 * Should never get here - entire 0xC0... region is bolted.
346 * Send the problem up to do_page_fault
347 */
348#endif
349 default: 558 default:
350 /* Not a valid range 559 /* Not a valid range
351 * Send the problem up to do_page_fault 560 * Send the problem up to do_page_fault
352 */ 561 */
353 return 1; 562 return 1;
354 break;
355 } 563 }
564 DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
356 565
566 /* Get pgdir */
357 pgdir = mm->pgd; 567 pgdir = mm->pgd;
358
359 if (pgdir == NULL) 568 if (pgdir == NULL)
360 return 1; 569 return 1;
361 570
571 /* Check CPU locality */
362 tmp = cpumask_of_cpu(smp_processor_id()); 572 tmp = cpumask_of_cpu(smp_processor_id());
363 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) 573 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
364 local = 1; 574 local = 1;
365 575
366 /* Is this a huge page ? */ 576 /* Handle hugepage regions */
367 if (unlikely(in_hugepage_area(mm->context, ea))) 577 if (unlikely(in_hugepage_area(mm->context, ea))) {
368 ret = hash_huge_page(mm, access, ea, vsid, local); 578 DBG_LOW(" -> huge page !\n");
369 else { 579 return hash_huge_page(mm, access, ea, vsid, local);
370 ptep = find_linux_pte(pgdir, ea); 580 }
371 if (ptep == NULL) 581
372 return 1; 582 /* Get PTE and page size from page tables */
373 ret = __hash_page(ea, access, vsid, ptep, trap, local); 583 ptep = find_linux_pte(pgdir, ea);
584 if (ptep == NULL || !pte_present(*ptep)) {
585 DBG_LOW(" no PTE !\n");
586 return 1;
587 }
588
589#ifndef CONFIG_PPC_64K_PAGES
590 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
591#else
592 DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
593 pte_val(*(ptep + PTRS_PER_PTE)));
594#endif
595 /* Pre-check access permissions (will be re-checked atomically
596 * in __hash_page_XX) but this pre-check is a fast path
597 */
598 if (access & ~pte_val(*ptep)) {
599 DBG_LOW(" no access !\n");
600 return 1;
374 } 601 }
375 602
376 return ret; 603 /* Do actual hashing */
604#ifndef CONFIG_PPC_64K_PAGES
605 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
606#else
607 if (mmu_virtual_psize == MMU_PAGE_64K)
608 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
609 else
610 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
611#endif /* CONFIG_PPC_64K_PAGES */
612
613#ifndef CONFIG_PPC_64K_PAGES
614 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
615#else
616 DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
617 pte_val(*(ptep + PTRS_PER_PTE)));
618#endif
619 DBG_LOW(" -> rc=%d\n", rc);
620 return rc;
377} 621}
378 622
379void flush_hash_page(unsigned long va, pte_t pte, int local) 623void hash_preload(struct mm_struct *mm, unsigned long ea,
624 unsigned long access, unsigned long trap)
380{ 625{
381 unsigned long vpn, hash, secondary, slot; 626 unsigned long vsid;
382 unsigned long huge = pte_huge(pte); 627 void *pgdir;
628 pte_t *ptep;
629 cpumask_t mask;
630 unsigned long flags;
631 int local = 0;
632
633 /* We don't want huge pages prefaulted for now
634 */
635 if (unlikely(in_hugepage_area(mm->context, ea)))
636 return;
637
638 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
639 " trap=%lx\n", mm, mm->pgd, ea, access, trap);
383 640
384 if (huge) 641 /* Get PTE, VSID, access mask */
385 vpn = va >> HPAGE_SHIFT; 642 pgdir = mm->pgd;
643 if (pgdir == NULL)
644 return;
645 ptep = find_linux_pte(pgdir, ea);
646 if (!ptep)
647 return;
648 vsid = get_vsid(mm->context.id, ea);
649
650 /* Hash it in */
651 local_irq_save(flags);
652 mask = cpumask_of_cpu(smp_processor_id());
653 if (cpus_equal(mm->cpu_vm_mask, mask))
654 local = 1;
655#ifndef CONFIG_PPC_64K_PAGES
656 __hash_page_4K(ea, access, vsid, ptep, trap, local);
657#else
658 if (mmu_virtual_psize == MMU_PAGE_64K)
659 __hash_page_64K(ea, access, vsid, ptep, trap, local);
386 else 660 else
387 vpn = va >> PAGE_SHIFT; 661 __hash_page_4K(ea, access, vsid, ptep, trap, local);
388 hash = hpt_hash(vpn, huge); 662#endif /* CONFIG_PPC_64K_PAGES */
389 secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; 663 local_irq_restore(flags);
390 if (secondary) 664}
391 hash = ~hash; 665
392 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 666void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
393 slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; 667{
394 668 unsigned long hash, index, shift, hidx, slot;
395 ppc_md.hpte_invalidate(slot, va, huge, local); 669
670 DBG_LOW("flush_hash_page(va=%016x)\n", va);
671 pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
672 hash = hpt_hash(va, shift);
673 hidx = __rpte_to_hidx(pte, index);
674 if (hidx & _PTEIDX_SECONDARY)
675 hash = ~hash;
676 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
677 slot += hidx & _PTEIDX_GROUP_IX;
678 DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
679 ppc_md.hpte_invalidate(slot, va, psize, local);
680 } pte_iterate_hashed_end();
396} 681}
397 682
398void flush_hash_range(unsigned long number, int local) 683void flush_hash_range(unsigned long number, int local)
399{ 684{
400 if (ppc_md.flush_hash_range) { 685 if (ppc_md.flush_hash_range)
401 ppc_md.flush_hash_range(number, local); 686 ppc_md.flush_hash_range(number, local);
402 } else { 687 else {
403 int i; 688 int i;
404 struct ppc64_tlb_batch *batch = 689 struct ppc64_tlb_batch *batch =
405 &__get_cpu_var(ppc64_tlb_batch); 690 &__get_cpu_var(ppc64_tlb_batch);
406 691
407 for (i = 0; i < number; i++) 692 for (i = 0; i < number; i++)
408 flush_hash_page(batch->vaddr[i], batch->pte[i], local); 693 flush_hash_page(batch->vaddr[i], batch->pte[i],
694 batch->psize, local);
409 } 695 }
410} 696}
411 697
@@ -445,6 +731,18 @@ void __init htab_finish_init(void)
445 extern unsigned int *htab_call_hpte_remove; 731 extern unsigned int *htab_call_hpte_remove;
446 extern unsigned int *htab_call_hpte_updatepp; 732 extern unsigned int *htab_call_hpte_updatepp;
447 733
734#ifdef CONFIG_PPC_64K_PAGES
735 extern unsigned int *ht64_call_hpte_insert1;
736 extern unsigned int *ht64_call_hpte_insert2;
737 extern unsigned int *ht64_call_hpte_remove;
738 extern unsigned int *ht64_call_hpte_updatepp;
739
740 make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
741 make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
742 make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
743 make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
744#endif /* CONFIG_PPC_64K_PAGES */
745
448 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); 746 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
449 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); 747 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
450 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); 748 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
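
The sizing rule stated in htab_initialize() above (the number of PTEGs equal to one half the number of real pages, with pteg_count = htab_size_bytes >> 7, i.e. 128 bytes per PTEG) works out to one 64th of RAM. A sketch of that arithmetic, rounding up to a power of two since the final size is carried around as a shift (ppc64_pft_size); the actual fallback heuristics live in htab_get_table_size().

    #include <stdio.h>

    static unsigned long htab_size_for_ram(unsigned long ram_bytes)
    {
        unsigned long pages = ram_bytes >> 12;   /* 4K real pages */
        unsigned long ptegs = pages >> 1;        /* one PTEG per two pages */
        unsigned long size  = ptegs << 7;        /* each PTEG is 128 bytes */

        unsigned long rounded = 1;               /* round up to a power of two */
        while (rounded < size)
            rounded <<= 1;
        return rounded;
    }

    int main(void)
    {
        printf("1GB RAM -> %lu KB hash table\n",
               htab_size_for_ram(1UL << 30) >> 10);
        return 0;
    }
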
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..0073a04047e4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
47 pu = pud_offset(pg, addr); 47 pu = pud_offset(pg, addr);
48 if (!pud_none(*pu)) { 48 if (!pud_none(*pu)) {
49 pm = pmd_offset(pu, addr); 49 pm = pmd_offset(pu, addr);
50#ifdef CONFIG_PPC_64K_PAGES
51 /* Currently, we use the normal PTE offset within full
52 * size PTE pages, thus our huge PTEs are scattered in
53 * the PTE page and we do waste some. We may change
54 * that in the future, but the current mechanism keeps
55 * things much simpler
56 */
57 if (!pmd_none(*pm)) {
58 /* Note: pte_offset_* are all equivalent on
59 * ppc64 as we don't have HIGHMEM
60 */
61 pt = pte_offset_kernel(pm, addr);
62 return pt;
63 }
64#else /* CONFIG_PPC_64K_PAGES */
65 /* On 4k pages, we put huge PTEs in the PMD page */
50 pt = (pte_t *)pm; 66 pt = (pte_t *)pm;
51 BUG_ON(!pmd_none(*pm)
52 && !(pte_present(*pt) && pte_huge(*pt)));
53 return pt; 67 return pt;
68#endif /* CONFIG_PPC_64K_PAGES */
54 } 69 }
55 } 70 }
56 71
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
74 if (pu) { 89 if (pu) {
75 pm = pmd_alloc(mm, pu, addr); 90 pm = pmd_alloc(mm, pu, addr);
76 if (pm) { 91 if (pm) {
92#ifdef CONFIG_PPC_64K_PAGES
93 /* See comment in huge_pte_offset. Note that if we ever
94 * want to put the page size in the PMD, we would have
95 * to open code our own pte_alloc* function in order
96 * to populate and set the size atomically
97 */
98 pt = pte_alloc_map(mm, pm, addr);
99#else /* CONFIG_PPC_64K_PAGES */
77 pt = (pte_t *)pm; 100 pt = (pte_t *)pm;
78 BUG_ON(!pmd_none(*pm) 101#endif /* CONFIG_PPC_64K_PAGES */
79 && !(pte_present(*pt) && pte_huge(*pt)));
80 return pt; 102 return pt;
81 } 103 }
82 } 104 }
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
84 return NULL; 106 return NULL;
85} 107}
86 108
87#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
88
89void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 109void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
90 pte_t *ptep, pte_t pte) 110 pte_t *ptep, pte_t pte)
91{ 111{
92 int i;
93
94 if (pte_present(*ptep)) { 112 if (pte_present(*ptep)) {
95 pte_clear(mm, addr, ptep); 113 /* We open-code pte_clear because we need to pass the right
114 * argument to hpte_update (huge / !huge)
115 */
116 unsigned long old = pte_update(ptep, ~0UL);
117 if (old & _PAGE_HASHPTE)
118 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
96 flush_tlb_pending(); 119 flush_tlb_pending();
97 } 120 }
98 121 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
99 for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
100 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
101 ptep++;
102 }
103} 122}
104 123
105pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 124pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
106 pte_t *ptep) 125 pte_t *ptep)
107{ 126{
108 unsigned long old = pte_update(ptep, ~0UL); 127 unsigned long old = pte_update(ptep, ~0UL);
109 int i;
110 128
111 if (old & _PAGE_HASHPTE) 129 if (old & _PAGE_HASHPTE)
112 hpte_update(mm, addr, old, 0); 130 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
113 131 *ptep = __pte(0);
114 for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
115 ptep[i] = __pte(0);
116 132
117 return __pte(old); 133 return __pte(old);
118} 134}
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
563 int lastshift; 579 int lastshift;
564 u16 areamask, curareas; 580 u16 areamask, curareas;
565 581
582 if (HPAGE_SHIFT == 0)
583 return -EINVAL;
566 if (len & ~HPAGE_MASK) 584 if (len & ~HPAGE_MASK)
567 return -EINVAL; 585 return -EINVAL;
568 586
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
619 unsigned long ea, unsigned long vsid, int local) 637 unsigned long ea, unsigned long vsid, int local)
620{ 638{
621 pte_t *ptep; 639 pte_t *ptep;
622 unsigned long va, vpn; 640 unsigned long old_pte, new_pte;
623 pte_t old_pte, new_pte; 641 unsigned long va, rflags, pa;
624 unsigned long rflags, prpn;
625 long slot; 642 long slot;
626 int err = 1; 643 int err = 1;
627 644
628 spin_lock(&mm->page_table_lock);
629
630 ptep = huge_pte_offset(mm, ea); 645 ptep = huge_pte_offset(mm, ea);
631 646
632 /* Search the Linux page table for a match with va */ 647 /* Search the Linux page table for a match with va */
633 va = (vsid << 28) | (ea & 0x0fffffff); 648 va = (vsid << 28) | (ea & 0x0fffffff);
634 vpn = va >> HPAGE_SHIFT;
635 649
636 /* 650 /*
637 * If no pte found or not present, send the problem up to 651 * If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
640 if (unlikely(!ptep || pte_none(*ptep))) 654 if (unlikely(!ptep || pte_none(*ptep)))
641 goto out; 655 goto out;
642 656
643/* BUG_ON(pte_bad(*ptep)); */
644
645 /* 657 /*
646 * Check the user's access rights to the page. If access should be 658 * Check the user's access rights to the page. If access should be
647 * prevented then send the problem up to do_page_fault. 659 * prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
661 */ 673 */
662 674
663 675
664 old_pte = *ptep; 676 do {
665 new_pte = old_pte; 677 old_pte = pte_val(*ptep);
666 678 if (old_pte & _PAGE_BUSY)
667 rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); 679 goto out;
680 new_pte = old_pte | _PAGE_BUSY |
681 _PAGE_ACCESSED | _PAGE_HASHPTE;
682 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
683 old_pte, new_pte));
684
685 rflags = 0x2 | (!(new_pte & _PAGE_RW));
668 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 686 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
669 rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); 687 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
670 688
671 /* Check if pte already has an hpte (case 2) */ 689 /* Check if pte already has an hpte (case 2) */
672 if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { 690 if (unlikely(old_pte & _PAGE_HASHPTE)) {
673 /* There MIGHT be an HPTE for this pte */ 691 /* There MIGHT be an HPTE for this pte */
674 unsigned long hash, slot; 692 unsigned long hash, slot;
675 693
676 hash = hpt_hash(vpn, 1); 694 hash = hpt_hash(va, HPAGE_SHIFT);
677 if (pte_val(old_pte) & _PAGE_SECONDARY) 695 if (old_pte & _PAGE_F_SECOND)
678 hash = ~hash; 696 hash = ~hash;
679 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 697 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
680 slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; 698 slot += (old_pte & _PAGE_F_GIX) >> 12;
681 699
682 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) 700 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
683 pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; 701 old_pte &= ~_PAGE_HPTEFLAGS;
684 } 702 }
685 703
686 if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { 704 if (likely(!(old_pte & _PAGE_HASHPTE))) {
687 unsigned long hash = hpt_hash(vpn, 1); 705 unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
688 unsigned long hpte_group; 706 unsigned long hpte_group;
689 707
690 prpn = pte_pfn(old_pte); 708 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
691 709
692repeat: 710repeat:
693 hpte_group = ((hash & htab_hash_mask) * 711 hpte_group = ((hash & htab_hash_mask) *
694 HPTES_PER_GROUP) & ~0x7UL; 712 HPTES_PER_GROUP) & ~0x7UL;
695 713
696 /* Update the linux pte with the HPTE slot */ 714 /* clear HPTE slot information in new PTE */
697 pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; 715 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
698 pte_val(new_pte) |= _PAGE_HASHPTE;
699 716
700 /* Add in WIMG bits */ 717 /* Add in WIMG bits */
701 /* XXX We should store these in the pte */ 718 /* XXX We should store these in the pte */
719 /* --BenH: I think they are ... */
702 rflags |= _PAGE_COHERENT; 720 rflags |= _PAGE_COHERENT;
703 721
704 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 722 /* Insert into the hash table, primary slot */
705 HPTE_V_LARGE, rflags); 723 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
724 mmu_huge_psize);
706 725
707 /* Primary is full, try the secondary */ 726 /* Primary is full, try the secondary */
708 if (unlikely(slot == -1)) { 727 if (unlikely(slot == -1)) {
709 pte_val(new_pte) |= _PAGE_SECONDARY; 728 new_pte |= _PAGE_F_SECOND;
710 hpte_group = ((~hash & htab_hash_mask) * 729 hpte_group = ((~hash & htab_hash_mask) *
711 HPTES_PER_GROUP) & ~0x7UL; 730 HPTES_PER_GROUP) & ~0x7UL;
712 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 731 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
713 HPTE_V_LARGE |
714 HPTE_V_SECONDARY, 732 HPTE_V_SECONDARY,
715 rflags); 733 mmu_huge_psize);
716 if (slot == -1) { 734 if (slot == -1) {
717 if (mftb() & 0x1) 735 if (mftb() & 0x1)
718 hpte_group = ((hash & htab_hash_mask) * 736 hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
726 if (unlikely(slot == -2)) 744 if (unlikely(slot == -2))
727 panic("hash_huge_page: pte_insert failed\n"); 745 panic("hash_huge_page: pte_insert failed\n");
728 746
729 pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; 747 new_pte |= (slot << 12) & _PAGE_F_GIX;
730
731 /*
732 * No need to use ldarx/stdcx here because all who
733 * might be updating the pte will hold the
734 * page_table_lock
735 */
736 *ptep = new_pte;
737 } 748 }
738 749
750 /*
751 * No need to use ldarx/stdcx here because all who
752 * might be updating the pte will hold the
753 * page_table_lock
754 */
755 *ptep = __pte(new_pte & ~_PAGE_BUSY);
756
739 err = 0; 757 err = 0;
740 758
741 out: 759 out:
742 spin_unlock(&mm->page_table_lock);
743
744 return err; 760 return err;
745} 761}
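
hash_huge_page() above no longer takes page_table_lock; instead it claims the PTE with a cmpxchg loop on _PAGE_BUSY before touching the hash table. The same pattern, re-expressed with C11 atomics as a sketch; the bit values are placeholders, and only the claim/retry structure mirrors the patch.

    #include <stdatomic.h>
    #include <stdio.h>

    #define PAGE_BUSY     0x1UL
    #define PAGE_ACCESSED 0x2UL
    #define PAGE_HASHPTE  0x4UL

    /* Returns 0 if the PTE was claimed, -1 if another CPU holds the busy bit. */
    static int pte_claim(_Atomic unsigned long *ptep, unsigned long *old_out)
    {
        unsigned long old = atomic_load(ptep);
        unsigned long new;

        do {
            if (old & PAGE_BUSY)
                return -1;                       /* someone else is hashing it */
            new = old | PAGE_BUSY | PAGE_ACCESSED | PAGE_HASHPTE;
        } while (!atomic_compare_exchange_weak(ptep, &old, new));

        *old_out = old;
        return 0;
    }

    int main(void)
    {
        _Atomic unsigned long pte = 0x1000;
        unsigned long old = 0;
        printf("claim=%d old=%#lx new=%#lx\n",
               pte_claim(&pte, &old), old, (unsigned long)atomic_load(&pte));
        return 0;
    }
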
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index aa98b79e734c..ce974c83d88a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -187,12 +187,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
187 memset(addr, 0, kmem_cache_size(cache)); 187 memset(addr, 0, kmem_cache_size(cache));
188} 188}
189 189
190#ifdef CONFIG_PPC_64K_PAGES
191static const int pgtable_cache_size[2] = {
192 PTE_TABLE_SIZE, PGD_TABLE_SIZE
193};
194static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
195 "pte_pmd_cache", "pgd_cache",
196};
197#else
190static const int pgtable_cache_size[2] = { 198static const int pgtable_cache_size[2] = {
191 PTE_TABLE_SIZE, PMD_TABLE_SIZE 199 PTE_TABLE_SIZE, PMD_TABLE_SIZE
192}; 200};
193static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 201static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
194 "pgd_pte_cache", "pud_pmd_cache", 202 "pgd_pte_cache", "pud_pmd_cache",
195}; 203};
204#endif /* CONFIG_PPC_64K_PAGES */
196 205
197kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; 206kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
198 207
@@ -200,19 +209,14 @@ void pgtable_cache_init(void)
200{ 209{
201 int i; 210 int i;
202 211
203 BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
204 BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
205 BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
206 BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
207
208 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { 212 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
209 int size = pgtable_cache_size[i]; 213 int size = pgtable_cache_size[i];
210 const char *name = pgtable_cache_name[i]; 214 const char *name = pgtable_cache_name[i];
211 215
212 pgtable_cache[i] = kmem_cache_create(name, 216 pgtable_cache[i] = kmem_cache_create(name,
213 size, size, 217 size, size,
214 SLAB_HWCACHE_ALIGN 218 SLAB_HWCACHE_ALIGN |
215 | SLAB_MUST_HWCACHE_ALIGN, 219 SLAB_MUST_HWCACHE_ALIGN,
216 zero_ctor, 220 zero_ctor,
217 NULL); 221 NULL);
218 if (! pgtable_cache[i]) 222 if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e14..7faa46b71f21 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
61int mem_init_done; 61int mem_init_done;
62unsigned long memory_limit; 62unsigned long memory_limit;
63 63
64extern void hash_preload(struct mm_struct *mm, unsigned long ea,
65 unsigned long access, unsigned long trap);
66
64/* 67/*
65 * This is called by /dev/mem to know if a given address has to 68 * This is called by /dev/mem to know if a given address has to
66 * be mapped non-cacheable or not 69 * be mapped non-cacheable or not
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
493void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 496void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
494 pte_t pte) 497 pte_t pte)
495{ 498{
496 /* handle i-cache coherency */ 499#ifdef CONFIG_PPC_STD_MMU
497 unsigned long pfn = pte_pfn(pte); 500 unsigned long access = 0, trap;
498#ifdef CONFIG_PPC32
499 pmd_t *pmd;
500#else
501 unsigned long vsid;
502 void *pgdir;
503 pte_t *ptep;
504 int local = 0;
505 cpumask_t tmp;
506 unsigned long flags;
507#endif 501#endif
502 unsigned long pfn = pte_pfn(pte);
508 503
509 /* handle i-cache coherency */ 504 /* handle i-cache coherency */
510 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && 505 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
535 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ 530 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
536 if (!pte_young(pte) || address >= TASK_SIZE) 531 if (!pte_young(pte) || address >= TASK_SIZE)
537 return; 532 return;
538#ifdef CONFIG_PPC32
539 if (Hash == 0)
540 return;
541 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
542 if (!pmd_none(*pmd))
543 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
544#else
545 pgdir = vma->vm_mm->pgd;
546 if (pgdir == NULL)
547 return;
548 533
549 ptep = find_linux_pte(pgdir, address); 534 /* We try to figure out if we are coming from an instruction
550 if (!ptep) 535 * access fault and pass that down to __hash_page so we avoid
536 * double-faulting on execution of fresh text. We have to test
537 * for regs NULL since init will get here first thing at boot
538 *
539 * We also avoid filling the hash if not coming from a fault
540 */
541 if (current->thread.regs == NULL)
551 return; 542 return;
552 543 trap = TRAP(current->thread.regs);
553 vsid = get_vsid(vma->vm_mm->context.id, address); 544 if (trap == 0x400)
554 545 access |= _PAGE_EXEC;
555 local_irq_save(flags); 546 else if (trap != 0x300)
556 tmp = cpumask_of_cpu(smp_processor_id()); 547 return;
557 if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) 548 hash_preload(vma->vm_mm, address, access, trap);
558 local = 1; 549#endif /* CONFIG_PPC_STD_MMU */
559
560 __hash_page(address, 0, vsid, ptep, 0x300, local);
561 local_irq_restore(flags);
562#endif
563#endif
564} 550}
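
The rewritten update_mmu_cache() above no longer walks the page tables itself; it only classifies the fault and hands the work to hash_preload(). A minimal user-space sketch of that classification, with illustrative stand-ins for the trap numbers and _PAGE_EXEC (not the kernel's definitions):

/*
 * Sketch of the access/trap selection done before calling hash_preload().
 * 0x400 (instruction storage) and 0x300 (data storage) are the powerpc
 * interrupt vectors used above; PAGE_EXEC is a stand-in value.
 */
#include <stdio.h>

#define PAGE_EXEC 0x4UL   /* stand-in for _PAGE_EXEC */

/* Returns -1 when the fault type does not warrant preloading the hash. */
static long access_for_trap(unsigned long trap)
{
        unsigned long access = 0;

        if (trap == 0x400)              /* instruction access fault */
                access |= PAGE_EXEC;    /* ask for execute permission */
        else if (trap != 0x300)         /* only data storage faults qualify */
                return -1;
        return access;
}

int main(void)
{
        printf("0x400 -> %ld\n", access_for_trap(0x400));  /* 4  */
        printf("0x300 -> %ld\n", access_for_trap(0x300));  /* 0  */
        printf("0x700 -> %ld\n", access_for_trap(0x700));  /* -1 */
        return 0;
}
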
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d1fbd3fe684f..900842451bd3 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -100,7 +100,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
100 pud_t *pudp; 100 pud_t *pudp;
101 pmd_t *pmdp; 101 pmd_t *pmdp;
102 pte_t *ptep; 102 pte_t *ptep;
103 unsigned long vsid;
104 103
105 if (mem_init_done) { 104 if (mem_init_done) {
106 pgdp = pgd_offset_k(ea); 105 pgdp = pgd_offset_k(ea);
@@ -116,28 +115,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
116 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 115 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
117 __pgprot(flags))); 116 __pgprot(flags)));
118 } else { 117 } else {
119 unsigned long va, vpn, hash, hpteg;
120
121 /* 118 /*
122 * If the mm subsystem is not fully up, we cannot create a 119 * If the mm subsystem is not fully up, we cannot create a
123 * linux page table entry for this mapping. Simply bolt an 120 * linux page table entry for this mapping. Simply bolt an
124 * entry in the hardware page table. 121 * entry in the hardware page table.
122 *
125 */ 123 */
126 vsid = get_kernel_vsid(ea); 124 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
127 va = (vsid << 28) | (ea & 0xFFFFFFF); 125 mmu_virtual_psize))
128 vpn = va >> PAGE_SHIFT; 126 panic("Can't map bolted IO mapping");
129
130 hash = hpt_hash(vpn, 0);
131
132 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
133
 134 /* Panic if a pte group is full */
135 if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
136 HPTE_V_BOLTED,
137 _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
138 == -1) {
139 panic("map_io_page: could not insert mapping");
140 }
141 } 127 }
142 return 0; 128 return 0;
143} 129}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e9..d137abd241ff 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
179} 179}
180 180
181/* 181/*
182 * Preload a translation in the hash table
183 */
184void hash_preload(struct mm_struct *mm, unsigned long ea,
185 unsigned long access, unsigned long trap)
186{
187 pmd_t *pmd;
188
189 if (Hash == 0)
190 return;
 191 pmd = pmd_offset(pgd_offset(mm, ea), ea);
 192 if (!pmd_none(*pmd))
 193 add_hash_page(mm->context, ea, pmd_val(*pmd));
194}
195
196/*
182 * Initialize the hash table and patch the instructions in hashtable.S. 197 * Initialize the hash table and patch the instructions in hashtable.S.
183 */ 198 */
184void __init MMU_init_hw(void) 199void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a37..60e852f2f8e5 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 */ 15 */
16 16
17#undef DEBUG
18
17#include <linux/config.h> 19#include <linux/config.h>
18#include <asm/pgtable.h> 20#include <asm/pgtable.h>
19#include <asm/mmu.h> 21#include <asm/mmu.h>
20#include <asm/mmu_context.h> 22#include <asm/mmu_context.h>
21#include <asm/paca.h> 23#include <asm/paca.h>
22#include <asm/cputable.h> 24#include <asm/cputable.h>
25#include <asm/cacheflush.h>
26
27#ifdef DEBUG
28#define DBG(fmt...) udbg_printf(fmt)
29#else
30#define DBG(fmt...)
31#endif
23 32
24extern void slb_allocate(unsigned long ea); 33extern void slb_allocate_realmode(unsigned long ea);
34extern void slb_allocate_user(unsigned long ea);
35
36static void slb_allocate(unsigned long ea)
37{
38 /* Currently, we do real mode for all SLBs including user, but
39 * that will change if we bring back dynamic VSIDs
40 */
41 slb_allocate_realmode(ea);
42}
25 43
26static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) 44static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
27{ 45{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
46{ 64{
47 /* If you change this make sure you change SLB_NUM_BOLTED 65 /* If you change this make sure you change SLB_NUM_BOLTED
48 * appropriately too. */ 66 * appropriately too. */
49 unsigned long ksp_flags = SLB_VSID_KERNEL; 67 unsigned long linear_llp, virtual_llp, lflags, vflags;
50 unsigned long ksp_esid_data; 68 unsigned long ksp_esid_data;
51 69
52 WARN_ON(!irqs_disabled()); 70 WARN_ON(!irqs_disabled());
53 71
54 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 72 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
55 ksp_flags |= SLB_VSID_L; 73 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
74 lflags = SLB_VSID_KERNEL | linear_llp;
75 vflags = SLB_VSID_KERNEL | virtual_llp;
56 76
57 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); 77 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
58 if ((ksp_esid_data & ESID_MASK) == KERNELBASE) 78 if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
67 /* Slot 2 - kernel stack */ 87 /* Slot 2 - kernel stack */
68 "slbmte %2,%3\n" 88 "slbmte %2,%3\n"
69 "isync" 89 "isync"
70 :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), 90 :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
71 "r"(mk_esid_data(VMALLOCBASE, 1)), 91 "r"(mk_esid_data(VMALLOCBASE, 1)),
72 "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), 92 "r"(mk_vsid_data(ksp_esid_data, lflags)),
73 "r"(ksp_esid_data) 93 "r"(ksp_esid_data)
74 : "memory"); 94 : "memory");
75} 95}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
102 122
103 get_paca()->slb_cache_ptr = 0; 123 get_paca()->slb_cache_ptr = 0;
104 get_paca()->context = mm->context; 124 get_paca()->context = mm->context;
125#ifdef CONFIG_PPC_64K_PAGES
126 get_paca()->pgdir = mm->pgd;
127#endif /* CONFIG_PPC_64K_PAGES */
105 128
106 /* 129 /*
107 * preload some userspace segments into the SLB. 130 * preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
131 slb_allocate(unmapped_base); 154 slb_allocate(unmapped_base);
132} 155}
133 156
157static inline void patch_slb_encoding(unsigned int *insn_addr,
158 unsigned int immed)
159{
160 /* Assume the instruction had a "0" immediate value, just
161 * "or" in the new value
162 */
163 *insn_addr |= immed;
164 flush_icache_range((unsigned long)insn_addr, 4+
165 (unsigned long)insn_addr);
166}
167
134void slb_initialize(void) 168void slb_initialize(void)
135{ 169{
170 unsigned long linear_llp, virtual_llp;
171 static int slb_encoding_inited;
172 extern unsigned int *slb_miss_kernel_load_linear;
173 extern unsigned int *slb_miss_kernel_load_virtual;
174 extern unsigned int *slb_miss_user_load_normal;
175#ifdef CONFIG_HUGETLB_PAGE
176 extern unsigned int *slb_miss_user_load_huge;
177 unsigned long huge_llp;
178
179 huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
180#endif
181
182 /* Prepare our SLB miss handler based on our page size */
183 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
184 virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
185 if (!slb_encoding_inited) {
186 slb_encoding_inited = 1;
187 patch_slb_encoding(slb_miss_kernel_load_linear,
188 SLB_VSID_KERNEL | linear_llp);
189 patch_slb_encoding(slb_miss_kernel_load_virtual,
190 SLB_VSID_KERNEL | virtual_llp);
191 patch_slb_encoding(slb_miss_user_load_normal,
192 SLB_VSID_USER | virtual_llp);
193
194 DBG("SLB: linear LLP = %04x\n", linear_llp);
195 DBG("SLB: virtual LLP = %04x\n", virtual_llp);
196#ifdef CONFIG_HUGETLB_PAGE
197 patch_slb_encoding(slb_miss_user_load_huge,
198 SLB_VSID_USER | huge_llp);
199 DBG("SLB: huge LLP = %04x\n", huge_llp);
200#endif
201 }
202
136 /* On iSeries the bolted entries have already been set up by 203 /* On iSeries the bolted entries have already been set up by
137 * the hypervisor from the lparMap data in head.S */ 204 * the hypervisor from the lparMap data in head.S */
138#ifndef CONFIG_PPC_ISERIES 205#ifndef CONFIG_PPC_ISERIES
139 unsigned long flags = SLB_VSID_KERNEL; 206 {
207 unsigned long lflags, vflags;
140 208
141 /* Invalidate the entire SLB (even slot 0) & all the ERATS */ 209 lflags = SLB_VSID_KERNEL | linear_llp;
142 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 210 vflags = SLB_VSID_KERNEL | virtual_llp;
143 flags |= SLB_VSID_L;
144 211
145 asm volatile("isync":::"memory"); 212 /* Invalidate the entire SLB (even slot 0) & all the ERATS */
146 asm volatile("slbmte %0,%0"::"r" (0) : "memory"); 213 asm volatile("isync":::"memory");
214 asm volatile("slbmte %0,%0"::"r" (0) : "memory");
147 asm volatile("isync; slbia; isync":::"memory"); 215 asm volatile("isync; slbia; isync":::"memory");
148 create_slbe(KERNELBASE, flags, 0); 216 create_slbe(KERNELBASE, lflags, 0);
149 create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); 217
218 /* VMALLOC space has 4K pages always for now */
219 create_slbe(VMALLOCBASE, vflags, 1);
220
150 /* We don't bolt the stack for the time being - we're in boot, 221 /* We don't bolt the stack for the time being - we're in boot,
151 * so the stack is in the bolted segment. By the time it goes 222 * so the stack is in the bolted segment. By the time it goes
152 * elsewhere, we'll call _switch() which will bolt in the new 223 * elsewhere, we'll call _switch() which will bolt in the new
153 * one. */ 224 * one. */
154 asm volatile("isync":::"memory"); 225 asm volatile("isync":::"memory");
155#endif 226 }
227#endif /* CONFIG_PPC_ISERIES */
156 228
157 get_paca()->stab_rr = SLB_NUM_BOLTED; 229 get_paca()->stab_rr = SLB_NUM_BOLTED;
158} 230}
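
patch_slb_encoding() above works because the SLB flag word is loaded with an "li" whose 16-bit immediate starts out as zero, so OR-ing the real flags into the instruction word rewrites the immediate in place. A stand-alone sketch of the same trick, assuming the standard encoding 0x39600000 for "li r11,0" and an example flag value:

/*
 * "li rD,IMM" is really addi rD,0,IMM and keeps its signed 16-bit
 * immediate in the low half-word of the instruction.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LI_R11_0 0x39600000u    /* addi r11,0,0 == li r11,0 */

static void patch_immediate(uint32_t *insn, uint16_t immed)
{
        assert((*insn & 0xffffu) == 0);   /* must start as "li r11,0" */
        *insn |= immed;                   /* splice in the new immediate */
        /* the kernel then flushes the icache over this word */
}

int main(void)
{
        uint32_t insn = LI_R11_0;

        patch_immediate(&insn, 0x0490);   /* e.g. kernel flags | llp bits */
        printf("patched insn: 0x%08x\n", insn);   /* 0x39600490 */
        return 0;
}
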
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503bc..3e18241b6f35 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
18 18
19#include <linux/config.h> 19#include <linux/config.h>
20#include <asm/processor.h> 20#include <asm/processor.h>
21#include <asm/page.h>
22#include <asm/mmu.h>
23#include <asm/ppc_asm.h> 21#include <asm/ppc_asm.h>
24#include <asm/asm-offsets.h> 22#include <asm/asm-offsets.h>
25#include <asm/cputable.h> 23#include <asm/cputable.h>
24#include <asm/page.h>
25#include <asm/mmu.h>
26#include <asm/pgtable.h>
26 27
27/* void slb_allocate(unsigned long ea); 28/* void slb_allocate_realmode(unsigned long ea);
28 * 29 *
29 * Create an SLB entry for the given EA (user or kernel). 30 * Create an SLB entry for the given EA (user or kernel).
30 * r3 = faulting address, r13 = PACA 31 * r3 = faulting address, r13 = PACA
31 * r9, r10, r11 are clobbered by this function 32 * r9, r10, r11 are clobbered by this function
32 * No other registers are examined or changed. 33 * No other registers are examined or changed.
33 */ 34 */
34_GLOBAL(slb_allocate) 35_GLOBAL(slb_allocate_realmode)
35 /* 36 /* r3 = faulting address */
36 * First find a slot, round robin. Previously we tried to find
37 * a free slot first but that took too long. Unfortunately we
38 * dont have any LRU information to help us choose a slot.
39 */
40#ifdef CONFIG_PPC_ISERIES
41 /*
42 * On iSeries, the "bolted" stack segment can be cast out on
43 * shared processor switch so we need to check for a miss on
44 * it and restore it to the right slot.
45 */
46 ld r9,PACAKSAVE(r13)
47 clrrdi r9,r9,28
48 clrrdi r11,r3,28
49 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
50 cmpld r9,r11
51 beq 3f
52#endif /* CONFIG_PPC_ISERIES */
53
54 ld r10,PACASTABRR(r13)
55 addi r10,r10,1
56 /* use a cpu feature mask if we ever change our slb size */
57 cmpldi r10,SLB_NUM_ENTRIES
58
59 blt+ 4f
60 li r10,SLB_NUM_BOLTED
61
624:
63 std r10,PACASTABRR(r13)
643:
65 /* r3 = faulting address, r10 = entry */
66 37
67 srdi r9,r3,60 /* get region */ 38 srdi r9,r3,60 /* get region */
68 srdi r3,r3,28 /* get esid */ 39 srdi r10,r3,28 /* get esid */
69 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ 40 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
70 41
71 rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ 42 /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
72 oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
73
74 /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
75
76 blt cr7,0f /* user or kernel? */ 43 blt cr7,0f /* user or kernel? */
77 44
78 /* kernel address: proto-VSID = ESID */ 45 /* kernel address: proto-VSID = ESID */
@@ -81,43 +48,161 @@ _GLOBAL(slb_allocate)
81 * top segment. That's ok, the scramble below will translate 48 * top segment. That's ok, the scramble below will translate
82 * it to VSID 0, which is reserved as a bad VSID - one which 49 * it to VSID 0, which is reserved as a bad VSID - one which
83 * will never have any pages in it. */ 50 * will never have any pages in it. */
84 li r11,SLB_VSID_KERNEL
85BEGIN_FTR_SECTION
86 bne cr7,9f
87 li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
88END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
89 b 9f
90 51
910: /* user address: proto-VSID = context<<15 | ESID */ 52 /* Check if hitting the linear mapping of the vmalloc/ioremap
92 srdi. r9,r3,USER_ESID_BITS 53 * kernel space
54 */
55 bne cr7,1f
56
57 /* Linear mapping encoding bits, the "li" instruction below will
58 * be patched by the kernel at boot
59 */
60_GLOBAL(slb_miss_kernel_load_linear)
61 li r11,0
62 b slb_finish_load
63
641: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
65 * will be patched by the kernel at boot
66 */
67_GLOBAL(slb_miss_kernel_load_virtual)
68 li r11,0
69 b slb_finish_load
70
71
720: /* user address: proto-VSID = context << 15 | ESID. First check
73 * if the address is within the boundaries of the user region
74 */
75 srdi. r9,r10,USER_ESID_BITS
93 bne- 8f /* invalid ea bits set */ 76 bne- 8f /* invalid ea bits set */
94 77
78 /* Figure out if the segment contains huge pages */
95#ifdef CONFIG_HUGETLB_PAGE 79#ifdef CONFIG_HUGETLB_PAGE
96BEGIN_FTR_SECTION 80BEGIN_FTR_SECTION
81 b 1f
82END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
97 lhz r9,PACAHIGHHTLBAREAS(r13) 83 lhz r9,PACAHIGHHTLBAREAS(r13)
98 srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) 84 srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
99 srd r9,r9,r11 85 srd r9,r9,r11
100 lhz r11,PACALOWHTLBAREAS(r13) 86 lhz r11,PACALOWHTLBAREAS(r13)
101 srd r11,r11,r3 87 srd r11,r11,r10
102 or r9,r9,r11 88 or. r9,r9,r11
103END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 89 beq 1f
90_GLOBAL(slb_miss_user_load_huge)
91 li r11,0
92 b 2f
931:
104#endif /* CONFIG_HUGETLB_PAGE */ 94#endif /* CONFIG_HUGETLB_PAGE */
105 95
106 li r11,SLB_VSID_USER 96_GLOBAL(slb_miss_user_load_normal)
97 li r11,0
107 98
108#ifdef CONFIG_HUGETLB_PAGE 992:
109BEGIN_FTR_SECTION 100 ld r9,PACACONTEXTID(r13)
110 rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ 101 rldimi r10,r9,USER_ESID_BITS,0
111END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 102 b slb_finish_load
112#endif /* CONFIG_HUGETLB_PAGE */ 103
1048: /* invalid EA */
105 li r10,0 /* BAD_VSID */
106 li r11,SLB_VSID_USER /* flags don't much matter */
107 b slb_finish_load
108
109#ifdef __DISABLED__
110
111/* void slb_allocate_user(unsigned long ea);
112 *
113 * Create an SLB entry for the given EA (user or kernel).
114 * r3 = faulting address, r13 = PACA
115 * r9, r10, r11 are clobbered by this function
116 * No other registers are examined or changed.
117 *
118 * It is called with translation enabled in order to be able to walk the
119 * page tables. This is not currently used.
120 */
121_GLOBAL(slb_allocate_user)
122 /* r3 = faulting address */
123 srdi r10,r3,28 /* get esid */
124
125 crset 4*cr7+lt /* set "user" flag for later */
126
 127 /* check if we fit in the range covered by the page tables */
128 srdi. r9,r3,PGTABLE_EADDR_SIZE
129 crnot 4*cr0+eq,4*cr0+eq
130 beqlr
113 131
132 /* now we need to get to the page tables in order to get the page
133 * size encoding from the PMD. In the future, we'll be able to deal
134 * with 1T segments too by getting the encoding from the PGD instead
135 */
136 ld r9,PACAPGDIR(r13)
137 cmpldi cr0,r9,0
138 beqlr
139 rlwinm r11,r10,8,25,28
140 ldx r9,r9,r11 /* get pgd_t */
141 cmpldi cr0,r9,0
142 beqlr
143 rlwinm r11,r10,3,17,28
144 ldx r9,r9,r11 /* get pmd_t */
145 cmpldi cr0,r9,0
146 beqlr
147
148 /* build vsid flags */
149 andi. r11,r9,SLB_VSID_LLP
150 ori r11,r11,SLB_VSID_USER
151
152 /* get context to calculate proto-VSID */
114 ld r9,PACACONTEXTID(r13) 153 ld r9,PACACONTEXTID(r13)
115 rldimi r3,r9,USER_ESID_BITS,0 154 rldimi r10,r9,USER_ESID_BITS,0
155
156 /* fall through slb_finish_load */
157
158#endif /* __DISABLED__ */
116 159
1179: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
118 ASM_VSID_SCRAMBLE(r3,r9)
119 160
120 rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ 161/*
162 * Finish loading of an SLB entry and return
163 *
164 * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
165 */
166slb_finish_load:
167 ASM_VSID_SCRAMBLE(r10,r9)
168 rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
169
170 /* r3 = EA, r11 = VSID data */
171 /*
172 * Find a slot, round robin. Previously we tried to find a
173 * free slot first but that took too long. Unfortunately we
 174 * don't have any LRU information to help us choose a slot.
175 */
176#ifdef CONFIG_PPC_ISERIES
177 /*
178 * On iSeries, the "bolted" stack segment can be cast out on
179 * shared processor switch so we need to check for a miss on
180 * it and restore it to the right slot.
181 */
182 ld r9,PACAKSAVE(r13)
183 clrrdi r9,r9,28
184 clrrdi r3,r3,28
185 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
186 cmpld r9,r3
187 beq 3f
188#endif /* CONFIG_PPC_ISERIES */
189
190 ld r10,PACASTABRR(r13)
191 addi r10,r10,1
192 /* use a cpu feature mask if we ever change our slb size */
193 cmpldi r10,SLB_NUM_ENTRIES
194
195 blt+ 4f
196 li r10,SLB_NUM_BOLTED
197
1984:
199 std r10,PACASTABRR(r13)
200
2013:
202 rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
203 oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
204
205 /* r3 = ESID data, r11 = VSID data */
121 206
122 /* 207 /*
123 * No need for an isync before or after this slbmte. The exception 208 * No need for an isync before or after this slbmte. The exception
@@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
125 */ 210 */
126 slbmte r11,r10 211 slbmte r11,r10
127 212
128 bgelr cr7 /* we're done for kernel addresses */ 213 /* we're done for kernel addresses */
214 crclr 4*cr0+eq /* set result to "success" */
215 bgelr cr7
129 216
130 /* Update the slb cache */ 217 /* Update the slb cache */
131 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ 218 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
143 li r3,SLB_CACHE_ENTRIES+1 230 li r3,SLB_CACHE_ENTRIES+1
1442: 2312:
145 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ 232 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
233 crclr 4*cr0+eq /* set result to "success" */
146 blr 234 blr
147 235
1488: /* invalid EA */
149 li r3,0 /* BAD_VSID */
150 li r11,SLB_VSID_USER /* flags don't much matter */
151 b 9b
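
slb_finish_load combines the scrambled VSID with the flag word (rldimi r11,r10,SLB_VSID_SHIFT,16) and the ESID with the slot and valid bit before the slbmte. A user-space sketch of those two words, mirroring mk_esid_data/mk_vsid_data in slb.c; SLB_VSID_SHIFT = 12, 256MB segments and the valid bit at 1 << 27 are the conventional values and assumed here:

#include <stdint.h>
#include <stdio.h>

#define ESID_MASK       0xfffffffff0000000ull   /* top 36 bits of the EA */
#define SLB_ESID_V      (1ull << 27)            /* entry valid */
#define SLB_VSID_SHIFT  12

static uint64_t mk_esid_data(uint64_t ea, uint64_t entry)
{
        return (ea & ESID_MASK) | SLB_ESID_V | entry;   /* EA[0:35] | V | slot */
}

static uint64_t mk_vsid_data(uint64_t vsid, uint64_t flags)
{
        return (vsid << SLB_VSID_SHIFT) | flags;        /* VSID | Ks/Kp/LLP bits */
}

int main(void)
{
        uint64_t ea = 0xc000000002345678ull;

        printf("esid_data = %016llx\n",
               (unsigned long long)mk_esid_data(ea, 3));
        printf("vsid_data = %016llx\n",
               (unsigned long long)mk_vsid_data(0x123456789ull, 0x490));
        return 0;
}
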
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf27..fa325dbf98fc 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -26,7 +26,6 @@ struct stab_entry {
26 unsigned long vsid_data; 26 unsigned long vsid_data;
27}; 27};
28 28
29/* Both the segment table and SLB code uses the following cache */
30#define NR_STAB_CACHE_ENTRIES 8 29#define NR_STAB_CACHE_ENTRIES 8
31DEFINE_PER_CPU(long, stab_cache_ptr); 30DEFINE_PER_CPU(long, stab_cache_ptr);
32DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); 31DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
186 /* Never flush the first entry. */ 185 /* Never flush the first entry. */
187 ste += 1; 186 ste += 1;
188 for (entry = 1; 187 for (entry = 1;
189 entry < (PAGE_SIZE / sizeof(struct stab_entry)); 188 entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
190 entry++, ste++) { 189 entry++, ste++) {
191 unsigned long ea; 190 unsigned long ea;
192 ea = ste->esid_data & ESID_MASK; 191 ea = ste->esid_data & ESID_MASK;
@@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
200 199
201 __get_cpu_var(stab_cache_ptr) = 0; 200 __get_cpu_var(stab_cache_ptr) = 0;
202 201
202#ifdef CONFIG_PPC_64K_PAGES
203 get_paca()->pgdir = mm->pgd;
204#endif /* CONFIG_PPC_64K_PAGES */
205
203 /* Now preload some entries for the new task */ 206 /* Now preload some entries for the new task */
204 if (test_tsk_thread_flag(tsk, TIF_32BIT)) 207 if (test_tsk_thread_flag(tsk, TIF_32BIT))
205 unmapped_base = TASK_UNMAPPED_BASE_USER32; 208 unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
223 asm volatile("sync" : : : "memory"); 226 asm volatile("sync" : : : "memory");
224} 227}
225 228
226extern void slb_initialize(void);
227
228/* 229/*
229 * Allocate segment tables for secondary CPUs. These must all go in 230 * Allocate segment tables for secondary CPUs. These must all go in
230 * the first (bolted) segment, so that do_stab_bolted won't get a 231 * the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +244,21 @@ void stabs_alloc(void)
243 if (cpu == 0) 244 if (cpu == 0)
244 continue; /* stab for CPU 0 is statically allocated */ 245 continue; /* stab for CPU 0 is statically allocated */
245 246
246 newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT); 247 newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
248 1<<SID_SHIFT);
247 if (! newstab) 249 if (! newstab)
248 panic("Unable to allocate segment table for CPU %d.\n", 250 panic("Unable to allocate segment table for CPU %d.\n",
249 cpu); 251 cpu);
250 252
251 newstab += KERNELBASE; 253 newstab += KERNELBASE;
252 254
253 memset((void *)newstab, 0, PAGE_SIZE); 255 memset((void *)newstab, 0, HW_PAGE_SIZE);
254 256
255 paca[cpu].stab_addr = newstab; 257 paca[cpu].stab_addr = newstab;
256 paca[cpu].stab_real = virt_to_abs(newstab); 258 paca[cpu].stab_real = virt_to_abs(newstab);
257 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); 259 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx "
260 "virtual, 0x%lx absolute\n",
261 cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
258 } 262 }
259} 263}
260 264
@@ -267,13 +271,9 @@ void stab_initialize(unsigned long stab)
267{ 271{
268 unsigned long vsid = get_kernel_vsid(KERNELBASE); 272 unsigned long vsid = get_kernel_vsid(KERNELBASE);
269 273
270 if (cpu_has_feature(CPU_FTR_SLB)) { 274 asm volatile("isync; slbia; isync":::"memory");
271 slb_initialize(); 275 make_ste(stab, GET_ESID(KERNELBASE), vsid);
272 } else {
273 asm volatile("isync; slbia; isync":::"memory");
274 make_ste(stab, GET_ESID(KERNELBASE), vsid);
275 276
276 /* Order update */ 277 /* Order update */
277 asm volatile("sync":::"memory"); 278 asm volatile("sync":::"memory");
278 }
279} 279}
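
The switch to HW_PAGE_SIZE in switch_stab() and stabs_alloc() reflects that the segment table always occupies one 4k hardware page, independent of the Linux page size. A tiny sketch of the entry count that falls out of that, with the usual sizes assumed:

#include <stdio.h>

struct stab_entry {
        unsigned long esid_data;
        unsigned long vsid_data;
};

int main(void)
{
        unsigned long hw_page_size = 4096;       /* hardware page, fixed */
        unsigned long linux_page_size = 65536;   /* with CONFIG_PPC_64K_PAGES */

        printf("entries scanned: %lu (not %lu)\n",
               hw_page_size / sizeof(struct stab_entry),
               linux_page_size / sizeof(struct stab_entry));
        return 0;
}
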
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4f..53e31b834ace 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
21 * as published by the Free Software Foundation; either version 21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version. 22 * 2 of the License, or (at your option) any later version.
23 */ 23 */
24
24#include <linux/config.h> 25#include <linux/config.h>
25#include <linux/kernel.h> 26#include <linux/kernel.h>
26#include <linux/mm.h> 27#include <linux/mm.h>
@@ -30,7 +31,7 @@
30#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
31#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
32#include <asm/tlb.h> 33#include <asm/tlb.h>
33#include <linux/highmem.h> 34#include <asm/bug.h>
34 35
35DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); 36DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
36 37
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
126 * (if we remove it we should clear the _PTE_HPTEFLAGS bits). 127 * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
127 */ 128 */
128void hpte_update(struct mm_struct *mm, unsigned long addr, 129void hpte_update(struct mm_struct *mm, unsigned long addr,
129 unsigned long pte, int wrprot) 130 pte_t *ptep, unsigned long pte, int huge)
130{ 131{
131 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 132 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
132 unsigned long vsid; 133 unsigned long vsid;
134 unsigned int psize = mmu_virtual_psize;
133 int i; 135 int i;
134 136
135 i = batch->index; 137 i = batch->index;
136 138
139 /* We mask the address for the base page size. Huge pages will
140 * have applied their own masking already
141 */
142 addr &= PAGE_MASK;
143
144 /* Get page size (maybe move back to caller) */
145 if (huge) {
146#ifdef CONFIG_HUGETLB_PAGE
147 psize = mmu_huge_psize;
148#else
149 BUG();
150#endif
151 }
152
137 /* 153 /*
138 * This can happen when we are in the middle of a TLB batch and 154 * This can happen when we are in the middle of a TLB batch and
139 * we encounter memory pressure (eg copy_page_range when it tries 155 * we encounter memory pressure (eg copy_page_range when it tries
140 * to allocate a new pte). If we have to reclaim memory and end 156 * to allocate a new pte). If we have to reclaim memory and end
141 * up scanning and resetting referenced bits then our batch context 157 * up scanning and resetting referenced bits then our batch context
142 * will change mid stream. 158 * will change mid stream.
159 *
160 * We also need to ensure only one page size is present in a given
161 * batch
143 */ 162 */
144 if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { 163 if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
145 flush_tlb_pending(); 164 flush_tlb_pending();
146 i = 0; 165 i = 0;
147 } 166 }
148 if (i == 0) { 167 if (i == 0) {
149 batch->mm = mm; 168 batch->mm = mm;
150 batch->large = pte_huge(pte); 169 batch->psize = psize;
151 } 170 }
152 if (addr < KERNELBASE) { 171 if (addr < KERNELBASE) {
153 vsid = get_vsid(mm->context.id, addr); 172 vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
155 } else 174 } else
156 vsid = get_kernel_vsid(addr); 175 vsid = get_kernel_vsid(addr);
157 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); 176 batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
158 batch->pte[i] = __pte(pte); 177 batch->pte[i] = __real_pte(__pte(pte), ptep);
159 batch->index = ++i; 178 batch->index = ++i;
160 if (i >= PPC64_TLB_BATCH_NR) 179 if (i >= PPC64_TLB_BATCH_NR)
161 flush_tlb_pending(); 180 flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
177 local = 1; 196 local = 1;
178 197
179 if (i == 1) 198 if (i == 1)
180 flush_hash_page(batch->vaddr[0], batch->pte[0], local); 199 flush_hash_page(batch->vaddr[0], batch->pte[0],
200 batch->psize, local);
181 else 201 else
182 flush_hash_range(i, local); 202 flush_hash_range(i, local);
183 batch->index = 0; 203 batch->index = 0;
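
With the page size now carried per batch, hpte_update() must flush a pending batch whenever the mm or the psize changes, so a single flush never mixes page sizes. A simplified sketch of that rule, using stand-in types rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct tlb_batch {
        void *mm;       /* owning mm_struct */
        int   psize;    /* page size index shared by every queued entry */
        int   index;    /* number of queued entries */
};

/* True when the caller must flush before queueing an entry for (mm, psize). */
static bool must_flush_first(const struct tlb_batch *b, void *mm, int psize)
{
        return b->index != 0 && (b->mm != mm || b->psize != psize);
}

int main(void)
{
        int mm_a, mm_b;
        struct tlb_batch batch = { &mm_a, /* psize */ 2, /* index */ 3 };

        printf("%d %d %d\n",
               must_flush_first(&batch, &mm_a, 2),   /* 0: same mm, same size */
               must_flush_first(&batch, &mm_a, 4),   /* 1: huge page mixed in */
               must_flush_first(&batch, &mm_b, 2));  /* 1: different mm */
        return 0;
}
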
diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c
index b3c6c3374ca6..30bdcf3925d9 100644
--- a/arch/powerpc/platforms/iseries/htab.c
+++ b/arch/powerpc/platforms/iseries/htab.c
@@ -39,15 +39,16 @@ static inline void iSeries_hunlock(unsigned long slot)
39 spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]); 39 spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
40} 40}
41 41
42static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, 42long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
43 unsigned long prpn, unsigned long vflags, 43 unsigned long pa, unsigned long rflags,
44 unsigned long rflags) 44 unsigned long vflags, int psize)
45{ 45{
46 unsigned long arpn;
47 long slot; 46 long slot;
48 hpte_t lhpte; 47 hpte_t lhpte;
49 int secondary = 0; 48 int secondary = 0;
50 49
50 BUG_ON(psize != MMU_PAGE_4K);
51
51 /* 52 /*
52 * The hypervisor tries both primary and secondary. 53 * The hypervisor tries both primary and secondary.
53 * If we are being called to insert in the secondary, 54 * If we are being called to insert in the secondary,
@@ -59,8 +60,19 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
59 60
60 iSeries_hlock(hpte_group); 61 iSeries_hlock(hpte_group);
61 62
62 slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); 63 slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT);
63 BUG_ON(lhpte.v & HPTE_V_VALID); 64 if (unlikely(lhpte.v & HPTE_V_VALID)) {
65 if (vflags & HPTE_V_BOLTED) {
66 HvCallHpt_setSwBits(slot, 0x10, 0);
67 HvCallHpt_setPp(slot, PP_RWXX);
68 iSeries_hunlock(hpte_group);
69 if (slot < 0)
70 return 0x8 | (slot & 7);
71 else
72 return slot & 7;
73 }
74 BUG();
75 }
64 76
65 if (slot == -1) { /* No available entry found in either group */ 77 if (slot == -1) { /* No available entry found in either group */
66 iSeries_hunlock(hpte_group); 78 iSeries_hunlock(hpte_group);
@@ -73,10 +85,9 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
73 slot &= 0x7fffffffffffffff; 85 slot &= 0x7fffffffffffffff;
74 } 86 }
75 87
76 arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
77 88
78 lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; 89 lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID;
79 lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; 90 lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags;
80 91
81 /* Now fill in the actual HPTE */ 92 /* Now fill in the actual HPTE */
82 HvCallHpt_addValidate(slot, secondary, &lhpte); 93 HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -86,25 +97,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
86 return (secondary << 3) | (slot & 7); 97 return (secondary << 3) | (slot & 7);
87} 98}
88 99
89long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
90 unsigned long va, unsigned long prpn, unsigned long vflags,
91 unsigned long rflags)
92{
93 long slot;
94 hpte_t lhpte;
95
96 slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
97
98 if (lhpte.v & HPTE_V_VALID) {
99 /* Bolt the existing HPTE */
100 HvCallHpt_setSwBits(slot, 0x10, 0);
101 HvCallHpt_setPp(slot, PP_RWXX);
102 return 0;
103 }
104
105 return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags);
106}
107
108static unsigned long iSeries_hpte_getword0(unsigned long slot) 100static unsigned long iSeries_hpte_getword0(unsigned long slot)
109{ 101{
110 hpte_t hpte; 102 hpte_t hpte;
@@ -150,15 +142,17 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
150 * bits 61..63 : PP2,PP1,PP0 142 * bits 61..63 : PP2,PP1,PP0
151 */ 143 */
152static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, 144static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
153 unsigned long va, int large, int local) 145 unsigned long va, int psize, int local)
154{ 146{
155 hpte_t hpte; 147 hpte_t hpte;
156 unsigned long avpn = va >> 23; 148 unsigned long want_v;
157 149
158 iSeries_hlock(slot); 150 iSeries_hlock(slot);
159 151
160 HvCallHpt_get(&hpte, slot); 152 HvCallHpt_get(&hpte, slot);
161 if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) { 153 want_v = hpte_encode_v(va, MMU_PAGE_4K);
154
155 if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) {
162 /* 156 /*
163 * Hypervisor expects bits as NPPP, which is 157 * Hypervisor expects bits as NPPP, which is
164 * different from how they are mapped in our PP. 158 * different from how they are mapped in our PP.
@@ -210,14 +204,17 @@ static long iSeries_hpte_find(unsigned long vpn)
210 * 204 *
211 * No need to lock here because we should be the only user. 205 * No need to lock here because we should be the only user.
212 */ 206 */
213static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) 207static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
208 int psize)
214{ 209{
215 unsigned long vsid,va,vpn; 210 unsigned long vsid,va,vpn;
216 long slot; 211 long slot;
217 212
213 BUG_ON(psize != MMU_PAGE_4K);
214
218 vsid = get_kernel_vsid(ea); 215 vsid = get_kernel_vsid(ea);
219 va = (vsid << 28) | (ea & 0x0fffffff); 216 va = (vsid << 28) | (ea & 0x0fffffff);
220 vpn = va >> PAGE_SHIFT; 217 vpn = va >> HW_PAGE_SHIFT;
221 slot = iSeries_hpte_find(vpn); 218 slot = iSeries_hpte_find(vpn);
222 if (slot == -1) 219 if (slot == -1)
223 panic("updateboltedpp: Could not find page to bolt\n"); 220 panic("updateboltedpp: Could not find page to bolt\n");
@@ -225,7 +222,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
225} 222}
226 223
227static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, 224static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
228 int large, int local) 225 int psize, int local)
229{ 226{
230 unsigned long hpte_v; 227 unsigned long hpte_v;
231 unsigned long avpn = va >> 23; 228 unsigned long avpn = va >> 23;
diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c
index 62ec73479687..f476d71194fa 100644
--- a/arch/powerpc/platforms/iseries/hvlog.c
+++ b/arch/powerpc/platforms/iseries/hvlog.c
@@ -22,7 +22,7 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
22 22
23 while (len) { 23 while (len) {
24 hv_buf.addr = cur; 24 hv_buf.addr = cur;
25 left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur; 25 left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur;
26 if (left_this_page > len) 26 if (left_this_page > len)
27 left_this_page = len; 27 left_this_page = len;
28 hv_buf.len = left_this_page; 28 hv_buf.len = left_this_page;
@@ -30,6 +30,6 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
30 HvCall2(HvCallBaseWriteLogBuffer, 30 HvCall2(HvCallBaseWriteLogBuffer,
31 virt_to_abs(&hv_buf), 31 virt_to_abs(&hv_buf),
32 left_this_page); 32 left_this_page);
33 cur = (cur & PAGE_MASK) + PAGE_SIZE; 33 cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE;
34 } 34 }
35} 35}
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 1a6845b5c5a4..bf081b345820 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -43,9 +43,12 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
43 u64 rc; 43 u64 rc;
44 union tce_entry tce; 44 union tce_entry tce;
45 45
46 index <<= TCE_PAGE_FACTOR;
47 npages <<= TCE_PAGE_FACTOR;
48
46 while (npages--) { 49 while (npages--) {
47 tce.te_word = 0; 50 tce.te_word = 0;
48 tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; 51 tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT;
49 52
50 if (tbl->it_type == TCE_VB) { 53 if (tbl->it_type == TCE_VB) {
51 /* Virtual Bus */ 54 /* Virtual Bus */
@@ -66,7 +69,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
66 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", 69 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
67 rc); 70 rc);
68 index++; 71 index++;
69 uaddr += PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
70 } 73 }
71} 74}
72 75
@@ -74,6 +77,9 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
74{ 77{
75 u64 rc; 78 u64 rc;
76 79
80 npages <<= TCE_PAGE_FACTOR;
81 index <<= TCE_PAGE_FACTOR;
82
77 while (npages--) { 83 while (npages--) {
78 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); 84 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
79 if (rc) 85 if (rc)
@@ -83,27 +89,6 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
83 } 89 }
84} 90}
85 91
86#ifdef CONFIG_PCI
87/*
88 * This function compares the known tables to find an iommu_table
89 * that has already been built for hardware TCEs.
90 */
91static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
92{
93 struct pci_dn *pdn;
94
95 list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
96 struct iommu_table *it = pdn->iommu_table;
97 if ((it != NULL) &&
98 (it->it_type == TCE_PCI) &&
99 (it->it_offset == tbl->it_offset) &&
100 (it->it_index == tbl->it_index) &&
101 (it->it_size == tbl->it_size))
102 return it;
103 }
104 return NULL;
105}
106
107/* 92/*
108 * Call Hv with the architected data structure to get TCE table info. 93 * Call Hv with the architected data structure to get TCE table info.
109 * info. Put the returned data into the Linux representation of the 94 * info. Put the returned data into the Linux representation of the
@@ -113,8 +98,10 @@ static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
113 * 2. TCE table per Bus. 98 * 2. TCE table per Bus.
114 * 3. TCE Table per IOA. 99 * 3. TCE Table per IOA.
115 */ 100 */
116static void iommu_table_getparms(struct pci_dn *pdn, 101void iommu_table_getparms_iSeries(unsigned long busno,
117 struct iommu_table* tbl) 102 unsigned char slotno,
103 unsigned char virtbus,
104 struct iommu_table* tbl)
118{ 105{
119 struct iommu_table_cb *parms; 106 struct iommu_table_cb *parms;
120 107
@@ -124,9 +111,9 @@ static void iommu_table_getparms(struct pci_dn *pdn,
124 111
125 memset(parms, 0, sizeof(*parms)); 112 memset(parms, 0, sizeof(*parms));
126 113
127 parms->itc_busno = pdn->busno; 114 parms->itc_busno = busno;
128 parms->itc_slotno = pdn->LogicalSlot; 115 parms->itc_slotno = slotno;
129 parms->itc_virtbus = 0; 116 parms->itc_virtbus = virtbus;
130 117
131 HvCallXm_getTceTableParms(iseries_hv_addr(parms)); 118 HvCallXm_getTceTableParms(iseries_hv_addr(parms));
132 119
@@ -134,17 +121,40 @@ static void iommu_table_getparms(struct pci_dn *pdn,
134 panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); 121 panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
135 122
136 /* itc_size is in pages worth of table, it_size is in # of entries */ 123 /* itc_size is in pages worth of table, it_size is in # of entries */
137 tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry); 124 tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) /
125 sizeof(union tce_entry)) >> TCE_PAGE_FACTOR;
138 tbl->it_busno = parms->itc_busno; 126 tbl->it_busno = parms->itc_busno;
139 tbl->it_offset = parms->itc_offset; 127 tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR;
140 tbl->it_index = parms->itc_index; 128 tbl->it_index = parms->itc_index;
141 tbl->it_blocksize = 1; 129 tbl->it_blocksize = 1;
142 tbl->it_type = TCE_PCI; 130 tbl->it_type = virtbus ? TCE_VB : TCE_PCI;
143 131
144 kfree(parms); 132 kfree(parms);
145} 133}
146 134
147 135
136#ifdef CONFIG_PCI
137/*
138 * This function compares the known tables to find an iommu_table
139 * that has already been built for hardware TCEs.
140 */
141static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
142{
143 struct pci_dn *pdn;
144
145 list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
146 struct iommu_table *it = pdn->iommu_table;
147 if ((it != NULL) &&
148 (it->it_type == TCE_PCI) &&
149 (it->it_offset == tbl->it_offset) &&
150 (it->it_index == tbl->it_index) &&
151 (it->it_size == tbl->it_size))
152 return it;
153 }
154 return NULL;
155}
156
157
148void iommu_devnode_init_iSeries(struct device_node *dn) 158void iommu_devnode_init_iSeries(struct device_node *dn)
149{ 159{
150 struct iommu_table *tbl; 160 struct iommu_table *tbl;
@@ -152,7 +162,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn)
152 162
153 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); 163 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
154 164
155 iommu_table_getparms(pdn, tbl); 165 iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl);
156 166
157 /* Look for existing tce table */ 167 /* Look for existing tce table */
158 pdn->iommu_table = iommu_table_find(tbl); 168 pdn->iommu_table = iommu_table_find(tbl);
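
The TCE_PAGE_FACTOR shifts in tce_build_iSeries/tce_free_iSeries and in iommu_table_getparms_iSeries exist because the hypervisor's TCEs always describe 4k pages; with a 64k kernel page size each IOMMU page expands to several TCEs. A small sketch of the scaling, with the 64k-page factor assumed:

#include <stdio.h>

#define TCE_SHIFT        12                 /* 4k hypervisor TCE page */
#define TCE_PAGE_SIZE    (1ul << TCE_SHIFT)
#define TCE_PAGE_FACTOR  4                  /* log2(64k / 4k), assumed */

int main(void)
{
        long index = 3, npages = 2;          /* in 64k IOMMU pages */

        index  <<= TCE_PAGE_FACTOR;          /* first 4k TCE slot: 48 */
        npages <<= TCE_PAGE_FACTOR;          /* 4k TCEs to program: 32 */

        printf("tce index %ld, count %ld, bytes %lu\n",
               index, npages, npages * TCE_PAGE_SIZE);
        return 0;
}
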
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 36f89e9ec7d0..d3e4bf756c83 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -316,11 +316,11 @@ static void __init iSeries_init_early(void)
316 */ 316 */
317 if (naca.xRamDisk) { 317 if (naca.xRamDisk) {
318 initrd_start = (unsigned long)__va(naca.xRamDisk); 318 initrd_start = (unsigned long)__va(naca.xRamDisk);
319 initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; 319 initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE;
320 initrd_below_start_ok = 1; // ramdisk in kernel space 320 initrd_below_start_ok = 1; // ramdisk in kernel space
321 ROOT_DEV = Root_RAM0; 321 ROOT_DEV = Root_RAM0;
322 if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize) 322 if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize)
323 rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024; 323 rd_size = (naca.xRamDiskSize * HW_PAGE_SIZE) / 1024;
324 } else 324 } else
325#endif /* CONFIG_BLK_DEV_INITRD */ 325#endif /* CONFIG_BLK_DEV_INITRD */
326 { 326 {
@@ -466,13 +466,14 @@ static void __init build_iSeries_Memory_Map(void)
466 */ 466 */
467 hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); 467 hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
468 hptSizePages = (u32)HvCallHpt_getHptPages(); 468 hptSizePages = (u32)HvCallHpt_getHptPages();
469 hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); 469 hptSizeChunks = hptSizePages >>
470 (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);
470 hptLastChunk = hptFirstChunk + hptSizeChunks - 1; 471 hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
471 472
472 printk("HPT absolute addr = %016lx, size = %dK\n", 473 printk("HPT absolute addr = %016lx, size = %dK\n",
473 chunk_to_addr(hptFirstChunk), hptSizeChunks * 256); 474 chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
474 475
475 ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE); 476 ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE);
476 477
477 /* 478 /*
478 * The actual hashed page table is in the hypervisor, 479 * The actual hashed page table is in the hypervisor,
@@ -625,7 +626,7 @@ static void __init iSeries_fixup_klimit(void)
625 */ 626 */
626 if (naca.xRamDisk) 627 if (naca.xRamDisk)
627 klimit = KERNELBASE + (u64)naca.xRamDisk + 628 klimit = KERNELBASE + (u64)naca.xRamDisk +
628 (naca.xRamDiskSize * PAGE_SIZE); 629 (naca.xRamDiskSize * HW_PAGE_SIZE);
629 else { 630 else {
630 /* 631 /*
631 * No ram disk was included - check and see if there 632 * No ram disk was included - check and see if there
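
The hptSizeChunks conversion earlier in this file turns a count of hardware pages into 256k chunks by shifting with MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT. A quick sketch of that arithmetic, assuming the conventional shift values:

#include <stdio.h>

#define MSCHUNKS_CHUNK_SHIFT 18   /* 256k chunks */
#define HW_PAGE_SHIFT        12   /* 4k hardware pages */

int main(void)
{
        unsigned int hpt_pages  = 16384;  /* e.g. a 64MB hashed page table */
        unsigned int hpt_chunks = hpt_pages >> (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);

        /* matches the printk above: size in K is chunks * 256 */
        printf("%u chunks, %uK\n", hpt_chunks, hpt_chunks * 256);
        return 0;
}
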
diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c
index c27a66876c2c..384360ee06ec 100644
--- a/arch/powerpc/platforms/iseries/vio.c
+++ b/arch/powerpc/platforms/iseries/vio.c
@@ -30,41 +30,14 @@ static struct iommu_table vio_iommu_table;
30 30
31static void __init iommu_vio_init(void) 31static void __init iommu_vio_init(void)
32{ 32{
33 struct iommu_table *t; 33 iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table);
34 struct iommu_table_cb cb; 34 veth_iommu_table.it_size /= 2;
35 unsigned long cbp; 35 vio_iommu_table = veth_iommu_table;
36 unsigned long itc_entries; 36 vio_iommu_table.it_offset += veth_iommu_table.it_size;
37 37
38 cb.itc_busno = 255; /* Bus 255 is the virtual bus */ 38 if (!iommu_init_table(&veth_iommu_table))
39 cb.itc_virtbus = 0xff; /* Ask for virtual bus */
40
41 cbp = virt_to_abs(&cb);
42 HvCallXm_getTceTableParms(cbp);
43
44 itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
45 veth_iommu_table.it_size = itc_entries / 2;
46 veth_iommu_table.it_busno = cb.itc_busno;
47 veth_iommu_table.it_offset = cb.itc_offset;
48 veth_iommu_table.it_index = cb.itc_index;
49 veth_iommu_table.it_type = TCE_VB;
50 veth_iommu_table.it_blocksize = 1;
51
52 t = iommu_init_table(&veth_iommu_table);
53
54 if (!t)
55 printk("Virtual Bus VETH TCE table failed.\n"); 39 printk("Virtual Bus VETH TCE table failed.\n");
56 40 if (!iommu_init_table(&vio_iommu_table))
57 vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
58 vio_iommu_table.it_busno = cb.itc_busno;
59 vio_iommu_table.it_offset = cb.itc_offset +
60 veth_iommu_table.it_size;
61 vio_iommu_table.it_index = cb.itc_index;
62 vio_iommu_table.it_type = TCE_VB;
63 vio_iommu_table.it_blocksize = 1;
64
65 t = iommu_init_table(&vio_iommu_table);
66
67 if (!t)
68 printk("Virtual Bus VIO TCE table failed.\n"); 41 printk("Virtual Bus VIO TCE table failed.\n");
69} 42}
70 43
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index fe97bfbf7463..842672695598 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -68,7 +68,8 @@ static DEFINE_SPINLOCK(statuslock);
68 * For each kind of event we allocate a buffer that is 68 * For each kind of event we allocate a buffer that is
69 * guaranteed not to cross a page boundary 69 * guaranteed not to cross a page boundary
70 */ 70 */
71static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned; 71static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256]
72 __attribute__((__aligned__(4096)));
72static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; 73static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
73static int event_buffer_initialised; 74static int event_buffer_initialised;
74 75
@@ -116,12 +117,12 @@ static int proc_viopath_show(struct seq_file *m, void *v)
116 HvLpEvent_Rc hvrc; 117 HvLpEvent_Rc hvrc;
117 DECLARE_MUTEX_LOCKED(Semaphore); 118 DECLARE_MUTEX_LOCKED(Semaphore);
118 119
119 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 120 buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
120 if (!buf) 121 if (!buf)
121 return 0; 122 return 0;
122 memset(buf, 0, PAGE_SIZE); 123 memset(buf, 0, HW_PAGE_SIZE);
123 124
124 handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE, 125 handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE,
125 DMA_FROM_DEVICE); 126 DMA_FROM_DEVICE);
126 127
127 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, 128 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
@@ -131,7 +132,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
131 viopath_sourceinst(viopath_hostLp), 132 viopath_sourceinst(viopath_hostLp),
132 viopath_targetinst(viopath_hostLp), 133 viopath_targetinst(viopath_hostLp),
133 (u64)(unsigned long)&Semaphore, VIOVERSION << 16, 134 (u64)(unsigned long)&Semaphore, VIOVERSION << 16,
134 ((u64)handle) << 32, PAGE_SIZE, 0, 0); 135 ((u64)handle) << 32, HW_PAGE_SIZE, 0, 0);
135 136
136 if (hvrc != HvLpEvent_Rc_Good) 137 if (hvrc != HvLpEvent_Rc_Good)
137 printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc); 138 printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
@@ -140,7 +141,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
140 141
141 vlanMap = HvLpConfig_getVirtualLanIndexMap(); 142 vlanMap = HvLpConfig_getVirtualLanIndexMap();
142 143
143 buf[PAGE_SIZE-1] = '\0'; 144 buf[HW_PAGE_SIZE-1] = '\0';
144 seq_printf(m, "%s", buf); 145 seq_printf(m, "%s", buf);
145 seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); 146 seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
146 seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n", 147 seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
@@ -152,7 +153,8 @@ static int proc_viopath_show(struct seq_file *m, void *v)
152 e2a(xItExtVpdPanel.systemSerial[4]), 153 e2a(xItExtVpdPanel.systemSerial[4]),
153 e2a(xItExtVpdPanel.systemSerial[5])); 154 e2a(xItExtVpdPanel.systemSerial[5]));
154 155
155 dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE); 156 dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
157 DMA_FROM_DEVICE);
156 kfree(buf); 158 kfree(buf);
157 159
158 return 0; 160 return 0;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 8a42006370c5..a50e5f3f396d 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -19,7 +19,7 @@
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21
22#define DEBUG 22#undef DEBUG_LOW
23 23
24#include <linux/config.h> 24#include <linux/config.h>
25#include <linux/kernel.h> 25#include <linux/kernel.h>
@@ -42,10 +42,10 @@
42 42
43#include "plpar_wrappers.h" 43#include "plpar_wrappers.h"
44 44
45#ifdef DEBUG 45#ifdef DEBUG_LOW
46#define DBG(fmt...) udbg_printf(fmt) 46#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
47#else 47#else
48#define DBG(fmt...) 48#define DBG_LOW(fmt...) do { } while(0)
49#endif 49#endif
50 50
51/* in pSeries_hvCall.S */ 51/* in pSeries_hvCall.S */
@@ -277,8 +277,9 @@ void vpa_init(int cpu)
277} 277}
278 278
279long pSeries_lpar_hpte_insert(unsigned long hpte_group, 279long pSeries_lpar_hpte_insert(unsigned long hpte_group,
280 unsigned long va, unsigned long prpn, 280 unsigned long va, unsigned long pa,
281 unsigned long vflags, unsigned long rflags) 281 unsigned long rflags, unsigned long vflags,
282 int psize)
282{ 283{
283 unsigned long lpar_rc; 284 unsigned long lpar_rc;
284 unsigned long flags; 285 unsigned long flags;
@@ -286,11 +287,28 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
286 unsigned long hpte_v, hpte_r; 287 unsigned long hpte_v, hpte_r;
287 unsigned long dummy0, dummy1; 288 unsigned long dummy0, dummy1;
288 289
289 hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID; 290 if (!(vflags & HPTE_V_BOLTED))
290 if (vflags & HPTE_V_LARGE) 291 DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
291 hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); 292 "rflags=%lx, vflags=%lx, psize=%d)\n",
292 293 hpte_group, va, pa, rflags, vflags, psize);
293 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; 294
295 hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
296 hpte_r = hpte_encode_r(pa, psize) | rflags;
297
298 if (!(vflags & HPTE_V_BOLTED))
299 DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
300
301#if 1
302 {
303 int i;
304 for (i=0;i<8;i++) {
305 unsigned long w0, w1;
306 plpar_pte_read(0, hpte_group, &w0, &w1);
307 BUG_ON (HPTE_V_COMPARE(hpte_v, w0)
308 && (w0 & HPTE_V_VALID));
309 }
310 }
311#endif
294 312
295 /* Now fill in the actual HPTE */ 313 /* Now fill in the actual HPTE */
296 /* Set CEC cookie to 0 */ 314 /* Set CEC cookie to 0 */
@@ -300,23 +318,30 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
300 /* Exact = 0 */ 318 /* Exact = 0 */
301 flags = 0; 319 flags = 0;
302 320
303 /* XXX why is this here? - Anton */ 321 /* Make pHyp happy */
304 if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) 322 if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
305 hpte_r &= ~_PAGE_COHERENT; 323 hpte_r &= ~_PAGE_COHERENT;
306 324
307 lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, 325 lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
308 hpte_r, &slot, &dummy0, &dummy1); 326 hpte_r, &slot, &dummy0, &dummy1);
309 327 if (unlikely(lpar_rc == H_PTEG_Full)) {
310 if (unlikely(lpar_rc == H_PTEG_Full)) 328 if (!(vflags & HPTE_V_BOLTED))
329 DBG_LOW(" full\n");
311 return -1; 330 return -1;
331 }
312 332
313 /* 333 /*
314 * Since we try and ioremap PHBs we don't own, the pte insert 334 * Since we try and ioremap PHBs we don't own, the pte insert
315 * will fail. However we must catch the failure in hash_page 335 * will fail. However we must catch the failure in hash_page
316 * or we will loop forever, so return -2 in this case. 336 * or we will loop forever, so return -2 in this case.
317 */ 337 */
318 if (unlikely(lpar_rc != H_Success)) 338 if (unlikely(lpar_rc != H_Success)) {
339 if (!(vflags & HPTE_V_BOLTED))
340 DBG_LOW(" lpar err %d\n", lpar_rc);
319 return -2; 341 return -2;
342 }
343 if (!(vflags & HPTE_V_BOLTED))
344 DBG_LOW(" -> slot: %d\n", slot & 7);
320 345
321 /* Because of iSeries, we have to pass down the secondary 346 /* Because of iSeries, we have to pass down the secondary
322 * bucket bit here as well 347 * bucket bit here as well
@@ -341,10 +366,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
341 /* don't remove a bolted entry */ 366 /* don't remove a bolted entry */
342 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, 367 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
343 (0x1UL << 4), &dummy1, &dummy2); 368 (0x1UL << 4), &dummy1, &dummy2);
344
345 if (lpar_rc == H_Success) 369 if (lpar_rc == H_Success)
346 return i; 370 return i;
347
348 BUG_ON(lpar_rc != H_Not_Found); 371 BUG_ON(lpar_rc != H_Not_Found);
349 372
350 slot_offset++; 373 slot_offset++;
@@ -372,20 +395,28 @@ static void pSeries_lpar_hptab_clear(void)
372 * We can probably optimize here and assume the high bits of newpp are 395 * We can probably optimize here and assume the high bits of newpp are
373 * already zero. For now I am paranoid. 396 * already zero. For now I am paranoid.
374 */ 397 */
375static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, 398static long pSeries_lpar_hpte_updatepp(unsigned long slot,
376 unsigned long va, int large, int local) 399 unsigned long newpp,
400 unsigned long va,
401 int psize, int local)
377{ 402{
378 unsigned long lpar_rc; 403 unsigned long lpar_rc;
379 unsigned long flags = (newpp & 7) | H_AVPN; 404 unsigned long flags = (newpp & 7) | H_AVPN;
380 unsigned long avpn = va >> 23; 405 unsigned long want_v;
381 406
382 if (large) 407 want_v = hpte_encode_v(va, psize);
383 avpn &= ~0x1UL;
384 408
385 lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7)); 409 DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ",
410 want_v & HPTE_V_AVPN, slot, flags, psize);
386 411
387 if (lpar_rc == H_Not_Found) 412 lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN);
413
414 if (lpar_rc == H_Not_Found) {
415 DBG_LOW("not found !\n");
388 return -1; 416 return -1;
417 }
418
419 DBG_LOW("ok\n");
389 420
390 BUG_ON(lpar_rc != H_Success); 421 BUG_ON(lpar_rc != H_Success);
391 422
@@ -411,21 +442,22 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
411 return dword0; 442 return dword0;
412} 443}
413 444
414static long pSeries_lpar_hpte_find(unsigned long vpn) 445static long pSeries_lpar_hpte_find(unsigned long va, int psize)
415{ 446{
416 unsigned long hash; 447 unsigned long hash;
417 unsigned long i, j; 448 unsigned long i, j;
418 long slot; 449 long slot;
419 unsigned long hpte_v; 450 unsigned long want_v, hpte_v;
420 451
421 hash = hpt_hash(vpn, 0); 452 hash = hpt_hash(va, mmu_psize_defs[psize].shift);
453 want_v = hpte_encode_v(va, psize);
422 454
423 for (j = 0; j < 2; j++) { 455 for (j = 0; j < 2; j++) {
424 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 456 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
425 for (i = 0; i < HPTES_PER_GROUP; i++) { 457 for (i = 0; i < HPTES_PER_GROUP; i++) {
426 hpte_v = pSeries_lpar_hpte_getword0(slot); 458 hpte_v = pSeries_lpar_hpte_getword0(slot);
427 459
428 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) 460 if (HPTE_V_COMPARE(hpte_v, want_v)
429 && (hpte_v & HPTE_V_VALID) 461 && (hpte_v & HPTE_V_VALID)
430 && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { 462 && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
431 /* HPTE matches */ 463 /* HPTE matches */
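
pSeries_lpar_hpte_find() does a two-pass search: the primary hash group first, then the secondary group, accepting only entries that compare equal on the AVPN/size bits, are valid, and whose SECONDARY bit matches the bucket being scanned. The sketch below mirrors that loop structure over a toy in-memory table; SK_HPTE_V_COMPARE(), sk_group_base() and the 4-group table are invented for the example (the real code sizes the table from htab_hash_mask and derives the secondary hash from the primary one).

#include <stddef.h>
#include <stdio.h>

#define SK_HPTES_PER_GROUP      8
#define SK_HPTE_V_VALID         0x1UL
#define SK_HPTE_V_SECONDARY     0x2UL
/* Stand-in for HPTE_V_COMPARE(): match on everything except the low flags. */
#define SK_HPTE_V_COMPARE(v, w) (((v) & ~0x3UL) == ((w) & ~0x3UL))

/* Toy hashed page table: 4 groups of 8 entries, first doubleword only. */
static unsigned long sk_htab[4 * SK_HPTES_PER_GROUP];

/* Invented helper: starting slot of group j (0 = primary, 1 = secondary)
 * for a given hash; it only keeps the sketch self-contained. */
static size_t sk_group_base(unsigned long hash, int j)
{
        unsigned long h = j ? ~hash : hash;

        return (h & 0x3UL) * SK_HPTES_PER_GROUP;
}

/* Two-pass search mirroring pSeries_lpar_hpte_find(): primary group, then
 * secondary, accepting only valid entries whose SECONDARY bit matches the
 * bucket being scanned. */
static long sk_hpte_find(unsigned long hash, unsigned long want_v)
{
        for (int j = 0; j < 2; j++) {
                size_t slot = sk_group_base(hash, j);

                for (int i = 0; i < SK_HPTES_PER_GROUP; i++, slot++) {
                        unsigned long v = sk_htab[slot];

                        if (SK_HPTE_V_COMPARE(v, want_v) &&
                            (v & SK_HPTE_V_VALID) &&
                            !!(v & SK_HPTE_V_SECONDARY) == j)
                                return (long)slot;
                }
        }
        return -1;
}

int main(void)
{
        unsigned long want_v = 0x1234UL << 7;   /* pretend AVPN bits */

        sk_htab[sk_group_base(5, 0) + 3] = want_v | SK_HPTE_V_VALID;
        printf("found at slot %ld\n", sk_hpte_find(5, want_v));
        return 0;
}
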
@@ -442,17 +474,15 @@ static long pSeries_lpar_hpte_find(unsigned long vpn)
442} 474}
443 475
444static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, 476static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
445 unsigned long ea) 477 unsigned long ea,
478 int psize)
446{ 479{
447 unsigned long lpar_rc; 480 unsigned long lpar_rc, slot, vsid, va, flags;
448 unsigned long vsid, va, vpn, flags;
449 long slot;
450 481
451 vsid = get_kernel_vsid(ea); 482 vsid = get_kernel_vsid(ea);
452 va = (vsid << 28) | (ea & 0x0fffffff); 483 va = (vsid << 28) | (ea & 0x0fffffff);
453 vpn = va >> PAGE_SHIFT;
454 484
455 slot = pSeries_lpar_hpte_find(vpn); 485 slot = pSeries_lpar_hpte_find(va, psize);
456 BUG_ON(slot == -1); 486 BUG_ON(slot == -1);
457 487
458 flags = newpp & 7; 488 flags = newpp & 7;
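
pSeries_lpar_hpte_updateboltedpp() no longer computes a vpn; it hands the full virtual address and the page size straight to pSeries_lpar_hpte_find(). The VA itself is still formed the same way as before: the kernel VSID in the upper bits with the low 28 bits (the 256MB segment offset) of the effective address below it. A small worked example of that arithmetic follows; the VSID value is a made-up placeholder, since get_kernel_vsid() is not part of this hunk.

#include <stdio.h>

int main(void)
{
        unsigned long ea   = 0xc000000000123000UL;      /* some kernel EA   */
        unsigned long vsid = 0x00000000002abcdeUL;      /* placeholder VSID */

        /* Same construction as in the hunk: VSID above the 256MB segment
         * offset taken from the low 28 bits of the effective address. */
        unsigned long va = (vsid << 28) | (ea & 0x0fffffff);

        printf("ea = %016lx\n", ea);
        printf("va = %016lx\n", va);
        return 0;
}
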
@@ -462,18 +492,18 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
462} 492}
463 493
464static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, 494static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
465 int large, int local) 495 int psize, int local)
466{ 496{
467 unsigned long avpn = va >> 23; 497 unsigned long want_v;
468 unsigned long lpar_rc; 498 unsigned long lpar_rc;
469 unsigned long dummy1, dummy2; 499 unsigned long dummy1, dummy2;
470 500
471 if (large) 501 DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d",
472 avpn &= ~0x1UL; 502 slot, va, psize, local);
473
474 lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
475 &dummy2);
476 503
504 want_v = hpte_encode_v(va, psize);
505 lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN,
506 &dummy1, &dummy2);
477 if (lpar_rc == H_Not_Found) 507 if (lpar_rc == H_Not_Found)
478 return; 508 return;
479 509
@@ -495,7 +525,8 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local)
495 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 525 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
496 526
497 for (i = 0; i < number; i++) 527 for (i = 0; i < number; i++)
498 flush_hash_page(batch->vaddr[i], batch->pte[i], local); 528 flush_hash_page(batch->vaddr[i], batch->pte[i],
529 batch->psize, local);
499 530
500 if (lock_tlbie) 531 if (lock_tlbie)
501 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 532 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
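
pSeries_lpar_flush_hash_range() now forwards batch->psize along with each vaddr/pte pair, still doing the whole batch under pSeries_lpar_tlbie_lock when the hypervisor requires tlbie serialization. A bare-bones userspace sketch of that take-the-lock-once, flush-everything, release shape is below, with a pthread mutex standing in for the kernel spinlock and sk_flush_one() standing in for flush_hash_page(); all names here are illustrative.

#include <pthread.h>
#include <stdio.h>

/* Stand-in for pSeries_lpar_tlbie_lock; the real code uses a spinlock with
 * interrupts disabled, not a pthread mutex. */
static pthread_mutex_t sk_tlbie_lock = PTHREAD_MUTEX_INITIALIZER;

struct sk_batch {
        unsigned long vaddr[4];
        unsigned long pte[4];
        int psize;                      /* one page size per batch */
        unsigned long count;
};

static void sk_flush_one(unsigned long va, unsigned long pte, int psize)
{
        printf("flush va=%016lx pte=%016lx psize=%d\n", va, pte, psize);
}

/* Mirror of the loop above: take the lock once (when serialization is
 * required), flush every entry in the batch, then release. */
static void sk_flush_hash_range(struct sk_batch *b, int need_lock)
{
        if (need_lock)
                pthread_mutex_lock(&sk_tlbie_lock);

        for (unsigned long i = 0; i < b->count; i++)
                sk_flush_one(b->vaddr[i], b->pte[i], b->psize);

        if (need_lock)
                pthread_mutex_unlock(&sk_tlbie_lock);
}

int main(void)
{
        struct sk_batch b = {
                .vaddr = { 0xc000000000100000UL, 0xc000000000110000UL },
                .pte   = { 0x1UL, 0x2UL },
                .psize = 0,
                .count = 2,
        };

        sk_flush_hash_range(&b, 1);
        return 0;
}
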