author    Scott Wood <scottwood@freescale.com>              2011-06-22 07:25:42 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2011-06-29 03:47:48 -0400
commit    f67f4ef5fcdfdeeddcb0ed4ab2c85d9bb4185d5f (patch)
tree      2a2dd8b027cc596dae37dd1b0a3710bca4791ef1 /arch/powerpc
parent    3d97a619acbb2c8a7a9a7da08c2d3041dfdd241f (diff)
powerpc/book3e-64: use a separate TLB handler when linear map is bolted
On MMUs such as FSL where we can guarantee the entire linear mapping is
bolted, we don't need to worry about linear TLB misses.  If on top of
that we do a full table walk, we get rid of all recursive TLB faults, and
can dispense with some state saving.  This gains a few percent on
TLB-miss-heavy workloads, and around 50% on a benchmark that had a high
rate of virtual page table faults under the normal handler.

While touching the EX_TLB layout, remove EX_TLB_MMUCR0, EX_TLB_SRR0, and
EX_TLB_SRR1 as they're not used.

[BenH: Fixed build with 64K pages (wsp config)]

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
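The "full table walk" above is implemented by tlb_miss_common_bolted in the
assembly below: one load per page-table level, bailing out to the storage
exception path on a NULL entry.  As an illustrative C sketch only (assumed
4K-page geometry with 9-bit indices per level; the patch's real code works
on raw PTEs in assembly, not kernel pointers):

	#include <stdint.h>

	/* Assumed geometry -- placeholders, not the kernel's real constants */
	#define PAGE_SHIFT_X  12
	#define PMD_SHIFT_X   21
	#define PUD_SHIFT_X   30
	#define PGD_SHIFT_X   39
	#define IDX(ea, sh)   (((ea) >> (sh)) & 0x1ff)	/* 9-bit index */

	uint64_t walk_bolted(uint64_t **pgd, uint64_t ea)
	{
		uint64_t **pud, **pmd, *pte;

		if (!pgd)			/* beq tlb_miss_fault_bolted */
			return 0;
		pud = (uint64_t **)pgd[IDX(ea, PGD_SHIFT_X)];
		if (!pud)
			return 0;
		pmd = (uint64_t **)pud[IDX(ea, PUD_SHIFT_X)];
		if (!pmd)
			return 0;
		pte = (uint64_t *)pmd[IDX(ea, PMD_SHIFT_X)];
		if (!pte)
			return 0;
		return pte[IDX(ea, PAGE_SHIFT_X)];	/* the final ldx r14,r14,r15 */
	}

Because every step is a plain load, nothing in this path can itself take a
TLB miss once the linear map is bolted, which is what removes the recursion.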
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/include/asm/exception-64e.h |  52
-rw-r--r--  arch/powerpc/include/asm/paca.h          |   7
-rw-r--r--  arch/powerpc/mm/tlb_low_64e.S            | 206
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c             |  35
4 files changed, 266 insertions, 34 deletions
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index 6d53f311d942..ac13addb8495 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -48,30 +48,33 @@
 #define EX_R14		(4 * 8)
 #define EX_R15		(5 * 8)
 
-/* The TLB miss exception uses different slots */
+/*
+ * The TLB miss exception uses different slots.
+ *
+ * The bolted variant uses only the first six fields,
+ * which in combination with pgd and kernel_pgd fits in
+ * one 64-byte cache line.
+ */
 
 #define EX_TLB_R10	( 0 * 8)
 #define EX_TLB_R11	( 1 * 8)
-#define EX_TLB_R12	( 2 * 8)
-#define EX_TLB_R13	( 3 * 8)
-#define EX_TLB_R14	( 4 * 8)
-#define EX_TLB_R15	( 5 * 8)
-#define EX_TLB_R16	( 6 * 8)
-#define EX_TLB_CR	( 7 * 8)
+#define EX_TLB_R14	( 2 * 8)
+#define EX_TLB_R15	( 3 * 8)
+#define EX_TLB_R16	( 4 * 8)
+#define EX_TLB_CR	( 5 * 8)
+#define EX_TLB_R12	( 6 * 8)
+#define EX_TLB_R13	( 7 * 8)
 #define EX_TLB_DEAR	( 8 * 8) /* Level 0 and 2 only */
 #define EX_TLB_ESR	( 9 * 8) /* Level 0 and 2 only */
 #define EX_TLB_SRR0	(10 * 8)
 #define EX_TLB_SRR1	(11 * 8)
-#define EX_TLB_MMUCR0	(12 * 8) /* Level 0 */
-#define EX_TLB_MAS1	(12 * 8) /* Level 0 */
-#define EX_TLB_MAS2	(13 * 8) /* Level 0 */
 #ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define EX_TLB_R8	(14 * 8)
-#define EX_TLB_R9	(15 * 8)
-#define EX_TLB_LR	(16 * 8)
-#define EX_TLB_SIZE	(17 * 8)
+#define EX_TLB_R8	(12 * 8)
+#define EX_TLB_R9	(13 * 8)
+#define EX_TLB_LR	(14 * 8)
+#define EX_TLB_SIZE	(15 * 8)
 #else
-#define EX_TLB_SIZE	(14 * 8)
+#define EX_TLB_SIZE	(12 * 8)
 #endif
 
 #define START_EXCEPTION(label)						\
@@ -168,6 +171,16 @@ exc_##label##_book3e:
 	ld	r9,EX_TLB_R9(r12);					    \
 	ld	r8,EX_TLB_R8(r12);					    \
 	mtlr	r16;
+#define TLB_MISS_PROLOG_STATS_BOLTED					    \
+	mflr	r10;							    \
+	std	r8,PACA_EXTLB+EX_TLB_R8(r13);				    \
+	std	r9,PACA_EXTLB+EX_TLB_R9(r13);				    \
+	std	r10,PACA_EXTLB+EX_TLB_LR(r13);
+#define TLB_MISS_RESTORE_STATS_BOLTED					    \
+	ld	r16,PACA_EXTLB+EX_TLB_LR(r13);				    \
+	ld	r9,PACA_EXTLB+EX_TLB_R9(r13);				    \
+	ld	r8,PACA_EXTLB+EX_TLB_R8(r13);				    \
+	mtlr	r16;
 #define TLB_MISS_STATS_D(name)						    \
 	addi	r9,r13,MMSTAT_DSTATS+name;				    \
 	bl	.tlb_stat_inc;
@@ -183,17 +196,20 @@ exc_##label##_book3e:
 61:	addi	r9,r13,MMSTAT_ISTATS+name;				    \
 62:	bl	.tlb_stat_inc;
 #define TLB_MISS_STATS_SAVE_INFO					    \
-	std	r14,EX_TLB_ESR(r12); /* save ESR */			    \
-
-
+	std	r14,EX_TLB_ESR(r12); /* save ESR */
+#define TLB_MISS_STATS_SAVE_INFO_BOLTED					    \
+	std	r14,PACA_EXTLB+EX_TLB_ESR(r13); /* save ESR */
 #else
 #define TLB_MISS_PROLOG_STATS
 #define TLB_MISS_RESTORE_STATS
+#define TLB_MISS_PROLOG_STATS_BOLTED
+#define TLB_MISS_RESTORE_STATS_BOLTED
 #define TLB_MISS_STATS_D(name)
 #define TLB_MISS_STATS_I(name)
 #define TLB_MISS_STATS_X(name)
 #define TLB_MISS_STATS_Y(name)
 #define TLB_MISS_STATS_SAVE_INFO
+#define TLB_MISS_STATS_SAVE_INFO_BOLTED
 #endif
 
 #define SET_IVOR(vector_number, vector_offset)	\
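The slot reordering above is what makes the new comment's cache-line claim
work: the six registers the bolted prolog saves (r10, r11, r14, r15, r16,
CR) now occupy slots 0-5, i.e. the first 48 bytes of the frame.  A trivial
standalone check of that arithmetic (illustrative only):

	#include <assert.h>

	int main(void)
	{
		int bolted_slots = 6 * 8;	/* EX_TLB_R10 .. EX_TLB_CR      */
		int pgd_pointers = 2 * 8;	/* pgd + kernel_pgd, see paca.h */

		assert(bolted_slots + pgd_pointers == 64);	/* one cache line */
		return 0;
	}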
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 74126765106a..c1f65f597920 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -103,11 +103,12 @@ struct paca_struct {
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 #ifdef CONFIG_PPC_BOOK3E
-	pgd_t *pgd;			/* Current PGD */
-	pgd_t *kernel_pgd;		/* Kernel PGD */
 	u64 exgen[8] __attribute__((aligned(0x80)));
+	/* Keep pgd in the same cacheline as the start of extlb */
+	pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */
+	pgd_t *kernel_pgd;		/* Kernel PGD */
 	/* We can have up to 3 levels of reentrancy in the TLB miss handler */
-	u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80)));
+	u64 extlb[3][EX_TLB_SIZE / sizeof(u64)];
 	u64 exmc[8];		/* used for machine checks */
 	u64 excrit[8];		/* used for crit interrupts */
 	u64 exdbg[8];		/* used for debug interrupts */
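To see why pgd rather than extlb now carries the 0x80 alignment, here is a
minimal mock of the layout (field sizes assumed; this is not the real
paca_struct): pgd starts a fresh 128-byte line, and pgd, kernel_pgd plus
the six bolted extlb slots fill exactly the first 64 bytes of it.

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	struct mock_paca {
		uint64_t exgen[8] __attribute__((aligned(0x80)));
		/* as in the hunk above: pgd takes over the 0x80 alignment */
		void *pgd __attribute__((aligned(0x80)));
		void *kernel_pgd;
		uint64_t extlb[3][12];	/* EX_TLB_SIZE/8 == 12 without stats */
	};

	int main(void)
	{
		size_t first = offsetof(struct mock_paca, pgd);
		size_t last  = offsetof(struct mock_paca, extlb) + 6 * 8;

		assert(first % 0x80 == 0);	/* pgd starts a 128-byte line  */
		assert(last - first == 64);	/* hot data fills half of it   */
		return 0;
	}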
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index af0892209417..4ebb34bc01d6 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -30,6 +30,212 @@
 #define VPTE_PGD_SHIFT	(VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
 #define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
 
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with a bolted linear mapping          *
+ * No virtual page table, no nested TLB misses                        *
+ *                                                                    *
+ **********************************************************************/
+
+.macro tlb_prolog_bolted addr
+	mtspr	SPRN_SPRG_TLB_SCRATCH,r13
+	mfspr	r13,SPRN_SPRG_PACA
+	std	r10,PACA_EXTLB+EX_TLB_R10(r13)
+	mfcr	r10
+	std	r11,PACA_EXTLB+EX_TLB_R11(r13)
+	std	r16,PACA_EXTLB+EX_TLB_R16(r13)
+	mfspr	r16,\addr		/* get faulting address */
+	std	r14,PACA_EXTLB+EX_TLB_R14(r13)
+	ld	r14,PACAPGD(r13)
+	std	r15,PACA_EXTLB+EX_TLB_R15(r13)
+	std	r10,PACA_EXTLB+EX_TLB_CR(r13)
+	TLB_MISS_PROLOG_STATS_BOLTED
+.endm
+
+.macro tlb_epilog_bolted
+	ld	r14,PACA_EXTLB+EX_TLB_CR(r13)
+	ld	r10,PACA_EXTLB+EX_TLB_R10(r13)
+	ld	r11,PACA_EXTLB+EX_TLB_R11(r13)
+	mtcr	r14
+	ld	r14,PACA_EXTLB+EX_TLB_R14(r13)
+	ld	r15,PACA_EXTLB+EX_TLB_R15(r13)
+	TLB_MISS_RESTORE_STATS_BOLTED
+	ld	r16,PACA_EXTLB+EX_TLB_R16(r13)
+	mfspr	r13,SPRN_SPRG_TLB_SCRATCH
+.endm
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss_bolted)
+	tlb_prolog_bolted SPRN_DEAR
+
+	/* We need _PAGE_PRESENT and _PAGE_ACCESSED set */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	/* We pre-test some combination of permissions to avoid double
+	 * faults:
+	 *
+	 * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+	 * ESR_ST is 0x00800000
+	 * _PAGE_BAP_SW is 0x00000010
+	 * So the shift is >> 19. This tests for supervisor writeability.
+	 * If the page happens to be supervisor writeable and not user
+	 * writeable, we will take a new fault later, but that should be
+	 * a rare enough case.
+	 *
+	 * We also move ESR_ST in _PAGE_DIRTY position
+	 * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+	 *
+	 * MAS1 is preset for all we need except for TID that needs to
+	 * be cleared for kernel translations
+	 */
+
+	mfspr	r11,SPRN_ESR
+
+	srdi	r15,r16,60		/* get region */
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	bne-	dtlb_miss_fault_bolted
+
+	rlwinm	r10,r11,32-19,27,27
+	rlwimi	r10,r11,32-16,19,19
+	cmpwi	r15,0
+	ori	r10,r10,_PAGE_PRESENT
+	oris	r11,r10,_PAGE_ACCESSED@h
+
+	TLB_MISS_STATS_SAVE_INFO_BOLTED
+	bne	tlb_miss_kernel_bolted
+
+tlb_miss_common_bolted:
+/*
+ * This is the guts of the TLB miss handler for bolted-linear.
+ * We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+	rldicl	r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+	cmpldi	cr0,r14,0
+	clrrdi	r15,r15,3
+	beq	tlb_miss_fault_bolted
+
+BEGIN_MMU_FTR_SECTION
+	/* Set the TLB reservation and search for existing entry. Then load
+	 * the entry.
+	 */
+	PPC_TLBSRX_DOT(0,r16)
+	ldx	r14,r14,r15
+	beq	normal_tlb_miss_done
+MMU_FTR_SECTION_ELSE
+	ldx	r14,r14,r15
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
+
+#ifndef CONFIG_PPC_64K_PAGES
+	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+
+	rldicl	r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+
+	/* Check if required permissions are met */
+	andc.	r15,r11,r14
+	rldicr	r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	bne-	tlb_miss_fault_bolted
+
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE need change if !base page size, not
+	 *                 yet implemented for now
+	 * MAS 2   :	Defaults not useful, need to be redone
+	 * MAS 3+7 :	Needs to be done
+	 */
+	clrrdi	r11,r16,12		/* Clear low crap in EA */
+	clrldi	r15,r15,12		/* Clear crap at the top */
+	rlwimi	r11,r14,32-19,27,31	/* Insert WIMGE */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	mtspr	SPRN_MAS2,r11
+	andi.	r11,r14,_PAGE_DIRTY
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	bne	1f
+	li	r11,MAS3_SW|MAS3_UW
+	andc	r15,r15,r11
+1:
+	mtspr	SPRN_MAS7_MAS3,r15
+	tlbwe
+
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+	tlb_epilog_bolted
+	rfi
+
+itlb_miss_kernel_bolted:
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_SX	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
+tlb_miss_kernel_bolted:
+	mfspr	r10,SPRN_MAS1
+	ld	r14,PACA_KERNELPGD(r13)
+	cmpldi	cr0,r15,8		/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	beq+	tlb_miss_common_bolted
+
+tlb_miss_fault_bolted:
+	/* We need to check if it was an instruction miss */
+	andi.	r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
+	bne	itlb_miss_fault_bolted
+dtlb_miss_fault_bolted:
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+	tlb_epilog_bolted
+	b	exc_data_storage_book3e
+itlb_miss_fault_bolted:
+	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+	tlb_epilog_bolted
+	b	exc_instruction_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss_bolted)
+	tlb_prolog_bolted SPRN_SRR0
+
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	srdi	r15,r16,60		/* get region */
+	TLB_MISS_STATS_SAVE_INFO_BOLTED
+	bne-	itlb_miss_fault_bolted
+
+	li	r11,_PAGE_PRESENT|_PAGE_EXEC	/* Base perm */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+
+	cmpldi	cr0,r15,0		/* Check for user region */
+	oris	r11,r11,_PAGE_ACCESSED@h
+	beq	tlb_miss_common_bolted
+	b	itlb_miss_kernel_bolted
 
 /**********************************************************************
  *                                                                    *
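The shift constants quoted in the pre-test comment of the data-miss handler
above check out mechanically; a standalone sketch of the arithmetic (values
taken straight from that comment):

	#include <assert.h>

	int main(void)
	{
		unsigned long esr_st      = 0x00800000;	/* ESR:ST (store)   */
		unsigned long page_bap_sw = 0x00000010;	/* supervisor write */
		unsigned long page_dirty  = 0x00001000;

		assert((esr_st >> 19) == page_bap_sw);	/* the ">> 19" test */
		assert((esr_st >> 11) == page_dirty);	/* the ">> 11" test */
		return 0;
	}

The >> 19 move is the rlwinm r10,r11,32-19,27,27 right after
mfspr r11,SPRN_ESR: a rotate-right by 19 masked down to the 0x10 bit.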
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 569349916471..3722185d1865 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -443,14 +443,27 @@ static void setup_page_sizes(void)
 	}
 }
 
-static void setup_mmu_htw(void)
+static void __patch_exception(int exc, unsigned long addr)
 {
 	extern unsigned int interrupt_base_book3e;
-	extern unsigned int exc_data_tlb_miss_htw_book3e;
-	extern unsigned int exc_instruction_tlb_miss_htw_book3e;
+	unsigned int *ibase = &interrupt_base_book3e;
+
+	/* Our exceptions vectors start with a NOP and -then- a branch
+	 * to deal with single stepping from userspace which stops on
+	 * the second instruction. Thus we need to patch the second
+	 * instruction of the exception, not the first one
+	 */
+
+	patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
 
-	unsigned int *ibase = &interrupt_base_book3e;
+#define patch_exception(exc, name) do {				\
+	extern unsigned int name;				\
+	__patch_exception((exc), (unsigned long)&name);		\
+} while (0)
 
+static void setup_mmu_htw(void)
+{
 	/* Check if HW tablewalk is present, and if yes, enable it by:
 	 *
 	 * - patching the TLB miss handlers to branch to the
@@ -462,15 +475,8 @@ static void setup_mmu_htw(void)
 
 	if ((tlb0cfg & TLBnCFG_IND) &&
 	    (tlb0cfg & TLBnCFG_PT)) {
-		/* Our exceptions vectors start with a NOP and -then- a branch
-		 * to deal with single stepping from userspace which stops on
-		 * the second instruction. Thus we need to patch the second
-		 * instruction of the exception, not the first one
-		 */
-		patch_branch(ibase + (0x1c0 / 4) + 1,
-			     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
-		patch_branch(ibase + (0x1e0 / 4) + 1,
-			     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+		patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
 		book3e_htw_enabled = 1;
 	}
 	pr_info("MMU: Book3E HW tablewalk %s\n",
@@ -549,6 +555,9 @@ static void __early_init_mmu(int boot_cpu)
 		/* limit memory so we dont have linear faults */
 		memblock_enforce_memory_limit(linear_map_top);
 		memblock_analyze();
+
+		patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
 	}
 #endif
 
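A closing note on the __patch_exception index arithmetic introduced above:
vectors are arrays of 32-bit instructions, so a vector's byte offset (0x1c0,
0x1e0) becomes the word index exc/4, and the +1 steps over the leading NOP
described in the comment.  A standalone sketch under those assumptions
(hypothetical buffer, not the real interrupt_base_book3e):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t vectors[0x200 / 4] = { 0 };	/* stand-in vector area */
		uint32_t *ibase = vectors;

		/* 0x1c0 = data TLB miss: word 0 is the NOP, word 1 the branch */
		uint32_t *nop    = ibase + (0x1c0 / 4);
		uint32_t *branch = ibase + (0x1c0 / 4) + 1;

		assert(branch == nop + 1);
		assert((uintptr_t)branch - (uintptr_t)ibase == 0x1c0 + 4);
		return 0;
	}

The do { } while (0) wrapper exists so the extern declaration of the handler
symbol can sit next to its use while the macro still behaves as a single
statement.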