Diffstat (limited to 'arch/powerpc/include/asm/mmu-hash64.h')

 arch/powerpc/include/asm/mmu-hash64.h | 169 ++++++++++++++++++++++----------
 1 file changed, 120 insertions(+), 49 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 1c65a59881ea..9673f73eb8db 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -16,6 +16,13 @@
 #include <asm/page.h>
 
 /*
+ * This is necessary to get the definition of PGTABLE_RANGE which we
+ * need for various slices-related matters. Note that this isn't the
+ * complete pgtable.h but only a portion of it.
+ */
+#include <asm/pgtable-ppc64.h>
+
+/*
  * Segment table
  */
 
@@ -154,9 +161,25 @@ struct mmu_psize_def
 #define MMU_SEGSIZE_256M	0
 #define MMU_SEGSIZE_1T		1
 
+/*
+ * Encode the page number shift.
+ * To fit the 78-bit VA in a 64-bit variable we shift the VA by
+ * 12 bits. This enables us to address up to a 76-bit VA.
+ * For the hpt hash we can ignore the page size bits of the VA, and
+ * for the hpte encoding we ignore up to 23 bits of the VA. So
+ * ignoring the lower 12 bits works in all cases, including 4K pages.
+ */
+#define VPN_SHIFT	12
 
 #ifndef __ASSEMBLY__
 
+static inline int segment_shift(int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return SID_SHIFT;
+	return SID_SHIFT_1T;
+}
+
 /*
  * The current system page and segment sizes
  */
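[Reviewer note] To see why the 12-bit shift works, here is a minimal user-space sketch of the VPN construction this hunk enables (SID_SHIFT = 28 and SID_SHIFT_1T = 40 are the values this header defines elsewhere; the vsid and ea values are made up):

#include <stdio.h>

#define VPN_SHIFT	12
#define SID_SHIFT	28	/* 256MB segments */
#define SID_SHIFT_1T	40	/* 1TB segments */

int main(void)
{
	unsigned long vsid = 0x123456UL;
	unsigned long ea   = 0x0000000ff0001000UL;

	/* 256MB segment: vpn = vsid:offset, with the low 12 bits dropped */
	unsigned long mask = (1UL << (SID_SHIFT - VPN_SHIFT)) - 1;
	unsigned long vpn  = (vsid << (SID_SHIFT - VPN_SHIFT)) |
			     ((ea >> VPN_SHIFT) & mask);

	/* the full 78-bit VA would not fit in 64 bits; the vpn does */
	printf("vpn = 0x%lx\n", vpn);
	return 0;
}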
@@ -180,18 +203,39 @@ extern unsigned long tce_alloc_start, tce_alloc_end;
 extern int mmu_ci_restrictions;
 
 /*
+ * This computes the AVPN and B fields of the first dword of a HPTE,
+ * for use when we want to match an existing PTE. The bottom 7 bits
+ * of the returned value are zero.
+ */
+static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
+					     int ssize)
+{
+	unsigned long v;
+	/*
+	 * The AVA field omits the low-order 23 bits of the 78-bit VA.
+	 * These bits are not needed in the PTE, because the
+	 * low-order b of these bits are part of the byte offset
+	 * into the virtual page and, if b < 23, the high-order
+	 * 23-b of these bits are always used in selecting the
+	 * PTEGs to be searched.
+	 */
+	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
+	v <<= HPTE_V_AVPN_SHIFT;
+	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+	return v;
+}
+
+/*
  * This function sets the AVPN and L fields of the HPTE appropriately
  * for the page size
  */
-static inline unsigned long hpte_encode_v(unsigned long va, int psize,
-					  int ssize)
+static inline unsigned long hpte_encode_v(unsigned long vpn,
+					  int psize, int ssize)
 {
 	unsigned long v;
-	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
-	v <<= HPTE_V_AVPN_SHIFT;
+	v = hpte_encode_avpn(vpn, psize, ssize);
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
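[Reviewer note] A quick illustration of the AVPN packing for the 4K case (where avpnm == 0). This is a standalone sketch, not kernel code; HPTE_V_AVPN_SHIFT = 7 and HPTE_V_SSIZE_SHIFT = 62 are assumed from the rest of this header:

#include <assert.h>

#define VPN_SHIFT		12
#define HPTE_V_AVPN_SHIFT	7
#define HPTE_V_SSIZE_SHIFT	62

static unsigned long encode_avpn_4k(unsigned long vpn, int ssize)
{
	/* drop VA bits 12..22: the vpn already lacks bits 0..11 */
	unsigned long v = vpn >> (23 - VPN_SHIFT);

	v <<= HPTE_V_AVPN_SHIFT;
	v |= (unsigned long) ssize << HPTE_V_SSIZE_SHIFT;
	return v;
}

int main(void)
{
	unsigned long v = encode_avpn_4k(0x123456789UL, 1);

	assert((v & 0x7f) == 0);		   /* bottom 7 bits are zero */
	assert((v >> HPTE_V_SSIZE_SHIFT) == 1);    /* B field holds ssize */
	return 0;
}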
@@ -216,30 +260,37 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
 }
 
 /*
- * Build a VA given VSID, EA and segment size
+ * Build a VPN_SHIFT-bit shifted VA (a VPN) given VSID, EA and segment size.
  */
-static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
-				   int ssize)
+static inline unsigned long hpt_vpn(unsigned long ea,
+				    unsigned long vsid, int ssize)
 {
-	if (ssize == MMU_SEGSIZE_256M)
-		return (vsid << 28) | (ea & 0xfffffffUL);
-	return (vsid << 40) | (ea & 0xffffffffffUL);
+	unsigned long mask;
+	int s_shift = segment_shift(ssize);
+
+	mask = (1ul << (s_shift - VPN_SHIFT)) - 1;
+	return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask);
 }
 
 /*
  * This hashes a virtual address
  */
-
-static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
-				     int ssize)
+static inline unsigned long hpt_hash(unsigned long vpn,
+				     unsigned int shift, int ssize)
 {
+	int mask;
 	unsigned long hash, vsid;
 
+	/* VPN_SHIFT can be at most 12 */
 	if (ssize == MMU_SEGSIZE_256M) {
-		hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
+		hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^
+			((vpn & mask) >> (shift - VPN_SHIFT));
 	} else {
-		vsid = va >> 40;
-		hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
+		vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
+		hash = vsid ^ (vsid << 25) ^
+			((vpn & mask) >> (shift - VPN_SHIFT));
 	}
 	return hash & 0x7fffffffffUL;
 }
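[Reviewer note] Since the va-to-vpn conversion is the point of this patch, it is worth checking that the new vpn-based hash computes the same value as the old va-based one for a 256MB segment. A small user-space check (the page-size shift of 16, i.e. 64K pages, and the test address are chosen arbitrarily):

#include <assert.h>

#define VPN_SHIFT	12
#define SID_SHIFT	28

int main(void)
{
	unsigned long va = 0x3579bdf02468aUL << 12;	/* page aligned */
	unsigned int shift = 16;			/* 64K pages */

	/* old form: operates on the full va */
	unsigned long hash_old = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);

	/* new form: operates on vpn = va >> VPN_SHIFT */
	unsigned long vpn  = va >> VPN_SHIFT;
	unsigned long mask = (1UL << (SID_SHIFT - VPN_SHIFT)) - 1;
	unsigned long hash_new = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^
				 ((vpn & mask) >> (shift - VPN_SHIFT));

	assert(hash_old == hash_new);
	return 0;
}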
@@ -280,63 +331,61 @@ extern void slb_set_size(u16 size);
 #endif /* __ASSEMBLY__ */
 
 /*
- * VSID allocation
+ * VSID allocation (256MB segment)
+ *
+ * We first generate a 38-bit "proto-VSID". For kernel addresses this
+ * is equal to the ESID | 1 << 37, for user addresses it is:
+ *	(context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1))
  *
- * We first generate a 36-bit "proto-VSID".  For kernel addresses this
- * is equal to the ESID, for user addresses it is:
- *	(context << 15) | (esid & 0x7fff)
+ * This splits the proto-VSID into the below ranges
+ *  0 to (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range
+ *  2^(CONTEXT_BITS + USER_ESID_BITS) to (2^VSID_BITS - 1) : Kernel proto-VSID range
  *
- * The two forms are distinguishable because the top bit is 0 for user
- * addresses, whereas the top two bits are 1 for kernel addresses.
- * Proto-VSIDs with the top two bits equal to 0b10 are reserved for
- * now.
+ * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1.
+ * That is, we assign half of the space to user processes and half
+ * to the kernel.
  *
  * The proto-VSIDs are then scrambled into real VSIDs with the
  * multiplicative hash:
  *
  *	VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
- *	where	VSID_MULTIPLIER = 268435399 = 0xFFFFFC7
- *		VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF
  *
- * This scramble is only well defined for proto-VSIDs below
- * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are
- * reserved.  VSID_MULTIPLIER is prime, so in particular it is
+ * VSID_MULTIPLIER is prime, so in particular it is
  * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
  * Because the modulus is 2^n-1 we can compute it efficiently without
  * a divide or extra multiply (see below).
  *
  * This scheme has several advantages over older methods:
  *
  * - We have VSIDs allocated for every kernel address
  * (i.e. everything above 0xC000000000000000), except the very top
  * segment, which simplifies several things.
  *
- * - We allow for 16 significant bits of ESID and 19 bits of
- * context for user addresses.  i.e. 16T (44 bits) of address space for
- * up to half a million contexts.
+ * - We allow for USER_ESID_BITS significant bits of ESID and
+ * CONTEXT_BITS bits of context for user addresses,
+ * i.e. 64T (46 bits) of address space for up to half a million contexts.
  *
  * - The scramble function gives robust scattering in the hash
  * table (at least based on some initial results).  The previous
  * method was more susceptible to pathological cases giving excessive
  * hash collisions.
  */
+
 /*
- * WARNING - If you change these you must make sure the asm
- * implementations in slb_allocate (slb_low.S), do_stab_bolted
- * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
+ * This should be computed such that proto-VSID * VSID_MULTIPLIER
+ * doesn't overflow 64 bits. It should also be co-prime to VSID_MODULUS.
  */
-
-#define VSID_MULTIPLIER_256M	ASM_CONST(200730139)	/* 28-bit prime */
-#define VSID_BITS_256M		36
+#define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
+#define VSID_BITS_256M		38
 #define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
 
 #define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_1T		24
+#define VSID_BITS_1T		26
 #define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
 
 #define CONTEXT_BITS		19
-#define USER_ESID_BITS		16
-#define USER_ESID_BITS_1T	4
+#define USER_ESID_BITS		18
+#define USER_ESID_BITS_1T	6
 
 #define USER_VSID_RANGE	(1UL << (USER_ESID_BITS + SID_SHIFT))
 
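[Reviewer note] The comment's claim that a 2^n - 1 modulus needs no divide deserves a worked example. Below is an illustrative user-space version of the folding trick behind the kernel's vsid_scramble() (partially visible in the later hunks), using the 1T constants from this patch; the loop structure and test values are this sketch's own:

#include <assert.h>

#define VSID_MULTIPLIER_1T	12538073UL	/* 24-bit prime */
#define VSID_BITS_1T		26
#define VSID_MODULUS_1T		((1UL << VSID_BITS_1T) - 1)

static unsigned long scramble_1t(unsigned long proto_vsid)
{
	/* 26-bit proto-VSID * 24-bit multiplier: no 64-bit overflow */
	unsigned long x = proto_vsid * VSID_MULTIPLIER_1T;

	/*
	 * Reduce mod 2^26 - 1 without a divide: 2^26 == 1 modulo the
	 * modulus, so the high bits simply fold back into the low bits.
	 */
	while (x > VSID_MODULUS_1T)
		x = (x >> VSID_BITS_1T) + (x & VSID_MODULUS_1T);
	return (x == VSID_MODULUS_1T) ? 0 : x;
}

int main(void)
{
	unsigned long p;

	/* spot-check the folding against a plain modulo */
	for (p = 1; p < (1UL << VSID_BITS_1T); p += 4099)
		assert(scramble_1t(p) ==
		       p * VSID_MULTIPLIER_1T % VSID_MODULUS_1T);
	return 0;
}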
@@ -372,6 +421,8 @@ extern void slb_set_size(u16 size);
 	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
 	add	rt,rt,rx
 
+/* 4 bits per slice and we have one slice per 1TB */
+#define SLICE_ARRAY_SIZE	(PGTABLE_RANGE >> 41)
 
 #ifndef __ASSEMBLY__
 
@@ -416,7 +467,7 @@ typedef struct {
 
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;	/* SLB page size encodings */
-	u64 high_slices_psize;  /* 4 bits per slice for now */
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
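[Reviewer note] For context on the two slice hunks above: with a 64TB PGTABLE_RANGE (2^46), SLICE_ARRAY_SIZE is 2^46 >> 41 = 32 bytes, i.e. 64 slices of 1TB at 4 bits each, which is why high_slices_psize becomes a byte array. A hypothetical sketch of how one 4-bit entry might be read back (SLICE_HIGH_SHIFT = 40 matches one slice per 1TB; the helper and main() are made up for illustration, not the kernel's slice code):

#define SLICE_HIGH_SHIFT	40	/* one slice per 1TB */

static unsigned int high_slice_psize(const unsigned char *hpsizes,
				     unsigned long addr)
{
	unsigned long index = addr >> SLICE_HIGH_SHIFT;	/* slice number */
	unsigned int nibble = index & 0x1;		/* which half-byte */

	return (hpsizes[index >> 1] >> (nibble * 4)) & 0xf;
}

int main(void)
{
	unsigned char hpsizes[32] = { 0 };	/* 64TB: 2^46 >> 41 = 32 bytes */
	unsigned long addr = 5UL << SLICE_HIGH_SHIFT;	/* in the 5th slice */

	hpsizes[5 >> 1] |= 0x4 << ((5 & 1) * 4);	/* store psize 4 */
	return high_slice_psize(hpsizes, addr) == 0x4 ? 0 : 1;
}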
@@ -452,12 +503,32 @@ typedef struct {
 	})
 #endif /* 1 */
 
-/* This is only valid for addresses >= PAGE_OFFSET */
+/*
+ * This is only valid for addresses >= PAGE_OFFSET
+ * The proto-VSID space is divided into two classes:
+ *  User:   0 to 2^(CONTEXT_BITS + USER_ESID_BITS) - 1
+ *  Kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^VSID_BITS - 1
+ *
+ * With KERNEL_START at 0xc000000000000000, the proto-VSID for
+ * the kernel ends up as 0xc00000000 (36 bits). With 64TB
+ * support we need to have the kernel proto-VSID in the
+ * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS.
+ */
 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
-	if (ssize == MMU_SEGSIZE_256M)
-		return vsid_scramble(ea >> SID_SHIFT, 256M);
-	return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
+	unsigned long proto_vsid;
+	/*
+	 * We need to make sure proto_vsid for the kernel is
+	 * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T])
+	 */
+	if (ssize == MMU_SEGSIZE_256M) {
+		proto_vsid = ea >> SID_SHIFT;
+		proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS));
+		return vsid_scramble(proto_vsid, 256M);
+	}
+	proto_vsid = ea >> SID_SHIFT_1T;
+	proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T));
+	return vsid_scramble(proto_vsid, 1T);
 }
 
 /* Returns the segment size indicator for a user address */
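[Reviewer note] The arithmetic in the comment above can be verified directly: for an effective address at KERNEL_START, the 256M proto-VSID is 0xc00000000, and OR-ing in bit CONTEXT_BITS + USER_ESID_BITS = 37 lands it in the kernel range [2^37, 2^38 - 1]. A standalone check, with the KERNEL_START value taken from the comment:

#include <assert.h>

#define SID_SHIFT	28
#define CONTEXT_BITS	19
#define USER_ESID_BITS	18
#define KERNEL_START	0xc000000000000000UL

int main(void)
{
	unsigned long proto_vsid = KERNEL_START >> SID_SHIFT;

	assert(proto_vsid == 0xc00000000UL);	/* 36 bits, as the comment says */

	proto_vsid |= 1UL << (CONTEXT_BITS + USER_ESID_BITS);	/* set bit 37 */

	assert(proto_vsid >= (1UL << 37));	/* kernel proto-VSID range ... */
	assert(proto_vsid <  (1UL << 38));	/* ... below 2^38 */
	return 0;
}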