diff options
author | Paul Mackerras <paulus@samba.org> | 2007-10-11 06:37:10 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2007-10-12 00:05:17 -0400 |
commit | 1189be6508d45183013ddb82b18f4934193de274 (patch) | |
tree | 58924481b4de56699e4a884dce8dc601e71cf7d1 /include/asm-powerpc/mmu-hash64.h | |
parent | 287e5d6fcccfa38b953cebe307e1ddfd32363355 (diff) |
[POWERPC] Use 1TB segments
This makes the kernel use 1TB segments for all kernel mappings and for
user addresses of 1TB and above, on machines which support them
(currently POWER5+, POWER6 and PA6T).
We detect that the machine supports 1TB segments by looking at the
ibm,processor-segment-sizes property in the device tree.
We don't currently use 1TB segments for user addresses below 1TB, since
that would effectively prevent 32-bit processes from using huge pages
unless we also had a way to revert to using 256MB segments. That
would be possible but would involve extra complications (such as
keeping track of which segment size was used when HPTEs were inserted)
and is not addressed here.
Parts of this patch were originally written by Ben Herrenschmidt.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'include/asm-powerpc/mmu-hash64.h')
-rw-r--r-- | include/asm-powerpc/mmu-hash64.h | 140 |
1 files changed, 98 insertions, 42 deletions
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h index b22b0d20e157..82328dec2b52 100644 --- a/include/asm-powerpc/mmu-hash64.h +++ b/include/asm-powerpc/mmu-hash64.h | |||
@@ -47,6 +47,8 @@ extern char initial_stab[]; | |||
47 | 47 | ||
48 | /* Bits in the SLB VSID word */ | 48 | /* Bits in the SLB VSID word */ |
49 | #define SLB_VSID_SHIFT 12 | 49 | #define SLB_VSID_SHIFT 12 |
50 | #define SLB_VSID_SHIFT_1T 24 | ||
51 | #define SLB_VSID_SSIZE_SHIFT 62 | ||
50 | #define SLB_VSID_B ASM_CONST(0xc000000000000000) | 52 | #define SLB_VSID_B ASM_CONST(0xc000000000000000) |
51 | #define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) | 53 | #define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) |
52 | #define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) | 54 | #define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) |
@@ -66,6 +68,7 @@ extern char initial_stab[]; | |||
66 | #define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) | 68 | #define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) |
67 | 69 | ||
68 | #define SLBIE_C (0x08000000) | 70 | #define SLBIE_C (0x08000000) |
71 | #define SLBIE_SSIZE_SHIFT 25 | ||
69 | 72 | ||
70 | /* | 73 | /* |
71 | * Hash table | 74 | * Hash table |
@@ -77,7 +80,7 @@ extern char initial_stab[]; | |||
77 | #define HPTE_V_AVPN_SHIFT 7 | 80 | #define HPTE_V_AVPN_SHIFT 7 |
78 | #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) | 81 | #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) |
79 | #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) | 82 | #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) |
80 | #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN)) | 83 | #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80)) |
81 | #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) | 84 | #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) |
82 | #define HPTE_V_LOCK ASM_CONST(0x0000000000000008) | 85 | #define HPTE_V_LOCK ASM_CONST(0x0000000000000008) |
83 | #define HPTE_V_LARGE ASM_CONST(0x0000000000000004) | 86 | #define HPTE_V_LARGE ASM_CONST(0x0000000000000004) |
@@ -164,16 +167,19 @@ struct mmu_psize_def | |||
164 | #define MMU_SEGSIZE_256M 0 | 167 | #define MMU_SEGSIZE_256M 0 |
165 | #define MMU_SEGSIZE_1T 1 | 168 | #define MMU_SEGSIZE_1T 1 |
166 | 169 | ||
170 | |||
167 | #ifndef __ASSEMBLY__ | 171 | #ifndef __ASSEMBLY__ |
168 | 172 | ||
169 | /* | 173 | /* |
170 | * The current system page sizes | 174 | * The current system page and segment sizes |
171 | */ | 175 | */ |
172 | extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; | 176 | extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; |
173 | extern int mmu_linear_psize; | 177 | extern int mmu_linear_psize; |
174 | extern int mmu_virtual_psize; | 178 | extern int mmu_virtual_psize; |
175 | extern int mmu_vmalloc_psize; | 179 | extern int mmu_vmalloc_psize; |
176 | extern int mmu_io_psize; | 180 | extern int mmu_io_psize; |
181 | extern int mmu_kernel_ssize; | ||
182 | extern int mmu_highuser_ssize; | ||
177 | 183 | ||
178 | /* | 184 | /* |
179 | * If the processor supports 64k normal pages but not 64k cache | 185 | * If the processor supports 64k normal pages but not 64k cache |
@@ -195,13 +201,15 @@ extern int mmu_huge_psize; | |||
195 | * This function sets the AVPN and L fields of the HPTE appropriately | 201 | * This function sets the AVPN and L fields of the HPTE appropriately |
196 | * for the page size | 202 | * for the page size |
197 | */ | 203 | */ |
198 | static inline unsigned long hpte_encode_v(unsigned long va, int psize) | 204 | static inline unsigned long hpte_encode_v(unsigned long va, int psize, |
205 | int ssize) | ||
199 | { | 206 | { |
200 | unsigned long v = | 207 | unsigned long v; |
201 | v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); | 208 | v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); |
202 | v <<= HPTE_V_AVPN_SHIFT; | 209 | v <<= HPTE_V_AVPN_SHIFT; |
203 | if (psize != MMU_PAGE_4K) | 210 | if (psize != MMU_PAGE_4K) |
204 | v |= HPTE_V_LARGE; | 211 | v |= HPTE_V_LARGE; |
212 | v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; | ||
205 | return v; | 213 | return v; |
206 | } | 214 | } |
207 | 215 | ||
@@ -226,20 +234,40 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize) | |||
226 | } | 234 | } |
227 | 235 | ||
228 | /* | 236 | /* |
229 | * This hashes a virtual address for a 256Mb segment only for now | 237 | * Build a VA given VSID, EA and segment size |
230 | */ | 238 | */ |
239 | static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid, | ||
240 | int ssize) | ||
241 | { | ||
242 | if (ssize == MMU_SEGSIZE_256M) | ||
243 | return (vsid << 28) | (ea & 0xfffffffUL); | ||
244 | return (vsid << 40) | (ea & 0xffffffffffUL); | ||
245 | } | ||
231 | 246 | ||
232 | static inline unsigned long hpt_hash(unsigned long va, unsigned int shift) | 247 | /* |
248 | * This hashes a virtual address | ||
249 | */ | ||
250 | |||
251 | static inline unsigned long hpt_hash(unsigned long va, unsigned int shift, | ||
252 | int ssize) | ||
233 | { | 253 | { |
234 | return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift); | 254 | unsigned long hash, vsid; |
255 | |||
256 | if (ssize == MMU_SEGSIZE_256M) { | ||
257 | hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift); | ||
258 | } else { | ||
259 | vsid = va >> 40; | ||
260 | hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift); | ||
261 | } | ||
262 | return hash & 0x7fffffffffUL; | ||
235 | } | 263 | } |
236 | 264 | ||
237 | extern int __hash_page_4K(unsigned long ea, unsigned long access, | 265 | extern int __hash_page_4K(unsigned long ea, unsigned long access, |
238 | unsigned long vsid, pte_t *ptep, unsigned long trap, | 266 | unsigned long vsid, pte_t *ptep, unsigned long trap, |
239 | unsigned int local); | 267 | unsigned int local, int ssize); |
240 | extern int __hash_page_64K(unsigned long ea, unsigned long access, | 268 | extern int __hash_page_64K(unsigned long ea, unsigned long access, |
241 | unsigned long vsid, pte_t *ptep, unsigned long trap, | 269 | unsigned long vsid, pte_t *ptep, unsigned long trap, |
242 | unsigned int local); | 270 | unsigned int local, int ssize); |
243 | struct mm_struct; | 271 | struct mm_struct; |
244 | extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); | 272 | extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); |
245 | extern int hash_huge_page(struct mm_struct *mm, unsigned long access, | 273 | extern int hash_huge_page(struct mm_struct *mm, unsigned long access, |
@@ -248,7 +276,7 @@ extern int hash_huge_page(struct mm_struct *mm, unsigned long access, | |||
248 | 276 | ||
249 | extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, | 277 | extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, |
250 | unsigned long pstart, unsigned long mode, | 278 | unsigned long pstart, unsigned long mode, |
251 | int psize); | 279 | int psize, int ssize); |
252 | 280 | ||
253 | extern void htab_initialize(void); | 281 | extern void htab_initialize(void); |
254 | extern void htab_initialize_secondary(void); | 282 | extern void htab_initialize_secondary(void); |
@@ -317,12 +345,17 @@ extern void slb_vmalloc_update(void); | |||
317 | * which are used by the iSeries firmware. | 345 | * which are used by the iSeries firmware. |
318 | */ | 346 | */ |
319 | 347 | ||
320 | #define VSID_MULTIPLIER ASM_CONST(200730139) /* 28-bit prime */ | 348 | #define VSID_MULTIPLIER_256M ASM_CONST(200730139) /* 28-bit prime */ |
321 | #define VSID_BITS 36 | 349 | #define VSID_BITS_256M 36 |
322 | #define VSID_MODULUS ((1UL<<VSID_BITS)-1) | 350 | #define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1) |
323 | 351 | ||
324 | #define CONTEXT_BITS 19 | 352 | #define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */ |
325 | #define USER_ESID_BITS 16 | 353 | #define VSID_BITS_1T 24 |
354 | #define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1) | ||
355 | |||
356 | #define CONTEXT_BITS 19 | ||
357 | #define USER_ESID_BITS 16 | ||
358 | #define USER_ESID_BITS_1T 4 | ||
326 | 359 | ||
327 | #define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT)) | 360 | #define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT)) |
328 | 361 | ||
@@ -336,17 +369,17 @@ extern void slb_vmalloc_update(void); | |||
336 | * rx = scratch register (clobbered) | 369 | * rx = scratch register (clobbered) |
337 | * | 370 | * |
338 | * - rt and rx must be different registers | 371 | * - rt and rx must be different registers |
339 | * - The answer will end up in the low 36 bits of rt. The higher | 372 | * - The answer will end up in the low VSID_BITS bits of rt. The higher |
340 | * bits may contain other garbage, so you may need to mask the | 373 | * bits may contain other garbage, so you may need to mask the |
341 | * result. | 374 | * result. |
342 | */ | 375 | */ |
343 | #define ASM_VSID_SCRAMBLE(rt, rx) \ | 376 | #define ASM_VSID_SCRAMBLE(rt, rx, size) \ |
344 | lis rx,VSID_MULTIPLIER@h; \ | 377 | lis rx,VSID_MULTIPLIER_##size@h; \ |
345 | ori rx,rx,VSID_MULTIPLIER@l; \ | 378 | ori rx,rx,VSID_MULTIPLIER_##size@l; \ |
346 | mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ | 379 | mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ |
347 | \ | 380 | \ |
348 | srdi rx,rt,VSID_BITS; \ | 381 | srdi rx,rt,VSID_BITS_##size; \ |
349 | clrldi rt,rt,(64-VSID_BITS); \ | 382 | clrldi rt,rt,(64-VSID_BITS_##size); \ |
350 | add rt,rt,rx; /* add high and low bits */ \ | 383 | add rt,rt,rx; /* add high and low bits */ \ |
351 | /* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ | 384 | /* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ |
352 | * 2^36-1+2^28-1. That in particular means that if r3 >= \ | 385 | * 2^36-1+2^28-1. That in particular means that if r3 >= \ |
@@ -355,7 +388,7 @@ extern void slb_vmalloc_update(void); | |||
355 | * doesn't, the answer is the low 36 bits of r3+1. So in all \ | 388 | * doesn't, the answer is the low 36 bits of r3+1. So in all \ |
356 | * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\ | 389 | * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\ |
357 | addi rx,rt,1; \ | 390 | addi rx,rt,1; \ |
358 | srdi rx,rx,VSID_BITS; /* extract 2^36 bit */ \ | 391 | srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ |
359 | add rt,rt,rx | 392 | add rt,rt,rx |
360 | 393 | ||
361 | 394 | ||
@@ -377,37 +410,60 @@ typedef struct { | |||
377 | } mm_context_t; | 410 | } mm_context_t; |
378 | 411 | ||
379 | 412 | ||
380 | static inline unsigned long vsid_scramble(unsigned long protovsid) | ||
381 | { | ||
382 | #if 0 | 413 | #if 0 |
383 | /* The code below is equivalent to this function for arguments | 414 | /* |
384 | * < 2^VSID_BITS, which is all this should ever be called | 415 | * The code below is equivalent to this function for arguments |
385 | * with. However gcc is not clever enough to compute the | 416 | * < 2^VSID_BITS, which is all this should ever be called |
386 | * modulus (2^n-1) without a second multiply. */ | 417 | * with. However gcc is not clever enough to compute the |
387 | return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS); | 418 | * modulus (2^n-1) without a second multiply. |
388 | #else /* 1 */ | 419 | */ |
389 | unsigned long x; | 420 | #define vsid_scrample(protovsid, size) \ |
421 | ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size)) | ||
390 | 422 | ||
391 | x = protovsid * VSID_MULTIPLIER; | 423 | #else /* 1 */ |
392 | x = (x >> VSID_BITS) + (x & VSID_MODULUS); | 424 | #define vsid_scramble(protovsid, size) \ |
393 | return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS; | 425 | ({ \ |
426 | unsigned long x; \ | ||
427 | x = (protovsid) * VSID_MULTIPLIER_##size; \ | ||
428 | x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \ | ||
429 | (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \ | ||
430 | }) | ||
394 | #endif /* 1 */ | 431 | #endif /* 1 */ |
395 | } | ||
396 | 432 | ||
397 | /* This is only valid for addresses >= KERNELBASE */ | 433 | /* This is only valid for addresses >= KERNELBASE */ |
398 | static inline unsigned long get_kernel_vsid(unsigned long ea) | 434 | static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) |
399 | { | 435 | { |
400 | return vsid_scramble(ea >> SID_SHIFT); | 436 | if (ssize == MMU_SEGSIZE_256M) |
437 | return vsid_scramble(ea >> SID_SHIFT, 256M); | ||
438 | return vsid_scramble(ea >> SID_SHIFT_1T, 1T); | ||
401 | } | 439 | } |
402 | 440 | ||
403 | /* This is only valid for user addresses (which are below 2^41) */ | 441 | /* Returns the segment size indicator for a user address */ |
404 | static inline unsigned long get_vsid(unsigned long context, unsigned long ea) | 442 | static inline int user_segment_size(unsigned long addr) |
405 | { | 443 | { |
406 | return vsid_scramble((context << USER_ESID_BITS) | 444 | /* Use 1T segments if possible for addresses >= 1T */ |
407 | | (ea >> SID_SHIFT)); | 445 | if (addr >= (1UL << SID_SHIFT_1T)) |
446 | return mmu_highuser_ssize; | ||
447 | return MMU_SEGSIZE_256M; | ||
408 | } | 448 | } |
409 | 449 | ||
410 | #define VSID_SCRAMBLE(pvsid) (((pvsid) * VSID_MULTIPLIER) % VSID_MODULUS) | 450 | /* This is only valid for user addresses (which are below 2^44) */ |
451 | static inline unsigned long get_vsid(unsigned long context, unsigned long ea, | ||
452 | int ssize) | ||
453 | { | ||
454 | if (ssize == MMU_SEGSIZE_256M) | ||
455 | return vsid_scramble((context << USER_ESID_BITS) | ||
456 | | (ea >> SID_SHIFT), 256M); | ||
457 | return vsid_scramble((context << USER_ESID_BITS_1T) | ||
458 | | (ea >> SID_SHIFT_1T), 1T); | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * This is only used on legacy iSeries in lparmap.c, | ||
463 | * hence the 256MB segment assumption. | ||
464 | */ | ||
465 | #define VSID_SCRAMBLE(pvsid) (((pvsid) * VSID_MULTIPLIER_256M) % \ | ||
466 | VSID_MODULUS_256M) | ||
411 | #define KERNEL_VSID(ea) VSID_SCRAMBLE(GET_ESID(ea)) | 467 | #define KERNEL_VSID(ea) VSID_SCRAMBLE(GET_ESID(ea)) |
412 | 468 | ||
413 | /* Physical address used by some IO functions */ | 469 | /* Physical address used by some IO functions */ |