diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2012-02-04 05:55:38 -0500 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2012-03-24 05:38:52 -0400 |
commit | 87067a935a174cf5e0b336d338a0ab535ffe199d (patch) | |
tree | 03a56144e82fb7bfded621e6b291906608aa1d81 /arch/arm/include/asm/tlbflush.h | |
parent | d9277d51a8eeaa097d3c1385f458c99d65ffc4f4 (diff) |
ARM: Optimize multi-CPU tlb flushing a little more
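The compiler cannot conditionalize the inline assembly instructions used
for the tlb operations, so it emits a test and a branch around each one.
This leads to sub-optimal code being generated when building a kernel
that supports multiple CPU types.
We can tweak things fairly simply by moving the flag test into the
assembly and making the mcr conditional, as the code fragment below shows: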
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
   text    data     bss     dec    hex filename
4153998  207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366  207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/include/asm/tlbflush.h')
-rw-r--r-- | arch/arm/include/asm/tlbflush.h | 136 |
1 files changed, 58 insertions, 78 deletions
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 02b2f8203982..85fe61e73202 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -318,6 +318,21 @@ extern struct cpu_tlb_fns cpu_tlb; | |||
318 | 318 | ||
319 | #define tlb_flag(f) ((always_tlb_flags & (f)) || (__tlb_flag & possible_tlb_flags & (f))) | 319 | #define tlb_flag(f) ((always_tlb_flags & (f)) || (__tlb_flag & possible_tlb_flags & (f))) |
320 | 320 | ||
321 | #define __tlb_op(f, insnarg, arg) \ | ||
322 | do { \ | ||
323 | if (always_tlb_flags & (f)) \ | ||
324 | asm("mcr " insnarg \ | ||
325 | : : "r" (arg) : "cc"); \ | ||
326 | else if (possible_tlb_flags & (f)) \ | ||
327 | asm("tst %1, %2\n\t" \ | ||
328 | "mcrne " insnarg \ | ||
329 | : : "r" (arg), "r" (__tlb_flag), "Ir" (f) \ | ||
330 | : "cc"); \ | ||
331 | } while (0) | ||
332 | |||
333 | #define tlb_op(f, regs, arg) __tlb_op(f, "p15, 0, %0, " regs, arg) | ||
334 | #define tlb_l2_op(f, regs, arg) __tlb_op(f, "p15, 1, %0, " regs, arg) | ||
335 | |||
321 | static inline void local_flush_tlb_all(void) | 336 | static inline void local_flush_tlb_all(void) |
322 | { | 337 | { |
323 | const int zero = 0; | 338 | const int zero = 0; |
@@ -326,16 +341,11 @@ static inline void local_flush_tlb_all(void) | |||
326 | if (tlb_flag(TLB_WB)) | 341 | if (tlb_flag(TLB_WB)) |
327 | dsb(); | 342 | dsb(); |
328 | 343 | ||
329 | if (tlb_flag(TLB_V3_FULL)) | 344 | tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); |
330 | asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc"); | 345 | tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero); |
331 | if (tlb_flag(TLB_V4_U_FULL | TLB_V6_U_FULL)) | 346 | tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero); |
332 | asm("mcr p15, 0, %0, c8, c7, 0" : : "r" (zero) : "cc"); | 347 | tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero); |
333 | if (tlb_flag(TLB_V4_D_FULL | TLB_V6_D_FULL)) | 348 | tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero); |
334 | asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc"); | ||
335 | if (tlb_flag(TLB_V4_I_FULL | TLB_V6_I_FULL)) | ||
336 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); | ||
337 | if (tlb_flag(TLB_V7_UIS_FULL)) | ||
338 | asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc"); | ||
339 | 349 | ||
340 | if (tlb_flag(TLB_BARRIER)) { | 350 | if (tlb_flag(TLB_BARRIER)) { |
341 | dsb(); | 351 | dsb(); |
@@ -352,29 +362,23 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) | |||
352 | if (tlb_flag(TLB_WB)) | 362 | if (tlb_flag(TLB_WB)) |
353 | dsb(); | 363 | dsb(); |
354 | 364 | ||
355 | if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { | 365 | if (possible_tlb_flags & (TLB_V3_FULL|TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { |
356 | if (tlb_flag(TLB_V3_FULL)) | 366 | if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { |
357 | asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc"); | 367 | tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); |
358 | if (tlb_flag(TLB_V4_U_FULL)) | 368 | tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero); |
359 | asm("mcr p15, 0, %0, c8, c7, 0" : : "r" (zero) : "cc"); | 369 | tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero); |
360 | if (tlb_flag(TLB_V4_D_FULL)) | 370 | tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero); |
361 | asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc"); | 371 | } |
362 | if (tlb_flag(TLB_V4_I_FULL)) | 372 | put_cpu(); |
363 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); | ||
364 | } | 373 | } |
365 | put_cpu(); | 374 | |
366 | 375 | tlb_op(TLB_V6_U_ASID, "c8, c7, 2", asid); | |
367 | if (tlb_flag(TLB_V6_U_ASID)) | 376 | tlb_op(TLB_V6_D_ASID, "c8, c6, 2", asid); |
368 | asm("mcr p15, 0, %0, c8, c7, 2" : : "r" (asid) : "cc"); | 377 | tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid); |
369 | if (tlb_flag(TLB_V6_D_ASID)) | ||
370 | asm("mcr p15, 0, %0, c8, c6, 2" : : "r" (asid) : "cc"); | ||
371 | if (tlb_flag(TLB_V6_I_ASID)) | ||
372 | asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc"); | ||
373 | if (tlb_flag(TLB_V7_UIS_ASID)) | ||
374 | #ifdef CONFIG_ARM_ERRATA_720789 | 378 | #ifdef CONFIG_ARM_ERRATA_720789 |
375 | asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc"); | 379 | tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", zero); |
376 | #else | 380 | #else |
377 | asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc"); | 381 | tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", asid); |
378 | #endif | 382 | #endif |
379 | 383 | ||
380 | if (tlb_flag(TLB_BARRIER)) | 384 | if (tlb_flag(TLB_BARRIER)) |
@@ -392,30 +396,23 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) | |||
392 | if (tlb_flag(TLB_WB)) | 396 | if (tlb_flag(TLB_WB)) |
393 | dsb(); | 397 | dsb(); |
394 | 398 | ||
395 | if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { | 399 | if (possible_tlb_flags & (TLB_V3_PAGE|TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && |
396 | if (tlb_flag(TLB_V3_PAGE)) | 400 | cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { |
397 | asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc"); | 401 | tlb_op(TLB_V3_PAGE, "c6, c0, 0", uaddr); |
398 | if (tlb_flag(TLB_V4_U_PAGE)) | 402 | tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr); |
399 | asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (uaddr) : "cc"); | 403 | tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", uaddr); |
400 | if (tlb_flag(TLB_V4_D_PAGE)) | 404 | tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", uaddr); |
401 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc"); | ||
402 | if (tlb_flag(TLB_V4_I_PAGE)) | ||
403 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc"); | ||
404 | if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL)) | 405 | if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL)) |
405 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); | 406 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); |
406 | } | 407 | } |
407 | 408 | ||
408 | if (tlb_flag(TLB_V6_U_PAGE)) | 409 | tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", uaddr); |
409 | asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (uaddr) : "cc"); | 410 | tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", uaddr); |
410 | if (tlb_flag(TLB_V6_D_PAGE)) | 411 | tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr); |
411 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc"); | ||
412 | if (tlb_flag(TLB_V6_I_PAGE)) | ||
413 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc"); | ||
414 | if (tlb_flag(TLB_V7_UIS_PAGE)) | ||
415 | #ifdef CONFIG_ARM_ERRATA_720789 | 412 | #ifdef CONFIG_ARM_ERRATA_720789 |
416 | asm("mcr p15, 0, %0, c8, c3, 3" : : "r" (uaddr & PAGE_MASK) : "cc"); | 413 | tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK); |
417 | #else | 414 | #else |
418 | asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc"); | 415 | tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", uaddr); |
419 | #endif | 416 | #endif |
420 | 417 | ||
421 | if (tlb_flag(TLB_BARRIER)) | 418 | if (tlb_flag(TLB_BARRIER)) |
@@ -432,25 +429,17 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) | |||
432 | if (tlb_flag(TLB_WB)) | 429 | if (tlb_flag(TLB_WB)) |
433 | dsb(); | 430 | dsb(); |
434 | 431 | ||
435 | if (tlb_flag(TLB_V3_PAGE)) | 432 | tlb_op(TLB_V3_PAGE, "c6, c0, 0", kaddr); |
436 | asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (kaddr) : "cc"); | 433 | tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr); |
437 | if (tlb_flag(TLB_V4_U_PAGE)) | 434 | tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr); |
438 | asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (kaddr) : "cc"); | 435 | tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr); |
439 | if (tlb_flag(TLB_V4_D_PAGE)) | ||
440 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc"); | ||
441 | if (tlb_flag(TLB_V4_I_PAGE)) | ||
442 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc"); | ||
443 | if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL)) | 436 | if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL)) |
444 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); | 437 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); |
445 | 438 | ||
446 | if (tlb_flag(TLB_V6_U_PAGE)) | 439 | tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", kaddr); |
447 | asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (kaddr) : "cc"); | 440 | tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr); |
448 | if (tlb_flag(TLB_V6_D_PAGE)) | 441 | tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr); |
449 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc"); | 442 | tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr); |
450 | if (tlb_flag(TLB_V6_I_PAGE)) | ||
451 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc"); | ||
452 | if (tlb_flag(TLB_V7_UIS_PAGE)) | ||
453 | asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc"); | ||
454 | 443 | ||
455 | if (tlb_flag(TLB_BARRIER)) { | 444 | if (tlb_flag(TLB_BARRIER)) { |
456 | dsb(); | 445 | dsb(); |
@@ -475,13 +464,8 @@ static inline void flush_pmd_entry(void *pmd) | |||
475 | { | 464 | { |
476 | const unsigned int __tlb_flag = __cpu_tlb_flags; | 465 | const unsigned int __tlb_flag = __cpu_tlb_flags; |
477 | 466 | ||
478 | if (tlb_flag(TLB_DCLEAN)) | 467 | tlb_op(TLB_DCLEAN, "c7, c10, 1 @ flush_pmd", pmd); |
479 | asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pmd" | 468 | tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd); |
480 | : : "r" (pmd) : "cc"); | ||
481 | |||
482 | if (tlb_flag(TLB_L2CLEAN_FR)) | ||
483 | asm("mcr p15, 1, %0, c15, c9, 1 @ L2 flush_pmd" | ||
484 | : : "r" (pmd) : "cc"); | ||
485 | 469 | ||
486 | if (tlb_flag(TLB_WB)) | 470 | if (tlb_flag(TLB_WB)) |
487 | dsb(); | 471 | dsb(); |
@@ -491,15 +475,11 @@ static inline void clean_pmd_entry(void *pmd) | |||
491 | { | 475 | { |
492 | const unsigned int __tlb_flag = __cpu_tlb_flags; | 476 | const unsigned int __tlb_flag = __cpu_tlb_flags; |
493 | 477 | ||
494 | if (tlb_flag(TLB_DCLEAN)) | 478 | tlb_op(TLB_DCLEAN, "c7, c10, 1 @ flush_pmd", pmd); |
495 | asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pmd" | 479 | tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd); |
496 | : : "r" (pmd) : "cc"); | ||
497 | |||
498 | if (tlb_flag(TLB_L2CLEAN_FR)) | ||
499 | asm("mcr p15, 1, %0, c15, c9, 1 @ L2 flush_pmd" | ||
500 | : : "r" (pmd) : "cc"); | ||
501 | } | 480 | } |
502 | 481 | ||
482 | #undef tlb_op | ||
503 | #undef tlb_flag | 483 | #undef tlb_flag |
504 | #undef always_tlb_flags | 484 | #undef always_tlb_flags |
505 | #undef possible_tlb_flags | 485 | #undef possible_tlb_flags |