diff options
author | Catalin Marinas <catalin.marinas@arm.com> | 2009-05-30 09:00:14 -0400 |
---|---|---|
committer | Catalin Marinas <catalin.marinas@arm.com> | 2009-05-30 09:00:14 -0400 |
commit | faa7bc51c11d5bbe440ac04710fd7a3208782000 (patch) | |
tree | 8aaa4e8e2fbb14b421988762fa90a3dbe6fa76e7 | |
parent | da055eb52ec067d51dc08c7e86baf92dd5c01599 (diff) |
Check whether the TLB operations need broadcasting on SMP systems
ARMv7 SMP hardware can broadcast the TLB maintenance operations in
hardware, so that the software can avoid the costly IPIs.
This patch adds the necessary check (of the MMFR3 CPUID register) to skip
the software (IPI-based) broadcasting when the hardware already supports it.
(this patch is based on the work done by Tony Thompson @ ARM)
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
-rw-r--r-- | arch/arm/include/asm/cputype.h | 25 | ||||
-rw-r--r-- | arch/arm/include/asm/tlbflush.h | 26 | ||||
-rw-r--r-- | arch/arm/kernel/smp.c | 69 | ||||
-rw-r--r-- | arch/arm/mm/proc-v7.S | 4 | ||||
-rw-r--r-- | arch/arm/mm/tlb-v7.S | 17 |
5 files changed, 105 insertions, 36 deletions
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 7b9d27e749b8..b3e656c6fb78 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h | |||
@@ -8,6 +8,21 @@ | |||
8 | #define CPUID_TCM 2 | 8 | #define CPUID_TCM 2 |
9 | #define CPUID_TLBTYPE 3 | 9 | #define CPUID_TLBTYPE 3 |
10 | 10 | ||
11 | #define CPUID_EXT_PFR0 "c1, 0" | ||
12 | #define CPUID_EXT_PFR1 "c1, 1" | ||
13 | #define CPUID_EXT_DFR0 "c1, 2" | ||
14 | #define CPUID_EXT_AFR0 "c1, 3" | ||
15 | #define CPUID_EXT_MMFR0 "c1, 4" | ||
16 | #define CPUID_EXT_MMFR1 "c1, 5" | ||
17 | #define CPUID_EXT_MMFR2 "c1, 6" | ||
18 | #define CPUID_EXT_MMFR3 "c1, 7" | ||
19 | #define CPUID_EXT_ISAR0 "c2, 0" | ||
20 | #define CPUID_EXT_ISAR1 "c2, 1" | ||
21 | #define CPUID_EXT_ISAR2 "c2, 2" | ||
22 | #define CPUID_EXT_ISAR3 "c2, 3" | ||
23 | #define CPUID_EXT_ISAR4 "c2, 4" | ||
24 | #define CPUID_EXT_ISAR5 "c2, 5" | ||
25 | |||
11 | #ifdef CONFIG_CPU_CP15 | 26 | #ifdef CONFIG_CPU_CP15 |
12 | #define read_cpuid(reg) \ | 27 | #define read_cpuid(reg) \ |
13 | ({ \ | 28 | ({ \ |
@@ -18,9 +33,19 @@ | |||
18 | : "cc"); \ | 33 | : "cc"); \ |
19 | __val; \ | 34 | __val; \ |
20 | }) | 35 | }) |
36 | #define read_cpuid_ext(ext_reg) \ | ||
37 | ({ \ | ||
38 | unsigned int __val; \ | ||
39 | asm("mrc p15, 0, %0, c0, " ext_reg \ | ||
40 | : "=r" (__val) \ | ||
41 | : \ | ||
42 | : "cc"); \ | ||
43 | __val; \ | ||
44 | }) | ||
21 | #else | 45 | #else |
22 | extern unsigned int processor_id; | 46 | extern unsigned int processor_id; |
23 | #define read_cpuid(reg) (processor_id) | 47 | #define read_cpuid(reg) (processor_id) |
48 | #define read_cpuid_ext(reg) 0 | ||
24 | #endif | 49 | #endif |
25 | 50 | ||
26 | /* | 51 | /* |
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index a62218013c78..c964f3fc3bc5 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h | |||
@@ -40,6 +40,12 @@ | |||
40 | #define TLB_V6_I_ASID (1 << 18) | 40 | #define TLB_V6_I_ASID (1 << 18) |
41 | 41 | ||
42 | #define TLB_BTB (1 << 28) | 42 | #define TLB_BTB (1 << 28) |
43 | |||
44 | /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */ | ||
45 | #define TLB_V7_UIS_PAGE (1 << 19) | ||
46 | #define TLB_V7_UIS_FULL (1 << 20) | ||
47 | #define TLB_V7_UIS_ASID (1 << 21) | ||
48 | |||
43 | #define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ | 49 | #define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ |
44 | #define TLB_DCLEAN (1 << 30) | 50 | #define TLB_DCLEAN (1 << 30) |
45 | #define TLB_WB (1 << 31) | 51 | #define TLB_WB (1 << 31) |
@@ -176,9 +182,17 @@ | |||
176 | # define v6wbi_always_flags (-1UL) | 182 | # define v6wbi_always_flags (-1UL) |
177 | #endif | 183 | #endif |
178 | 184 | ||
185 | #ifdef CONFIG_SMP | ||
186 | #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ | ||
187 | TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) | ||
188 | #else | ||
189 | #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ | ||
190 | TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID) | ||
191 | #endif | ||
192 | |||
179 | #ifdef CONFIG_CPU_TLB_V7 | 193 | #ifdef CONFIG_CPU_TLB_V7 |
180 | # define v7wbi_possible_flags v6wbi_tlb_flags | 194 | # define v7wbi_possible_flags v7wbi_tlb_flags |
181 | # define v7wbi_always_flags v6wbi_tlb_flags | 195 | # define v7wbi_always_flags v7wbi_tlb_flags |
182 | # ifdef _TLB | 196 | # ifdef _TLB |
183 | # define MULTI_TLB 1 | 197 | # define MULTI_TLB 1 |
184 | # else | 198 | # else |
@@ -316,6 +330,8 @@ static inline void local_flush_tlb_all(void) | |||
316 | asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc"); | 330 | asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc"); |
317 | if (tlb_flag(TLB_V4_I_FULL | TLB_V6_I_FULL)) | 331 | if (tlb_flag(TLB_V4_I_FULL | TLB_V6_I_FULL)) |
318 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); | 332 | asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); |
333 | if (tlb_flag(TLB_V7_UIS_FULL)) | ||
334 | asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc"); | ||
319 | 335 | ||
320 | if (tlb_flag(TLB_BTB)) { | 336 | if (tlb_flag(TLB_BTB)) { |
321 | /* flush the branch target cache */ | 337 | /* flush the branch target cache */ |
@@ -351,6 +367,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) | |||
351 | asm("mcr p15, 0, %0, c8, c6, 2" : : "r" (asid) : "cc"); | 367 | asm("mcr p15, 0, %0, c8, c6, 2" : : "r" (asid) : "cc"); |
352 | if (tlb_flag(TLB_V6_I_ASID)) | 368 | if (tlb_flag(TLB_V6_I_ASID)) |
353 | asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc"); | 369 | asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc"); |
370 | if (tlb_flag(TLB_V7_UIS_ASID)) | ||
371 | asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc"); | ||
354 | 372 | ||
355 | if (tlb_flag(TLB_BTB)) { | 373 | if (tlb_flag(TLB_BTB)) { |
356 | /* flush the branch target cache */ | 374 | /* flush the branch target cache */ |
@@ -389,6 +407,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) | |||
389 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc"); | 407 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc"); |
390 | if (tlb_flag(TLB_V6_I_PAGE)) | 408 | if (tlb_flag(TLB_V6_I_PAGE)) |
391 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc"); | 409 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc"); |
410 | if (tlb_flag(TLB_V7_UIS_PAGE)) | ||
411 | asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc"); | ||
392 | 412 | ||
393 | if (tlb_flag(TLB_BTB)) { | 413 | if (tlb_flag(TLB_BTB)) { |
394 | /* flush the branch target cache */ | 414 | /* flush the branch target cache */ |
@@ -424,6 +444,8 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) | |||
424 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc"); | 444 | asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc"); |
425 | if (tlb_flag(TLB_V6_I_PAGE)) | 445 | if (tlb_flag(TLB_V6_I_PAGE)) |
426 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc"); | 446 | asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc"); |
447 | if (tlb_flag(TLB_V7_UIS_PAGE)) | ||
448 | asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc"); | ||
427 | 449 | ||
428 | if (tlb_flag(TLB_BTB)) { | 450 | if (tlb_flag(TLB_BTB)) { |
429 | /* flush the branch target cache */ | 451 | /* flush the branch target cache */ |
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 6014dfd22af4..ece658e773a6 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
33 | #include <asm/tlbflush.h> | 33 | #include <asm/tlbflush.h> |
34 | #include <asm/ptrace.h> | 34 | #include <asm/ptrace.h> |
35 | #include <asm/cputype.h> | ||
35 | 36 | ||
36 | /* | 37 | /* |
37 | * as from 2.5, kernels no longer have an init_tasks structure | 38 | * as from 2.5, kernels no longer have an init_tasks structure |
@@ -545,6 +546,12 @@ struct tlb_args { | |||
545 | unsigned long ta_end; | 546 | unsigned long ta_end; |
546 | }; | 547 | }; |
547 | 548 | ||
549 | /* all SMP configurations have the extended CPUID registers */ | ||
550 | static inline int tlb_ops_need_broadcast(void) | ||
551 | { | ||
552 | return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2; | ||
553 | } | ||
554 | |||
548 | static inline void ipi_flush_tlb_all(void *ignored) | 555 | static inline void ipi_flush_tlb_all(void *ignored) |
549 | { | 556 | { |
550 | local_flush_tlb_all(); | 557 | local_flush_tlb_all(); |
@@ -587,51 +594,61 @@ static inline void ipi_flush_tlb_kernel_range(void *arg) | |||
587 | 594 | ||
588 | void flush_tlb_all(void) | 595 | void flush_tlb_all(void) |
589 | { | 596 | { |
590 | on_each_cpu(ipi_flush_tlb_all, NULL, 1); | 597 | if (tlb_ops_need_broadcast()) |
598 | on_each_cpu(ipi_flush_tlb_all, NULL, 1); | ||
599 | else | ||
600 | local_flush_tlb_all(); | ||
591 | } | 601 | } |
592 | 602 | ||
593 | void flush_tlb_mm(struct mm_struct *mm) | 603 | void flush_tlb_mm(struct mm_struct *mm) |
594 | { | 604 | { |
595 | on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask); | 605 | if (tlb_ops_need_broadcast()) |
606 | on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask); | ||
607 | else | ||
608 | local_flush_tlb_mm(mm); | ||
596 | } | 609 | } |
597 | 610 | ||
598 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) | 611 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) |
599 | { | 612 | { |
600 | struct tlb_args ta; | 613 | if (tlb_ops_need_broadcast()) { |
601 | 614 | struct tlb_args ta; | |
602 | ta.ta_vma = vma; | 615 | ta.ta_vma = vma; |
603 | ta.ta_start = uaddr; | 616 | ta.ta_start = uaddr; |
604 | 617 | on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask); | |
605 | on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask); | 618 | } else |
619 | local_flush_tlb_page(vma, uaddr); | ||
606 | } | 620 | } |
607 | 621 | ||
608 | void flush_tlb_kernel_page(unsigned long kaddr) | 622 | void flush_tlb_kernel_page(unsigned long kaddr) |
609 | { | 623 | { |
610 | struct tlb_args ta; | 624 | if (tlb_ops_need_broadcast()) { |
611 | 625 | struct tlb_args ta; | |
612 | ta.ta_start = kaddr; | 626 | ta.ta_start = kaddr; |
613 | 627 | on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); | |
614 | on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); | 628 | } else |
629 | local_flush_tlb_kernel_page(kaddr); | ||
615 | } | 630 | } |
616 | 631 | ||
617 | void flush_tlb_range(struct vm_area_struct *vma, | 632 | void flush_tlb_range(struct vm_area_struct *vma, |
618 | unsigned long start, unsigned long end) | 633 | unsigned long start, unsigned long end) |
619 | { | 634 | { |
620 | struct tlb_args ta; | 635 | if (tlb_ops_need_broadcast()) { |
621 | 636 | struct tlb_args ta; | |
622 | ta.ta_vma = vma; | 637 | ta.ta_vma = vma; |
623 | ta.ta_start = start; | 638 | ta.ta_start = start; |
624 | ta.ta_end = end; | 639 | ta.ta_end = end; |
625 | 640 | on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask); | |
626 | on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask); | 641 | } else |
642 | local_flush_tlb_range(vma, start, end); | ||
627 | } | 643 | } |
628 | 644 | ||
629 | void flush_tlb_kernel_range(unsigned long start, unsigned long end) | 645 | void flush_tlb_kernel_range(unsigned long start, unsigned long end) |
630 | { | 646 | { |
631 | struct tlb_args ta; | 647 | if (tlb_ops_need_broadcast()) { |
632 | 648 | struct tlb_args ta; | |
633 | ta.ta_start = start; | 649 | ta.ta_start = start; |
634 | ta.ta_end = end; | 650 | ta.ta_end = end; |
635 | 651 | on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); | |
636 | on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); | 652 | } else |
653 | local_flush_tlb_kernel_range(start, end); | ||
637 | } | 654 | } |
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3397f1e64d76..c3f737516836 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S | |||
@@ -176,8 +176,8 @@ cpu_v7_name: | |||
176 | */ | 176 | */ |
177 | __v7_setup: | 177 | __v7_setup: |
178 | #ifdef CONFIG_SMP | 178 | #ifdef CONFIG_SMP |
179 | mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode | 179 | mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode and |
180 | orr r0, r0, #(0x1 << 6) | 180 | orr r0, r0, #(1 << 6) | (1 << 0) @ TLB ops broadcasting |
181 | mcr p15, 0, r0, c1, c0, 1 | 181 | mcr p15, 0, r0, c1, c0, 1 |
182 | #endif | 182 | #endif |
183 | adr r12, __v7_setup_stack @ the local stack | 183 | adr r12, __v7_setup_stack @ the local stack |
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index b637e7380ab7..a26a605b73bd 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S | |||
@@ -42,9 +42,11 @@ ENTRY(v7wbi_flush_user_tlb_range) | |||
42 | mov r1, r1, lsl #PAGE_SHIFT | 42 | mov r1, r1, lsl #PAGE_SHIFT |
43 | vma_vm_flags r2, r2 @ get vma->vm_flags | 43 | vma_vm_flags r2, r2 @ get vma->vm_flags |
44 | 1: | 44 | 1: |
45 | mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA (was 1) | 45 | #ifdef CONFIG_SMP |
46 | tst r2, #VM_EXEC @ Executable area ? | 46 | mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) |
47 | mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA (was 1) | 47 | #else |
48 | mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA | ||
49 | #endif | ||
48 | add r0, r0, #PAGE_SZ | 50 | add r0, r0, #PAGE_SZ |
49 | cmp r0, r1 | 51 | cmp r0, r1 |
50 | blo 1b | 52 | blo 1b |
@@ -69,8 +71,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) | |||
69 | mov r0, r0, lsl #PAGE_SHIFT | 71 | mov r0, r0, lsl #PAGE_SHIFT |
70 | mov r1, r1, lsl #PAGE_SHIFT | 72 | mov r1, r1, lsl #PAGE_SHIFT |
71 | 1: | 73 | 1: |
72 | mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA | 74 | #ifdef CONFIG_SMP |
73 | mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA | 75 | mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) |
76 | #else | ||
77 | mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA | ||
78 | #endif | ||
74 | add r0, r0, #PAGE_SZ | 79 | add r0, r0, #PAGE_SZ |
75 | cmp r0, r1 | 80 | cmp r0, r1 |
76 | blo 1b | 81 | blo 1b |
@@ -87,5 +92,5 @@ ENDPROC(v7wbi_flush_kern_tlb_range) | |||
87 | ENTRY(v7wbi_tlb_fns) | 92 | ENTRY(v7wbi_tlb_fns) |
88 | .long v7wbi_flush_user_tlb_range | 93 | .long v7wbi_flush_user_tlb_range |
89 | .long v7wbi_flush_kern_tlb_range | 94 | .long v7wbi_flush_kern_tlb_range |
90 | .long v6wbi_tlb_flags | 95 | .long v7wbi_tlb_flags |
91 | .size v7wbi_tlb_fns, . - v7wbi_tlb_fns | 96 | .size v7wbi_tlb_fns, . - v7wbi_tlb_fns |