diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2014-04-03 07:55:01 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2014-04-03 08:31:00 -0400 |
commit | 1b948d6caec4f28e3524244ca0f77c6ae8ddceef (patch) | |
tree | bc7e1d5800f10c39979d3f47872ba7047568f8a4 /arch/s390/kernel/smp.c | |
parent | 02a8f3abb708919149cb657a5202f4603f0c38e2 (diff) |
s390/mm,tlb: optimize TLB flushing for zEC12
The zEC12 machines introduced the local-clearing control for the IDTE
and IPTE instructions. If the control is set only the TLB of the local
CPU is cleared of entries, either all entries of a single address space
for IDTE, or the entry for a single page-table entry for IPTE.
Without the local-clearing control the TLB flush is broadcast to all
CPUs in the configuration, which is expensive.
The reset of the bit mask of the CPUs that need flushing after a
non-local IDTE is tricky. As TLB entries for an address space remain
in the TLB even if the address space is detached, a new bit field is
required to keep track of attached CPUs vs. CPUs in need of a
flush. After a non-local flush with IDTE the bit field of attached CPUs
is copied to the bit field of CPUs in need of a flush. The ordering
of operations on cpu_attach_mask, attach_count and mm_cpumask(mm) is
such that an underindication in mm_cpumask(mm) is prevented but an
overindication in mm_cpumask(mm) is possible.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/kernel/smp.c')
-rw-r--r-- | arch/s390/kernel/smp.c | 6 |
1 file changed, 6 insertions, 0 deletions
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 366d14460c2b..42a501d13a3b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c | |||
@@ -236,6 +236,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) | |||
236 | { | 236 | { |
237 | struct _lowcore *lc = pcpu->lowcore; | 237 | struct _lowcore *lc = pcpu->lowcore; |
238 | 238 | ||
239 | if (MACHINE_HAS_TLB_LC) | ||
240 | cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); | ||
241 | cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); | ||
239 | atomic_inc(&init_mm.context.attach_count); | 242 | atomic_inc(&init_mm.context.attach_count); |
240 | lc->cpu_nr = cpu; | 243 | lc->cpu_nr = cpu; |
241 | lc->percpu_offset = __per_cpu_offset[cpu]; | 244 | lc->percpu_offset = __per_cpu_offset[cpu]; |
@@ -760,6 +763,9 @@ void __cpu_die(unsigned int cpu) | |||
760 | cpu_relax(); | 763 | cpu_relax(); |
761 | pcpu_free_lowcore(pcpu); | 764 | pcpu_free_lowcore(pcpu); |
762 | atomic_dec(&init_mm.context.attach_count); | 765 | atomic_dec(&init_mm.context.attach_count); |
766 | cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); | ||
767 | if (MACHINE_HAS_TLB_LC) | ||
768 | cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); | ||
763 | } | 769 | } |
764 | 770 | ||
765 | void __noreturn cpu_die(void) | 771 | void __noreturn cpu_die(void) |