diff options
author | Cyrill Gorcunov <gorcunov@openvz.org> | 2011-05-19 19:45:49 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-05-20 07:41:09 -0400 |
commit | 9d0fa6c5f43f2d9c6966dcab7af96a717682fdec (patch) | |
tree | e3fdd94ba55748fbfc41ad5e38c32169638f2091 | |
parent | a39d1f3f67f6a3d72b24f0d8bf9a295a27ea448e (diff) |
x86, x2apic: Minimize IPI register writes using cluster groups
In the case of x2apic cluster mode we can group IPI register
writes based on the cluster group instead of individual per-cpu
destination messages.
This reduces the apic register writes and reduces the amount of
IPI messages (in the best case we can reduce it by a factor of
16).
With this change, the cost of flush_tlb_others(), with the flush
tlb IPI being sent from a cpu in the socket-1 to all the logical
cpus in socket-2 (on a Westmere-EX system that has 20 logical
cpus in a socket) is 3 times better now (compared to the former
'send one-by-one' algorithm).
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: steiner@sgi.com
Cc: yinghai@kernel.org
Link: http://lkml.kernel.org/r/20110519234637.512271057@sbsiddha-MOBL3.sc.intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 58 |
1 files changed, 44 insertions, 14 deletions
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 4b2bb1381ffa..4dfe9363ff4e 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/dmar.h> | 7 | #include <linux/dmar.h> |
8 | #include <linux/cpu.h> | ||
8 | 9 | ||
9 | #include <asm/smp.h> | 10 | #include <asm/smp.h> |
10 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
@@ -12,6 +13,7 @@ | |||
12 | 13 | ||
13 | static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); | 14 | static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); |
14 | static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); | 15 | static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); |
16 | static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); | ||
15 | 17 | ||
16 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 18 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
17 | { | 19 | { |
@@ -54,30 +56,52 @@ static inline u32 x2apic_cluster(int cpu) | |||
54 | return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; | 56 | return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; |
55 | } | 57 | } |
56 | 58 | ||
57 | /* | ||
58 | * for now, we send the IPI's one by one in the cpumask. | ||
59 | * TBD: Based on the cpu mask, we can send the IPI's to the cluster group | ||
60 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register | ||
61 | * writes. | ||
62 | */ | ||
63 | static void | 59 | static void |
64 | __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) | 60 | __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) |
65 | { | 61 | { |
66 | unsigned long query_cpu; | 62 | struct cpumask *cpus_in_cluster_ptr; |
67 | unsigned long this_cpu; | 63 | struct cpumask *ipi_mask_ptr; |
64 | unsigned int cpu, this_cpu; | ||
68 | unsigned long flags; | 65 | unsigned long flags; |
66 | u32 dest; | ||
69 | 67 | ||
70 | x2apic_wrmsr_fence(); | 68 | x2apic_wrmsr_fence(); |
71 | 69 | ||
72 | local_irq_save(flags); | 70 | local_irq_save(flags); |
73 | 71 | ||
74 | this_cpu = smp_processor_id(); | 72 | this_cpu = smp_processor_id(); |
75 | for_each_cpu(query_cpu, mask) { | 73 | |
76 | if (apic_dest == APIC_DEST_ALLBUT && query_cpu == this_cpu) | 74 | /* |
75 | * We are going to modify the mask, so we need our own copy | ||
76 | * and must be sure it's manipulated with irqs off. | ||
77 | */ | ||
78 | ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); | ||
79 | cpumask_copy(ipi_mask_ptr, mask); | ||
80 | |||
81 | /* | ||
82 | * The idea is to send one IPI per cluster. | ||
83 | */ | ||
84 | for_each_cpu(cpu, ipi_mask_ptr) { | ||
85 | unsigned long i; | ||
86 | |||
87 | cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); | ||
88 | dest = 0; | ||
89 | |||
90 | /* Collect cpus in cluster. */ | ||
91 | for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { | ||
92 | if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) | ||
93 | dest |= per_cpu(x86_cpu_to_logical_apicid, i); | ||
94 | } | ||
95 | |||
96 | if (!dest) | ||
77 | continue; | 97 | continue; |
78 | __x2apic_send_IPI_dest( | 98 | |
79 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 99 | __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); |
80 | vector, apic->dest_logical); | 100 | /* |
101 | * Cluster sibling cpus should be discarded now so | ||
102 | * we do not send them an IPI a second time. | ||
103 | */ | ||
104 | cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); | ||
81 | } | 105 | } |
82 | 106 | ||
83 | local_irq_restore(flags); | 107 | local_irq_restore(flags); |
@@ -198,6 +222,10 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
198 | if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), | 222 | if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), |
199 | GFP_KERNEL)) { | 223 | GFP_KERNEL)) { |
200 | err = -ENOMEM; | 224 | err = -ENOMEM; |
225 | } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu), | ||
226 | GFP_KERNEL)) { | ||
227 | free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); | ||
228 | err = -ENOMEM; | ||
201 | } | 229 | } |
202 | break; | 230 | break; |
203 | case CPU_UP_CANCELED: | 231 | case CPU_UP_CANCELED: |
@@ -210,6 +238,7 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
210 | __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu)); | 238 | __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu)); |
211 | } | 239 | } |
212 | free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); | 240 | free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); |
241 | free_cpumask_var(per_cpu(ipi_mask, this_cpu)); | ||
213 | break; | 242 | break; |
214 | } | 243 | } |
215 | 244 | ||
@@ -225,8 +254,9 @@ static int x2apic_init_cpu_notifier(void) | |||
225 | int cpu = smp_processor_id(); | 254 | int cpu = smp_processor_id(); |
226 | 255 | ||
227 | zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); | 256 | zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); |
257 | zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL); | ||
228 | 258 | ||
229 | BUG_ON(!per_cpu(cpus_in_cluster, cpu)); | 259 | BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); |
230 | 260 | ||
231 | __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); | 261 | __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); |
232 | register_hotcpu_notifier(&x2apic_cpu_notifier); | 262 | register_hotcpu_notifier(&x2apic_cpu_notifier); |