aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCyrill Gorcunov <gorcunov@openvz.org>2011-05-19 19:45:49 -0400
committerIngo Molnar <mingo@elte.hu>2011-05-20 07:41:09 -0400
commit9d0fa6c5f43f2d9c6966dcab7af96a717682fdec (patch)
treee3fdd94ba55748fbfc41ad5e38c32169638f2091
parenta39d1f3f67f6a3d72b24f0d8bf9a295a27ea448e (diff)
x86, x2apic: Minimize IPI register writes using cluster groups
In the case of x2apic cluster mode we can group IPI register writes based on the cluster group instead of individual per-cpu destination messages. This reduces the apic register writes and reduces the number of IPI messages (in the best case we can reduce it by a factor of 16). With this change, the cost of flush_tlb_others(), with the flush tlb IPI being sent from a cpu in the socket-1 to all the logical cpus in socket-2 (on a Westmere-EX system that has 20 logical cpus in a socket) is 3x better now (compared to the former 'send one-by-one' algorithm). Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: steiner@sgi.com Cc: yinghai@kernel.org Link: http://lkml.kernel.org/r/20110519234637.512271057@sbsiddha-MOBL3.sc.intel.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c58
1 files changed, 44 insertions, 14 deletions
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 4b2bb1381ffa..4dfe9363ff4e 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -5,6 +5,7 @@
5#include <linux/ctype.h> 5#include <linux/ctype.h>
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/dmar.h> 7#include <linux/dmar.h>
8#include <linux/cpu.h>
8 9
9#include <asm/smp.h> 10#include <asm/smp.h>
10#include <asm/apic.h> 11#include <asm/apic.h>
@@ -12,6 +13,7 @@
12 13
13static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); 14static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); 15static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
16static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
15 17
16static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 18static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
17{ 19{
@@ -54,30 +56,52 @@ static inline u32 x2apic_cluster(int cpu)
54 return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; 56 return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
55} 57}
56 58
57/*
58 * for now, we send the IPI's one by one in the cpumask.
59 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
60 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
61 * writes.
62 */
63static void 59static void
64__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) 60__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
65{ 61{
66 unsigned long query_cpu; 62 struct cpumask *cpus_in_cluster_ptr;
67 unsigned long this_cpu; 63 struct cpumask *ipi_mask_ptr;
64 unsigned int cpu, this_cpu;
68 unsigned long flags; 65 unsigned long flags;
66 u32 dest;
69 67
70 x2apic_wrmsr_fence(); 68 x2apic_wrmsr_fence();
71 69
72 local_irq_save(flags); 70 local_irq_save(flags);
73 71
74 this_cpu = smp_processor_id(); 72 this_cpu = smp_processor_id();
75 for_each_cpu(query_cpu, mask) { 73
76 if (apic_dest == APIC_DEST_ALLBUT && query_cpu == this_cpu) 74 /*
75 * We are going to modify the mask, so we need our own copy
76 * and must be sure it is manipulated with irqs off.
77 */
78 ipi_mask_ptr = __raw_get_cpu_var(ipi_mask);
79 cpumask_copy(ipi_mask_ptr, mask);
80
81 /*
82 * The idea is to send one IPI per cluster.
83 */
84 for_each_cpu(cpu, ipi_mask_ptr) {
85 unsigned long i;
86
87 cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
88 dest = 0;
89
90 /* Collect cpus in cluster. */
91 for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
92 if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
93 dest |= per_cpu(x86_cpu_to_logical_apicid, i);
94 }
95
96 if (!dest)
77 continue; 97 continue;
78 __x2apic_send_IPI_dest( 98
79 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 99 __x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
80 vector, apic->dest_logical); 100 /*
101 * Cluster sibling cpus should be discarded now so
102 * we do not send them the IPI a second time.
103 */
104 cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
81 } 105 }
82 106
83 local_irq_restore(flags); 107 local_irq_restore(flags);
@@ -198,6 +222,10 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
198 if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), 222 if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
199 GFP_KERNEL)) { 223 GFP_KERNEL)) {
200 err = -ENOMEM; 224 err = -ENOMEM;
225 } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
226 GFP_KERNEL)) {
227 free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
228 err = -ENOMEM;
201 } 229 }
202 break; 230 break;
203 case CPU_UP_CANCELED: 231 case CPU_UP_CANCELED:
@@ -210,6 +238,7 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
210 __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu)); 238 __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
211 } 239 }
212 free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); 240 free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
241 free_cpumask_var(per_cpu(ipi_mask, this_cpu));
213 break; 242 break;
214 } 243 }
215 244
@@ -225,8 +254,9 @@ static int x2apic_init_cpu_notifier(void)
225 int cpu = smp_processor_id(); 254 int cpu = smp_processor_id();
226 255
227 zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); 256 zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
257 zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
228 258
229 BUG_ON(!per_cpu(cpus_in_cluster, cpu)); 259 BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
230 260
231 __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); 261 __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
232 register_hotcpu_notifier(&x2apic_cpu_notifier); 262 register_hotcpu_notifier(&x2apic_cpu_notifier);