diff options
author | Ashok Raj <ashok.raj@intel.com> | 2005-06-25 17:55:02 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-25 19:24:31 -0400 |
commit | 884d9e40b4089014f40c49e86ac6505842db2b53 (patch) | |
tree | 8ff4f3225b46f84a5973074d9c4792b9e744c8e1 /arch/x86_64 | |
parent | cb0cd8d49a9b81aff7a02e2ed826b5cfdfe9a172 (diff) |
[PATCH] x86_64: Dont use broadcast shortcut to make it cpu hotplug safe.
Broadcast IPIs provide unexpected behaviour for CPU hotplug. CPUs in the
offline state also end up receiving the IPI. Once the CPUs become online they
receive these stale IPIs, which are bad and introduce unexpected behaviour.
This is easily avoided by not sending a broadcast and addressing just the
CPUs in the online map. Doing preliminary cycle counts, it appears there is no big
overhead and numbers seem around 0x3000-0x3900 on average on x86 and x86_64
systems with CPUs running at 3G, for both the broadcast and mask versions of the
APIs.
The shortcuts are useful only for flat mode (where the perf shows no
degradation), and in cluster mode it's unicast anyway. It's simpler to just
not use broadcast anymore.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/kernel/genapic_flat.c | 42 | ||||
-rw-r--r-- | arch/x86_64/kernel/smp.c | 10 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 19 |
3 files changed, 54 insertions, 17 deletions
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index b4cbbad04226..00f3fa6df714 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * Hacked for x86-64 by James Cleverdon from i386 architecture code by | 7 | * Hacked for x86-64 by James Cleverdon from i386 architecture code by |
8 | * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and | 8 | * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and |
9 | * James Cleverdon. | 9 | * James Cleverdon. |
10 | * Ashok Raj <ashok.raj@intel.com> | ||
11 | * Removed IPI broadcast shortcut to support CPU hotplug | ||
10 | */ | 12 | */ |
11 | #include <linux/config.h> | 13 | #include <linux/config.h> |
12 | #include <linux/threads.h> | 14 | #include <linux/threads.h> |
@@ -45,22 +47,6 @@ static void flat_init_apic_ldr(void) | |||
45 | apic_write_around(APIC_LDR, val); | 47 | apic_write_around(APIC_LDR, val); |
46 | } | 48 | } |
47 | 49 | ||
48 | static void flat_send_IPI_allbutself(int vector) | ||
49 | { | ||
50 | /* | ||
51 | * if there are no other CPUs in the system then | ||
52 | * we get an APIC send error if we try to broadcast. | ||
53 | * thus we have to avoid sending IPIs in this case. | ||
54 | */ | ||
55 | if (num_online_cpus() > 1) | ||
56 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); | ||
57 | } | ||
58 | |||
59 | static void flat_send_IPI_all(int vector) | ||
60 | { | ||
61 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | ||
62 | } | ||
63 | |||
64 | static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | 50 | static void flat_send_IPI_mask(cpumask_t cpumask, int vector) |
65 | { | 51 | { |
66 | unsigned long mask = cpus_addr(cpumask)[0]; | 52 | unsigned long mask = cpus_addr(cpumask)[0]; |
@@ -93,6 +79,30 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | |||
93 | local_irq_restore(flags); | 79 | local_irq_restore(flags); |
94 | } | 80 | } |
95 | 81 | ||
82 | static void flat_send_IPI_allbutself(int vector) | ||
83 | { | ||
84 | cpumask_t mask; | ||
85 | /* | ||
86 | * if there are no other CPUs in the system then | ||
87 | * we get an APIC send error if we try to broadcast. | ||
88 | * thus we have to avoid sending IPIs in this case. | ||
89 | */ | ||
90 | int this_cpu = get_cpu(); | ||
91 | |||
92 | mask = cpu_online_map; | ||
93 | cpu_clear(this_cpu, mask); | ||
94 | |||
95 | if (cpus_weight(mask) >= 1) | ||
96 | flat_send_IPI_mask(mask, vector); | ||
97 | |||
98 | put_cpu(); | ||
99 | } | ||
100 | |||
101 | static void flat_send_IPI_all(int vector) | ||
102 | { | ||
103 | flat_send_IPI_mask(cpu_online_map, vector); | ||
104 | } | ||
105 | |||
96 | static int flat_apic_id_registered(void) | 106 | static int flat_apic_id_registered(void) |
97 | { | 107 | { |
98 | return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map); | 108 | return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map); |
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 1e379ed17b1d..ccae392886af 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -283,6 +283,16 @@ struct call_data_struct { | |||
283 | 283 | ||
284 | static struct call_data_struct * call_data; | 284 | static struct call_data_struct * call_data; |
285 | 285 | ||
286 | void lock_ipi_call_lock(void) | ||
287 | { | ||
288 | spin_lock_irq(&call_lock); | ||
289 | } | ||
290 | |||
291 | void unlock_ipi_call_lock(void) | ||
292 | { | ||
293 | spin_unlock_irq(&call_lock); | ||
294 | } | ||
295 | |||
286 | /* | 296 | /* |
287 | * this function sends a 'generic call function' IPI to all other CPUs | 297 | * this function sends a 'generic call function' IPI to all other CPUs |
288 | * in the system. | 298 | * in the system. |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 571a55462fa0..b969ee128728 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -508,9 +508,22 @@ void __cpuinit start_secondary(void) | |||
508 | set_cpu_sibling_map(smp_processor_id()); | 508 | set_cpu_sibling_map(smp_processor_id()); |
509 | 509 | ||
510 | /* | 510 | /* |
511 | * We need to hold call_lock, so there is no inconsistency | ||
512 | * between the time smp_call_function() determines number of | ||
513 | * IPI receipients, and the time when the determination is made | ||
514 | * for which cpus receive the IPI in genapic_flat.c. Holding this | ||
515 | * lock helps us to not include this cpu in a currently in progress | ||
516 | * smp_call_function(). | ||
517 | */ | ||
518 | lock_ipi_call_lock(); | ||
519 | |||
520 | /* | ||
511 | * Allow the master to continue. | 521 | * Allow the master to continue. |
512 | */ | 522 | */ |
513 | cpu_set(smp_processor_id(), cpu_online_map); | 523 | cpu_set(smp_processor_id(), cpu_online_map); |
524 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | ||
525 | unlock_ipi_call_lock(); | ||
526 | |||
514 | mb(); | 527 | mb(); |
515 | 528 | ||
516 | /* Wait for TSC sync to not schedule things before. | 529 | /* Wait for TSC sync to not schedule things before. |
@@ -1038,6 +1051,7 @@ void __init smp_prepare_boot_cpu(void) | |||
1038 | cpu_set(me, cpu_callout_map); | 1051 | cpu_set(me, cpu_callout_map); |
1039 | cpu_set(0, cpu_sibling_map[0]); | 1052 | cpu_set(0, cpu_sibling_map[0]); |
1040 | cpu_set(0, cpu_core_map[0]); | 1053 | cpu_set(0, cpu_core_map[0]); |
1054 | per_cpu(cpu_state, me) = CPU_ONLINE; | ||
1041 | } | 1055 | } |
1042 | 1056 | ||
1043 | /* | 1057 | /* |
@@ -1066,6 +1080,7 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1066 | return -ENOSYS; | 1080 | return -ENOSYS; |
1067 | } | 1081 | } |
1068 | 1082 | ||
1083 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | ||
1069 | /* Boot it! */ | 1084 | /* Boot it! */ |
1070 | err = do_boot_cpu(cpu, apicid); | 1085 | err = do_boot_cpu(cpu, apicid); |
1071 | if (err < 0) { | 1086 | if (err < 0) { |
@@ -1170,8 +1185,10 @@ void __cpu_die(unsigned int cpu) | |||
1170 | 1185 | ||
1171 | for (i = 0; i < 10; i++) { | 1186 | for (i = 0; i < 10; i++) { |
1172 | /* They ack this in play_dead by setting CPU_DEAD */ | 1187 | /* They ack this in play_dead by setting CPU_DEAD */ |
1173 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) | 1188 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1189 | printk ("CPU %d is now offline\n", cpu); | ||
1174 | return; | 1190 | return; |
1191 | } | ||
1175 | current->state = TASK_UNINTERRUPTIBLE; | 1192 | current->state = TASK_UNINTERRUPTIBLE; |
1176 | schedule_timeout(HZ/10); | 1193 | schedule_timeout(HZ/10); |
1177 | } | 1194 | } |