diff options
author | Andi Kleen <ak@suse.de> | 2008-01-30 07:33:17 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-30 07:33:17 -0500 |
commit | ca74a6f84e68b44867022f4a4f3ec17c087c864e (patch) | |
tree | a5e84b251b1574b09288fb2636b4e4ea088ae70e /arch/x86/kernel/alternative.c | |
parent | 751752789162fde69474edfa15935d0a77c0bc17 (diff) |
x86: optimize lock prefix switching to run less frequently
On VMs implemented using JITs that cache translated code, changing the lock
prefixes is quite a costly operation that forces the JIT to throw away and
retranslate a lot of code.
Previously, an SMP kernel would rewrite the lock prefixes once for each CPU,
which is quite unnecessary. This patch changes the code to never switch at boot
in the normal case (SMP kernel booting with >1 CPU), and to switch only once
for an SMP kernel on UP.
This makes a significant difference in boot up performance on AMD SimNow!
I also expect it to be a little faster on native systems, because an SMP
switch does a lot of text_poke()s, each of which synchronizes the pipeline.
v1->v2: Rename max_cpus
v1->v2: Fix off by one in UP check (Thomas Gleixner)
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/alternative.c')
-rw-r--r-- | arch/x86/kernel/alternative.c | 16 |
1 files changed, 14 insertions, 2 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cdc43242da92..318a4f9b7ece 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -273,6 +273,7 @@ struct smp_alt_module { | |||
273 | }; | 273 | }; |
274 | static LIST_HEAD(smp_alt_modules); | 274 | static LIST_HEAD(smp_alt_modules); |
275 | static DEFINE_SPINLOCK(smp_alt); | 275 | static DEFINE_SPINLOCK(smp_alt); |
276 | static int smp_mode = 1; /* protected by smp_alt */ | ||
276 | 277 | ||
277 | void alternatives_smp_module_add(struct module *mod, char *name, | 278 | void alternatives_smp_module_add(struct module *mod, char *name, |
278 | void *locks, void *locks_end, | 279 | void *locks, void *locks_end, |
@@ -354,7 +355,14 @@ void alternatives_smp_switch(int smp) | |||
354 | BUG_ON(!smp && (num_online_cpus() > 1)); | 355 | BUG_ON(!smp && (num_online_cpus() > 1)); |
355 | 356 | ||
356 | spin_lock_irqsave(&smp_alt, flags); | 357 | spin_lock_irqsave(&smp_alt, flags); |
357 | if (smp) { | 358 | |
359 | /* | ||
360 | * Avoid unnecessary switches because it forces JIT based VMs to | ||
361 | * throw away all cached translations, which can be quite costly. | ||
362 | */ | ||
363 | if (smp == smp_mode) { | ||
364 | /* nothing */ | ||
365 | } else if (smp) { | ||
358 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); | 366 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); |
359 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 367 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
360 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 368 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
@@ -369,6 +377,7 @@ void alternatives_smp_switch(int smp) | |||
369 | alternatives_smp_unlock(mod->locks, mod->locks_end, | 377 | alternatives_smp_unlock(mod->locks, mod->locks_end, |
370 | mod->text, mod->text_end); | 378 | mod->text, mod->text_end); |
371 | } | 379 | } |
380 | smp_mode = smp; | ||
372 | spin_unlock_irqrestore(&smp_alt, flags); | 381 | spin_unlock_irqrestore(&smp_alt, flags); |
373 | } | 382 | } |
374 | 383 | ||
@@ -441,7 +450,10 @@ void __init alternative_instructions(void) | |||
441 | alternatives_smp_module_add(NULL, "core kernel", | 450 | alternatives_smp_module_add(NULL, "core kernel", |
442 | __smp_locks, __smp_locks_end, | 451 | __smp_locks, __smp_locks_end, |
443 | _text, _etext); | 452 | _text, _etext); |
444 | alternatives_smp_switch(0); | 453 | |
454 | /* Only switch to UP mode if we don't immediately boot others */ | ||
455 | if (num_possible_cpus() == 1 || setup_max_cpus <= 1) | ||
456 | alternatives_smp_switch(0); | ||
445 | } | 457 | } |
446 | #endif | 458 | #endif |
447 | apply_paravirt(__parainstructions, __parainstructions_end); | 459 | apply_paravirt(__parainstructions, __parainstructions_end); |