aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiri Olsa <jolsa@redhat.com>2016-08-15 06:17:00 -0400
committerIngo Molnar <mingo@kernel.org>2016-08-18 04:14:48 -0400
commit7b0501b1e7cddd32b265178e32d332bdfbb532d4 (patch)
treea681a67686b331ce2f6b5e77ab645144a78b95d0
parent88b2f634028f1f38dcc3d412e10ff1f224976daa (diff)
x86/smp: Fix __max_logical_packages value setup
Frank reported a kernel panic when he disabled several cores in BIOS via the following option: Core Disable Bitmap(Hex) [0] with number 0xFFE, which leaves 16 CPUs in the system (out of 48). The kernel panic below goes along with the following messages: smpboot: Max logical packages: 2^M smpboot: APIC(0) Converting physical 0 to logical package 0^M smpboot: APIC(20) Converting physical 1 to logical package 1^M smpboot: APIC(40) Package 2 exceeds logical package map^M smpboot: CPU 8 APICId 40 disabled^M smpboot: APIC(60) Package 3 exceeds logical package map^M smpboot: CPU 12 APICId 60 disabled^M ... general protection fault: 0000 [#1] SMP^M Modules linked in:^M CPU: 15 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc5+ #1^M Hardware name: SGI UV300/UV300, BIOS SGI UV 300 series BIOS 05/25/2016^M task: ffff8801673e0000 ti: ffff8801673ac000 task.ti: ffff8801673ac000^M RIP: 0010:[<ffffffff81014d54>] [<ffffffff81014d54>] uncore_change_context+0xd4/0x180^M ... [<ffffffff810158ac>] uncore_event_init_cpu+0x6c/0x70^M [<ffffffff81d8c91c>] intel_uncore_init+0x1c2/0x2dd^M [<ffffffff81d8c75a>] ? uncore_cpu_setup+0x17/0x17^M [<ffffffff81002190>] do_one_initcall+0x50/0x190^M [<ffffffff810ab193>] ? parse_args+0x293/0x480^M [<ffffffff81d87365>] kernel_init_freeable+0x1a5/0x249^M [<ffffffff81d86a35>] ? set_debug_rodata+0x12/0x12^M [<ffffffff816dc19e>] kernel_init+0xe/0x110^M [<ffffffff816e93bf>] ret_from_fork+0x1f/0x40^M [<ffffffff816dc190>] ? rest_init+0x80/0x80^M The reason for the panic is a wrong value of __max_logical_packages, which leaves logical_package_map uninitialized, while the uncore code relies on this map being properly initialized (maybe we should add some safety checks there as well). The __max_logical_packages is computed as: DIV_ROUND_UP(total_cpus, ncpus); - ncpus being the number of cores. With the above BIOS setup we get total_cpus == 16, which sets __max_logical_packages to 2 (ncpus is 12). 
Once topology_update_package_map processes a CPU with a logical pkg over 2, we display the above messages and fail to initialize the physical_to_logical_pkg map, which makes the uncore code crash. The fix is to remove the logical_package_map bitmap completely and keep and update the logical_packages number instead. After we enumerate all the present CPUs, we check if the enumerated logical packages count is within its computed maximum from BIOS data. If it's not the case, we set this maximum to the new enumerated value and freeze any new addition of logical packages. The freeze is because a lot of init code like uncore/rapl/cqm depends on having the maximum logical package value set to allocate their data, so we can't change it later on. Prarit Bhargava tested the patch and confirms that it solves the problem: From dmidecode: Core Count: 24 Core Enabled: 24 Thread Count: 48 Orig kernel boot log: [ 0.464981] smpboot: Max logical packages: 19 [ 0.469861] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.477261] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.484760] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.492258] smpboot: APIC(c0) Converting physical 3 to logical package 3 1. nr_cpus=8, should stop enumerating in package 0: [ 0.533664] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.539596] smpboot: Max logical packages: 19 2. maxcpus=8, should still enumerate all packages: [ 0.526494] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.532428] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.538456] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.544486] smpboot: APIC(c0) Converting physical 3 to logical package 3 [ 0.550524] smpboot: Max logical packages: 19 3. 
nr_cpus=49 ( 2 socket + 1 core on 3rd socket), should stop enumerating in package 2: [ 0.521378] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.527314] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.533345] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.539368] smpboot: Max logical packages: 19 4. maxcpus=49, should still enumerate all packages: [ 0.525591] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.531525] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.537547] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.543579] smpboot: APIC(c0) Converting physical 3 to logical package 3 [ 0.549624] smpboot: Max logical packages: 19 5. kdump (nr_cpus=1) works as well. Reported-by: Frank Ramsay <framsay@redhat.com> Tested-by: Prarit Bhargava <prarit@redhat.com> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Reviewed-by: Prarit Bhargava <prarit@redhat.com> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20160815101700.GA30090@krava Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/kernel/smpboot.c25
1 files changed, 16 insertions, 9 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2a6e84a30a54..4296beb8fdd3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
100/* Logical package management. We might want to allocate that dynamically */ 100/* Logical package management. We might want to allocate that dynamically */
101static int *physical_to_logical_pkg __read_mostly; 101static int *physical_to_logical_pkg __read_mostly;
102static unsigned long *physical_package_map __read_mostly;; 102static unsigned long *physical_package_map __read_mostly;;
103static unsigned long *logical_package_map __read_mostly;
104static unsigned int max_physical_pkg_id __read_mostly; 103static unsigned int max_physical_pkg_id __read_mostly;
105unsigned int __max_logical_packages __read_mostly; 104unsigned int __max_logical_packages __read_mostly;
106EXPORT_SYMBOL(__max_logical_packages); 105EXPORT_SYMBOL(__max_logical_packages);
106static unsigned int logical_packages __read_mostly;
107static bool logical_packages_frozen __read_mostly;
107 108
108/* Maximum number of SMT threads on any online core */ 109/* Maximum number of SMT threads on any online core */
109int __max_smt_threads __read_mostly; 110int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
277 if (test_and_set_bit(pkg, physical_package_map)) 278 if (test_and_set_bit(pkg, physical_package_map))
278 goto found; 279 goto found;
279 280
280 new = find_first_zero_bit(logical_package_map, __max_logical_packages); 281 if (logical_packages_frozen) {
281 if (new >= __max_logical_packages) {
282 physical_to_logical_pkg[pkg] = -1; 282 physical_to_logical_pkg[pkg] = -1;
283 pr_warn("APIC(%x) Package %u exceeds logical package map\n", 283 pr_warn("APIC(%x) Package %u exceeds logical package max\n",
284 apicid, pkg); 284 apicid, pkg);
285 return -ENOSPC; 285 return -ENOSPC;
286 } 286 }
287 set_bit(new, logical_package_map); 287
288 new = logical_packages++;
288 pr_info("APIC(%x) Converting physical %u to logical package %u\n", 289 pr_info("APIC(%x) Converting physical %u to logical package %u\n",
289 apicid, pkg, new); 290 apicid, pkg, new);
290 physical_to_logical_pkg[pkg] = new; 291 physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
341 } 342 }
342 343
343 __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); 344 __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
345 logical_packages = 0;
344 346
345 /* 347 /*
346 * Possibly larger than what we need as the number of apic ids per 348 * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
352 memset(physical_to_logical_pkg, 0xff, size); 354 memset(physical_to_logical_pkg, 0xff, size);
353 size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); 355 size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
354 physical_package_map = kzalloc(size, GFP_KERNEL); 356 physical_package_map = kzalloc(size, GFP_KERNEL);
355 size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
356 logical_package_map = kzalloc(size, GFP_KERNEL);
357
358 pr_info("Max logical packages: %u\n", __max_logical_packages);
359 357
360 for_each_present_cpu(cpu) { 358 for_each_present_cpu(cpu) {
361 unsigned int apicid = apic->cpu_present_to_apicid(cpu); 359 unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
369 set_cpu_possible(cpu, false); 367 set_cpu_possible(cpu, false);
370 set_cpu_present(cpu, false); 368 set_cpu_present(cpu, false);
371 } 369 }
370
371 if (logical_packages > __max_logical_packages) {
372 pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
373 logical_packages, __max_logical_packages);
374 logical_packages_frozen = true;
375 __max_logical_packages = logical_packages;
376 }
377
378 pr_info("Max logical packages: %u\n", __max_logical_packages);
372} 379}
373 380
374void __init smp_store_boot_cpu_info(void) 381void __init smp_store_boot_cpu_info(void)