author	Sebastian Andrzej Siewior <bigeasy@linutronix.de>	2016-09-09 12:08:23 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2016-09-09 18:00:06 -0400
commit	7d762e49c2117d3829eb3355f2617aea080ed3a7
tree	86945f194b7b59184bed3a384f42c12e79da00c3
parent	79d102cbfd2e9d94257fcc7c82807ef1cdf80322
perf/x86/amd/uncore: Prevent use after free
The recent conversion of the cpu hotplug support in the uncore driver
introduced a regression due to the way the callbacks are invoked at
initialization time.

The old code called the prepare/starting/online functions on each online cpu
as a block. The new code registers the hotplug callbacks in the core for each
state, and the core invokes the callbacks at each registration on all online
cpus.

The code implicitly relied on the prepare/starting/online callbacks being
called as a combo on a particular cpu, which was not obvious and completely
undocumented.

The resulting subtle wreckage happens due to the way the uncore code manages
shared data structures for cpus which share an uncore resource in hardware.
The sharing is determined in the cpu starting callback, but the prepare
callback allocates per cpu data for the upcoming cpu because potential sharing
is unknown at that point.

If the starting callback finds an online cpu which shares the hardware
resource, it takes a refcount on the per cpu data of that cpu and puts its own
data structure into the 'free_when_cpu_online' pointer of that shared data
structure. The online callback frees it.

With the old model this worked because in a starting callback only one unused
structure (the one of the starting cpu) existed.

The new code allocates the data structures for all cpus when the prepare
callback is registered.

Now the starting function iterates through all online cpus and looks for a
data structure (skipping its own) which has a matching hardware id. The id
member of the data structure is initialized to 0, but the hardware id can be 0
as well. The resulting wreckage is:

  CPU0 finds a matching id on CPU1, takes a refcount on CPU1's data and puts
  its own data structure into CPU1's data structure to be freed.

  CPU1 skips CPU0 because that data structure is its own, allegedly unused
  one. It finds a matching id on CPU2, takes a refcount on CPU2's data and
  puts its own data structure into CPU2's data structure to be freed.

  ....

Now the online callbacks are invoked:

  CPU0 has a pointer to CPU1's data and frees the original CPU0 data. So far
  so good.

  CPU1 has a pointer to CPU2's data and frees the original CPU1 data, which
  is still referenced by CPU0 ---> Boom.

So there are two issues to be solved here:

 1) The id field must be initialized at allocation time to a value which
    cannot be a valid hardware id, i.e. -1.

    This prevents the above scenario, but now CPU1 and CPU2 both stick their
    own data structure into the free_when_cpu_online pointer of CPU0, so we
    leak CPU1's data structure.

 2) Fix the memory leak described in #1.

    Instead of a single pointer, use a hlist to enqueue the superfluous data
    structures, which are then freed by the first cpu invoking the online
    callback.

Ideally we should know the sharing _before_ invoking the prepare callback,
but that is way beyond the scope of this bug fix.

[ tglx: Rewrote changelog ]

Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine")
Reported-and-tested-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
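As an illustration of the fix (not part of the patch), here is a minimal
userspace sketch of the deferred-free pattern: the per cpu copy that turns out
to be superfluous is parked on a list in the starting step and freed by
whichever cpu reaches the online step first. The names adopt_sibling,
clean_unused and unused_list are invented for the sketch, and a plain singly
linked list stands in for the kernel hlist API used by the actual driver code
in the diff below.

/*
 * Illustrative userspace sketch only; the real driver uses HLIST_HEAD,
 * hlist_add_head and hlist_for_each_entry_safe inside the cpu hotplug
 * starting/online callbacks.
 */
#include <stdio.h>
#include <stdlib.h>

struct uncore {
	int id;			/* hardware id, -1 until the starting step fills it in */
	int refcnt;
	struct uncore *next;	/* stand-in for the kernel's hlist_node */
};

static struct uncore *unused_list;	/* stand-in for uncore_unused_list */

/*
 * Starting step: the cpu found an already online sibling sharing the hardware
 * resource, so its private copy is superfluous. Parking it on a list (instead
 * of a single pointer in the sibling) lets several cpus hand over their copies
 * without overwriting each other.
 */
static struct uncore *adopt_sibling(struct uncore *mine, struct uncore *sibling)
{
	mine->next = unused_list;
	unused_list = mine;
	sibling->refcnt++;
	return sibling;
}

/* Online step: the first cpu to get here drains the whole list. */
static void clean_unused(void)
{
	while (unused_list) {
		struct uncore *u = unused_list;

		unused_list = u->next;
		free(u);
	}
}

int main(void)
{
	/* prepare step: allocate per cpu copies; id = -1 can never collide
	 * with a real hardware id (the old code left it at 0) */
	struct uncore *cpu0 = calloc(1, sizeof(*cpu0));
	struct uncore *cpu1 = calloc(1, sizeof(*cpu1));

	cpu0->id = cpu1->id = -1;

	/* starting step: both cpus turn out to share hardware id 0 */
	cpu0->id = 0;
	cpu0->refcnt = 1;
	cpu1->id = 0;
	cpu1 = adopt_sibling(cpu1, cpu0);	/* cpu1 now shares cpu0's data */

	/* online step: the parked copy is freed once, nothing leaks */
	clean_unused();
	printf("shared id %d, refcnt %d\n", cpu1->id, cpu1->refcnt);
	free(cpu0);
	return 0;
}

Built stand-alone this prints the shared id and refcount; the point is that
the parked copy is freed exactly once no matter how many cpus hand over their
private data, which is the leak that item 2) above addresses.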
-rw-r--r--	arch/x86/events/amd/uncore.c	22
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index e6131d4454e6..65577f081d07 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -29,6 +29,8 @@
 
 #define COUNTER_SHIFT		16
 
+static HLIST_HEAD(uncore_unused_list);
+
 struct amd_uncore {
 	int id;
 	int refcnt;
@@ -39,7 +41,7 @@ struct amd_uncore {
 	cpumask_t *active_mask;
 	struct pmu *pmu;
 	struct perf_event *events[MAX_COUNTERS];
-	struct amd_uncore *free_when_cpu_online;
+	struct hlist_node node;
 };
 
 static struct amd_uncore * __percpu *amd_uncore_nb;
@@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
 		uncore_nb->active_mask = &amd_nb_active_mask;
 		uncore_nb->pmu = &amd_nb_pmu;
+		uncore_nb->id = -1;
 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
 	}
 
@@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 		uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
 		uncore_l2->active_mask = &amd_l2_active_mask;
 		uncore_l2->pmu = &amd_l2_pmu;
+		uncore_l2->id = -1;
 		*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
 	}
 
@@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this,
 			continue;
 
 		if (this->id == that->id) {
-			that->free_when_cpu_online = this;
+			hlist_add_head(&this->node, &uncore_unused_list);
 			this = that;
 			break;
 		}
@@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
 	return 0;
 }
 
+static void uncore_clean_online(void)
+{
+	struct amd_uncore *uncore;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
+		hlist_del(&uncore->node);
+		kfree(uncore);
+	}
+}
+
 static void uncore_online(unsigned int cpu,
 			  struct amd_uncore * __percpu *uncores)
 {
 	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
 
-	kfree(uncore->free_when_cpu_online);
-	uncore->free_when_cpu_online = NULL;
+	uncore_clean_online();
 
 	if (cpu == uncore->cpu)
 		cpumask_set_cpu(cpu, uncore->active_mask);