diff options
author | Gavin Guo <gavin.guo@canonical.com> | 2016-01-19 23:36:58 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-01-22 07:51:04 -0500 |
commit | 1dff76b92f69051e579bdc131e01500da9fa2a91 (patch) | |
tree | 213992e83611e2d424f180680f9ce93e39069380 /kernel | |
parent | 9c03ee147193645be4c186d3688232fa438c57c7 (diff) |
sched/numa: Fix use-after-free bug in the task_numa_compare
The following message can be observed on the Ubuntu v3.13.0-65 with KASan
backported:
==================================================================
BUG: KASan: use after free in task_numa_find_cpu+0x64c/0x890 at addr ffff880dd393ecd8
Read of size 8 by task qemu-system-x86/3998900
=============================================================================
BUG kmalloc-128 (Tainted: G B ): kasan: bad access detected
-----------------------------------------------------------------------------
INFO: Allocated in task_numa_fault+0xc1b/0xed0 age=41980 cpu=18 pid=3998890
__slab_alloc+0x4f8/0x560
__kmalloc+0x1eb/0x280
task_numa_fault+0xc1b/0xed0
do_numa_page+0x192/0x200
handle_mm_fault+0x808/0x1160
__do_page_fault+0x218/0x750
do_page_fault+0x1a/0x70
page_fault+0x28/0x30
SyS_poll+0x66/0x1a0
system_call_fastpath+0x1a/0x1f
INFO: Freed in task_numa_free+0x1d2/0x200 age=62 cpu=18 pid=0
__slab_free+0x2ab/0x3f0
kfree+0x161/0x170
task_numa_free+0x1d2/0x200
finish_task_switch+0x1d2/0x210
__schedule+0x5d4/0xc60
schedule_preempt_disabled+0x40/0xc0
cpu_startup_entry+0x2da/0x340
start_secondary+0x28f/0x360
Call Trace:
[<ffffffff81a6ce35>] dump_stack+0x45/0x56
[<ffffffff81244aed>] print_trailer+0xfd/0x170
[<ffffffff8124ac36>] object_err+0x36/0x40
[<ffffffff8124cbf9>] kasan_report_error+0x1e9/0x3a0
[<ffffffff8124d260>] kasan_report+0x40/0x50
[<ffffffff810dda7c>] ? task_numa_find_cpu+0x64c/0x890
[<ffffffff8124bee9>] __asan_load8+0x69/0xa0
[<ffffffff814f5c38>] ? find_next_bit+0xd8/0x120
[<ffffffff810dda7c>] task_numa_find_cpu+0x64c/0x890
[<ffffffff810de16c>] task_numa_migrate+0x4ac/0x7b0
[<ffffffff810de523>] numa_migrate_preferred+0xb3/0xc0
[<ffffffff810e0b88>] task_numa_fault+0xb88/0xed0
[<ffffffff8120ef02>] do_numa_page+0x192/0x200
[<ffffffff81211038>] handle_mm_fault+0x808/0x1160
[<ffffffff810d7dbd>] ? sched_clock_cpu+0x10d/0x160
[<ffffffff81068c52>] ? native_load_tls+0x82/0xa0
[<ffffffff81a7bd68>] __do_page_fault+0x218/0x750
[<ffffffff810c2186>] ? hrtimer_try_to_cancel+0x76/0x160
[<ffffffff81a6f5e7>] ? schedule_hrtimeout_range_clock.part.24+0xf7/0x1c0
[<ffffffff81a7c2ba>] do_page_fault+0x1a/0x70
[<ffffffff81a772e8>] page_fault+0x28/0x30
[<ffffffff8128cbd4>] ? do_sys_poll+0x1c4/0x6d0
[<ffffffff810e64f6>] ? enqueue_task_fair+0x4b6/0xaa0
[<ffffffff810233c9>] ? sched_clock+0x9/0x10
[<ffffffff810cf70a>] ? resched_task+0x7a/0xc0
[<ffffffff810d0663>] ? check_preempt_curr+0xb3/0x130
[<ffffffff8128b5c0>] ? poll_select_copy_remaining+0x170/0x170
[<ffffffff810d3bc0>] ? wake_up_state+0x10/0x20
[<ffffffff8112a28f>] ? drop_futex_key_refs.isra.14+0x1f/0x90
[<ffffffff8112d40e>] ? futex_requeue+0x3de/0xba0
[<ffffffff8112e49e>] ? do_futex+0xbe/0x8f0
[<ffffffff81022c89>] ? read_tsc+0x9/0x20
[<ffffffff8111bd9d>] ? ktime_get_ts+0x12d/0x170
[<ffffffff8108f699>] ? timespec_add_safe+0x59/0xe0
[<ffffffff8128d1f6>] SyS_poll+0x66/0x1a0
[<ffffffff81a830dd>] system_call_fastpath+0x1a/0x1f
As commit 1effd9f19324 ("sched/numa: Fix unsafe get_task_struct() in
task_numa_assign()") points out, the rcu_read_lock() cannot protect the
task_struct from being freed in the finish_task_switch(). And the bug
happens in the process of calculation of imp which requires the access of
p->numa_faults being freed in the following path:
do_exit()
current->flags |= PF_EXITING;
release_task()
~~delayed_put_task_struct()~~
schedule()
...
...
rq->curr = next;
context_switch()
finish_task_switch()
put_task_struct()
__put_task_struct()
task_numa_free()
The fix here to get_task_struct() early before end of dst_rq->lock to
protect the calculation process and also put_task_struct() in the
corresponding point if finally the dst_rq->curr somehow cannot be
assigned.
Additional credit to Liang Chen who helped fix the error logic and add the
put_task_struct() to the place it missed.
Signed-off-by: Gavin Guo <gavin.guo@canonical.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jay.vosburgh@canonical.com
Cc: liang.chen@canonical.com
Link: http://lkml.kernel.org/r/1453264618-17645-1-git-send-email-gavin.guo@canonical.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/fair.c | 30 |
1 files changed, 23 insertions, 7 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1926606ece80..56b7d4b83947 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -1220,8 +1220,6 @@ static void task_numa_assign(struct task_numa_env *env, | |||
1220 | { | 1220 | { |
1221 | if (env->best_task) | 1221 | if (env->best_task) |
1222 | put_task_struct(env->best_task); | 1222 | put_task_struct(env->best_task); |
1223 | if (p) | ||
1224 | get_task_struct(p); | ||
1225 | 1223 | ||
1226 | env->best_task = p; | 1224 | env->best_task = p; |
1227 | env->best_imp = imp; | 1225 | env->best_imp = imp; |
@@ -1289,20 +1287,30 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1289 | long imp = env->p->numa_group ? groupimp : taskimp; | 1287 | long imp = env->p->numa_group ? groupimp : taskimp; |
1290 | long moveimp = imp; | 1288 | long moveimp = imp; |
1291 | int dist = env->dist; | 1289 | int dist = env->dist; |
1290 | bool assigned = false; | ||
1292 | 1291 | ||
1293 | rcu_read_lock(); | 1292 | rcu_read_lock(); |
1294 | 1293 | ||
1295 | raw_spin_lock_irq(&dst_rq->lock); | 1294 | raw_spin_lock_irq(&dst_rq->lock); |
1296 | cur = dst_rq->curr; | 1295 | cur = dst_rq->curr; |
1297 | /* | 1296 | /* |
1298 | * No need to move the exiting task, and this ensures that ->curr | 1297 | * No need to move the exiting task or idle task. |
1299 | * wasn't reaped and thus get_task_struct() in task_numa_assign() | ||
1300 | * is safe under RCU read lock. | ||
1301 | * Note that rcu_read_lock() itself can't protect from the final | ||
1302 | * put_task_struct() after the last schedule(). | ||
1303 | */ | 1298 | */ |
1304 | if ((cur->flags & PF_EXITING) || is_idle_task(cur)) | 1299 | if ((cur->flags & PF_EXITING) || is_idle_task(cur)) |
1305 | cur = NULL; | 1300 | cur = NULL; |
1301 | else { | ||
1302 | /* | ||
1303 | * The task_struct must be protected here to protect the | ||
1304 | * p->numa_faults access in the task_weight since the | ||
1305 | * numa_faults could already be freed in the following path: | ||
1306 | * finish_task_switch() | ||
1307 | * --> put_task_struct() | ||
1308 | * --> __put_task_struct() | ||
1309 | * --> task_numa_free() | ||
1310 | */ | ||
1311 | get_task_struct(cur); | ||
1312 | } | ||
1313 | |||
1306 | raw_spin_unlock_irq(&dst_rq->lock); | 1314 | raw_spin_unlock_irq(&dst_rq->lock); |
1307 | 1315 | ||
1308 | /* | 1316 | /* |
@@ -1386,6 +1394,7 @@ balance: | |||
1386 | */ | 1394 | */ |
1387 | if (!load_too_imbalanced(src_load, dst_load, env)) { | 1395 | if (!load_too_imbalanced(src_load, dst_load, env)) { |
1388 | imp = moveimp - 1; | 1396 | imp = moveimp - 1; |
1397 | put_task_struct(cur); | ||
1389 | cur = NULL; | 1398 | cur = NULL; |
1390 | goto assign; | 1399 | goto assign; |
1391 | } | 1400 | } |
@@ -1411,9 +1420,16 @@ balance: | |||
1411 | env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu); | 1420 | env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu); |
1412 | 1421 | ||
1413 | assign: | 1422 | assign: |
1423 | assigned = true; | ||
1414 | task_numa_assign(env, cur, imp); | 1424 | task_numa_assign(env, cur, imp); |
1415 | unlock: | 1425 | unlock: |
1416 | rcu_read_unlock(); | 1426 | rcu_read_unlock(); |
1427 | /* | ||
1428 | * The dst_rq->curr isn't assigned. The protection for task_struct is | ||
1429 | * finished. | ||
1430 | */ | ||
1431 | if (cur && !assigned) | ||
1432 | put_task_struct(cur); | ||
1417 | } | 1433 | } |
1418 | 1434 | ||
1419 | static void task_numa_find_cpu(struct task_numa_env *env, | 1435 | static void task_numa_find_cpu(struct task_numa_env *env, |