diff options
author | Yonghong Song <yhs@fb.com> | 2018-04-10 12:37:32 -0400 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-04-10 19:01:40 -0400 |
commit | 3a38bb98d9abdc3856f26b5ed4332803065cd7cf (patch) | |
tree | 6c9ba982332e1efc49e9820824ca8946d3cf5c22 | |
parent | 0abf854d7cbbb405e39e0f93d5c1da98dca24bc0 (diff) |
bpf/tracing: fix a deadlock in perf_event_detach_bpf_prog
syzbot reported a possible deadlock in perf_event_detach_bpf_prog.
The error details:
======================================================
WARNING: possible circular locking dependency detected
4.16.0-rc7+ #3 Not tainted
------------------------------------------------------
syz-executor7/24531 is trying to acquire lock:
(bpf_event_mutex){+.+.}, at: [<000000008a849b07>] perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854
but task is already holding lock:
(&mm->mmap_sem){++++}, at: [<0000000038768f87>] vm_mmap_pgoff+0x198/0x280 mm/util.c:353
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&mm->mmap_sem){++++}:
__might_fault+0x13a/0x1d0 mm/memory.c:4571
_copy_to_user+0x2c/0xc0 lib/usercopy.c:25
copy_to_user include/linux/uaccess.h:155 [inline]
bpf_prog_array_copy_info+0xf2/0x1c0 kernel/bpf/core.c:1694
perf_event_query_prog_array+0x1c7/0x2c0 kernel/trace/bpf_trace.c:891
_perf_ioctl kernel/events/core.c:4750 [inline]
perf_ioctl+0x3e1/0x1480 kernel/events/core.c:4770
vfs_ioctl fs/ioctl.c:46 [inline]
do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
SYSC_ioctl fs/ioctl.c:701 [inline]
SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
entry_SYSCALL_64_after_hwframe+0x42/0xb7
-> #0 (bpf_event_mutex){+.+.}:
lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920
__mutex_lock_common kernel/locking/mutex.c:756 [inline]
__mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893
mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854
perf_event_free_bpf_prog kernel/events/core.c:8147 [inline]
_free_event+0xbdb/0x10f0 kernel/events/core.c:4116
put_event+0x24/0x30 kernel/events/core.c:4204
perf_mmap_close+0x60d/0x1010 kernel/events/core.c:5172
remove_vma+0xb4/0x1b0 mm/mmap.c:172
remove_vma_list mm/mmap.c:2490 [inline]
do_munmap+0x82a/0xdf0 mm/mmap.c:2731
mmap_region+0x59e/0x15a0 mm/mmap.c:1646
do_mmap+0x6c0/0xe00 mm/mmap.c:1483
do_mmap_pgoff include/linux/mm.h:2223 [inline]
vm_mmap_pgoff+0x1de/0x280 mm/util.c:355
SYSC_mmap_pgoff mm/mmap.c:1533 [inline]
SyS_mmap_pgoff+0x462/0x5f0 mm/mmap.c:1491
SYSC_mmap arch/x86/kernel/sys_x86_64.c:100 [inline]
SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:91
do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
entry_SYSCALL_64_after_hwframe+0x42/0xb7
other info that might help us debug this:
Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(bpf_event_mutex);
                               lock(&mm->mmap_sem);
  lock(bpf_event_mutex);
*** DEADLOCK ***
======================================================
The bug was introduced by commit f371b304f12e ("bpf/tracing: allow
user space to query prog array on the same tp"), where copy_to_user,
which may acquire mm->mmap_sem when it faults, is called while
bpf_event_mutex is held.
At the same time, during perf_event file descriptor close,
mm->mmap_sem is held first, and the subsequent
perf_event_detach_bpf_prog then needs bpf_event_mutex.
Such a scenario caused a deadlock.
As suggested by Daniel, moving copy_to_user out of the
bpf_event_mutex lock should fix the problem.
Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp")
Reported-by: syzbot+dc5ca0e4c9bfafaf2bae@syzkaller.appspotmail.com
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | include/linux/bpf.h | 4 | ||||
-rw-r--r-- | kernel/bpf/core.c | 45 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 25 |
3 files changed, 52 insertions, 22 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 95a7abd0ee92..486e65e3db26 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h | |||
@@ -339,8 +339,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, | |||
339 | void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, | 339 | void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, |
340 | struct bpf_prog *old_prog); | 340 | struct bpf_prog *old_prog); |
341 | int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, | 341 | int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, |
342 | __u32 __user *prog_ids, u32 request_cnt, | 342 | u32 *prog_ids, u32 request_cnt, |
343 | __u32 __user *prog_cnt); | 343 | u32 *prog_cnt); |
344 | int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, | 344 | int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, |
345 | struct bpf_prog *exclude_prog, | 345 | struct bpf_prog *exclude_prog, |
346 | struct bpf_prog *include_prog, | 346 | struct bpf_prog *include_prog, |
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d315b393abdd..ba03ec39efb3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c | |||
@@ -1572,13 +1572,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) | |||
1572 | return cnt; | 1572 | return cnt; |
1573 | } | 1573 | } |
1574 | 1574 | ||
1575 | static bool bpf_prog_array_copy_core(struct bpf_prog **prog, | ||
1576 | u32 *prog_ids, | ||
1577 | u32 request_cnt) | ||
1578 | { | ||
1579 | int i = 0; | ||
1580 | |||
1581 | for (; *prog; prog++) { | ||
1582 | if (*prog == &dummy_bpf_prog.prog) | ||
1583 | continue; | ||
1584 | prog_ids[i] = (*prog)->aux->id; | ||
1585 | if (++i == request_cnt) { | ||
1586 | prog++; | ||
1587 | break; | ||
1588 | } | ||
1589 | } | ||
1590 | |||
1591 | return !!(*prog); | ||
1592 | } | ||
1593 | |||
1575 | int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, | 1594 | int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, |
1576 | __u32 __user *prog_ids, u32 cnt) | 1595 | __u32 __user *prog_ids, u32 cnt) |
1577 | { | 1596 | { |
1578 | struct bpf_prog **prog; | 1597 | struct bpf_prog **prog; |
1579 | unsigned long err = 0; | 1598 | unsigned long err = 0; |
1580 | u32 i = 0, *ids; | ||
1581 | bool nospc; | 1599 | bool nospc; |
1600 | u32 *ids; | ||
1582 | 1601 | ||
1583 | /* users of this function are doing: | 1602 | /* users of this function are doing: |
1584 | * cnt = bpf_prog_array_length(); | 1603 | * cnt = bpf_prog_array_length(); |
@@ -1595,16 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, | |||
1595 | return -ENOMEM; | 1614 | return -ENOMEM; |
1596 | rcu_read_lock(); | 1615 | rcu_read_lock(); |
1597 | prog = rcu_dereference(progs)->progs; | 1616 | prog = rcu_dereference(progs)->progs; |
1598 | for (; *prog; prog++) { | 1617 | nospc = bpf_prog_array_copy_core(prog, ids, cnt); |
1599 | if (*prog == &dummy_bpf_prog.prog) | ||
1600 | continue; | ||
1601 | ids[i] = (*prog)->aux->id; | ||
1602 | if (++i == cnt) { | ||
1603 | prog++; | ||
1604 | break; | ||
1605 | } | ||
1606 | } | ||
1607 | nospc = !!(*prog); | ||
1608 | rcu_read_unlock(); | 1618 | rcu_read_unlock(); |
1609 | err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); | 1619 | err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); |
1610 | kfree(ids); | 1620 | kfree(ids); |
@@ -1683,22 +1693,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, | |||
1683 | } | 1693 | } |
1684 | 1694 | ||
1685 | int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, | 1695 | int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, |
1686 | __u32 __user *prog_ids, u32 request_cnt, | 1696 | u32 *prog_ids, u32 request_cnt, |
1687 | __u32 __user *prog_cnt) | 1697 | u32 *prog_cnt) |
1688 | { | 1698 | { |
1699 | struct bpf_prog **prog; | ||
1689 | u32 cnt = 0; | 1700 | u32 cnt = 0; |
1690 | 1701 | ||
1691 | if (array) | 1702 | if (array) |
1692 | cnt = bpf_prog_array_length(array); | 1703 | cnt = bpf_prog_array_length(array); |
1693 | 1704 | ||
1694 | if (copy_to_user(prog_cnt, &cnt, sizeof(cnt))) | 1705 | *prog_cnt = cnt; |
1695 | return -EFAULT; | ||
1696 | 1706 | ||
1697 | /* return early if user requested only program count or nothing to copy */ | 1707 | /* return early if user requested only program count or nothing to copy */ |
1698 | if (!request_cnt || !cnt) | 1708 | if (!request_cnt || !cnt) |
1699 | return 0; | 1709 | return 0; |
1700 | 1710 | ||
1701 | return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt); | 1711 | /* this function is called under trace/bpf_trace.c: bpf_event_mutex */ |
1712 | prog = rcu_dereference_check(array, 1)->progs; | ||
1713 | return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC | ||
1714 | : 0; | ||
1702 | } | 1715 | } |
1703 | 1716 | ||
1704 | static void bpf_prog_free_deferred(struct work_struct *work) | 1717 | static void bpf_prog_free_deferred(struct work_struct *work) |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d88e96d4e12c..56ba0f2a01db 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
@@ -977,6 +977,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) | |||
977 | { | 977 | { |
978 | struct perf_event_query_bpf __user *uquery = info; | 978 | struct perf_event_query_bpf __user *uquery = info; |
979 | struct perf_event_query_bpf query = {}; | 979 | struct perf_event_query_bpf query = {}; |
980 | u32 *ids, prog_cnt, ids_len; | ||
980 | int ret; | 981 | int ret; |
981 | 982 | ||
982 | if (!capable(CAP_SYS_ADMIN)) | 983 | if (!capable(CAP_SYS_ADMIN)) |
@@ -985,16 +986,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) | |||
985 | return -EINVAL; | 986 | return -EINVAL; |
986 | if (copy_from_user(&query, uquery, sizeof(query))) | 987 | if (copy_from_user(&query, uquery, sizeof(query))) |
987 | return -EFAULT; | 988 | return -EFAULT; |
988 | if (query.ids_len > BPF_TRACE_MAX_PROGS) | 989 | |
990 | ids_len = query.ids_len; | ||
991 | if (ids_len > BPF_TRACE_MAX_PROGS) | ||
989 | return -E2BIG; | 992 | return -E2BIG; |
993 | ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN); | ||
994 | if (!ids) | ||
995 | return -ENOMEM; | ||
996 | /* | ||
997 | * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which | ||
998 | * is required when user only wants to check for uquery->prog_cnt. | ||
999 | * There is no need to check for it since the case is handled | ||
1000 | * gracefully in bpf_prog_array_copy_info. | ||
1001 | */ | ||
990 | 1002 | ||
991 | mutex_lock(&bpf_event_mutex); | 1003 | mutex_lock(&bpf_event_mutex); |
992 | ret = bpf_prog_array_copy_info(event->tp_event->prog_array, | 1004 | ret = bpf_prog_array_copy_info(event->tp_event->prog_array, |
993 | uquery->ids, | 1005 | ids, |
994 | query.ids_len, | 1006 | ids_len, |
995 | &uquery->prog_cnt); | 1007 | &prog_cnt); |
996 | mutex_unlock(&bpf_event_mutex); | 1008 | mutex_unlock(&bpf_event_mutex); |
997 | 1009 | ||
1010 | if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || | ||
1011 | copy_to_user(uquery->ids, ids, ids_len * sizeof(u32))) | ||
1012 | ret = -EFAULT; | ||
1013 | |||
1014 | kfree(ids); | ||
998 | return ret; | 1015 | return ret; |
999 | } | 1016 | } |
1000 | 1017 | ||