diff options
author | Mahesh Bandewar <maheshb@google.com> | 2018-09-24 17:40:11 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-09-26 23:22:19 -0400 |
commit | d4859d749aa7090ffb743d15648adb962a1baeae (patch) | |
tree | 82b9964215a051d0fd95efacd5a792d9d2e661ba | |
parent | 6a9e461f6fe4434e6172304b69774daff9a3ac4c (diff) |
bonding: avoid possible dead-lock
Syzkaller reported this on a slightly older kernel but it's still
applicable to the current kernel -
======================================================
WARNING: possible circular locking dependency detected
4.18.0-next-20180823+ #46 Not tainted
------------------------------------------------------
syz-executor4/26841 is trying to acquire lock:
00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652
but task is already holding lock:
00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline]
00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (rtnl_mutex){+.+.}:
__mutex_lock_common kernel/locking/mutex.c:925 [inline]
__mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073
mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088
rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77
bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline]
bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320
process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153
worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
kthread+0x35a/0x420 kernel/kthread.c:246
ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415
-> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}:
process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129
worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
kthread+0x35a/0x420 kernel/kthread.c:246
ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415
-> #0 ((wq_completion)bond_dev->name){+.+.}:
lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
__alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
register_netdevice+0x337/0x1100 net/core/dev.c:8410
bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg+0xd5/0x120 net/socket.c:632
___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
__sys_sendmsg+0x11d/0x290 net/socket.c:2153
__do_sys_sendmsg net/socket.c:2162 [inline]
__se_sys_sendmsg net/socket.c:2160 [inline]
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Chain exists of:
(wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex
Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(rtnl_mutex);
                               lock((work_completion)(&(&nnw->work)->work));
                               lock(rtnl_mutex);
  lock((wq_completion)bond_dev->name);
*** DEADLOCK ***
1 lock held by syz-executor4/26841:
stack backtrace:
CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222
check_prev_add kernel/locking/lockdep.c:1862 [inline]
check_prevs_add kernel/locking/lockdep.c:1975 [inline]
validate_chain kernel/locking/lockdep.c:2416 [inline]
__lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412
lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
__alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
register_netdevice+0x337/0x1100 net/core/dev.c:8410
bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg+0xd5/0x120 net/socket.c:632
___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
__sys_sendmsg+0x11d/0x290 net/socket.c:2153
__do_sys_sendmsg net/socket.c:2162 [inline]
__se_sys_sendmsg net/socket.c:2160 [inline]
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457089
Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089
RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003
RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/bonding/bond_main.c | 43 | ||||
-rw-r--r-- | include/net/bonding.h | 7 |
2 files changed, 18 insertions, 32 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8c0a0908875d..c05c01a00755 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -210,6 +210,7 @@ static void bond_get_stats(struct net_device *bond_dev, | |||
210 | static void bond_slave_arr_handler(struct work_struct *work); | 210 | static void bond_slave_arr_handler(struct work_struct *work); |
211 | static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, | 211 | static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, |
212 | int mod); | 212 | int mod); |
213 | static void bond_netdev_notify_work(struct work_struct *work); | ||
213 | 214 | ||
214 | /*---------------------------- General routines -----------------------------*/ | 215 | /*---------------------------- General routines -----------------------------*/ |
215 | 216 | ||
@@ -1286,6 +1287,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond) | |||
1286 | return NULL; | 1287 | return NULL; |
1287 | } | 1288 | } |
1288 | } | 1289 | } |
1290 | INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); | ||
1291 | |||
1289 | return slave; | 1292 | return slave; |
1290 | } | 1293 | } |
1291 | 1294 | ||
@@ -1293,6 +1296,7 @@ static void bond_free_slave(struct slave *slave) | |||
1293 | { | 1296 | { |
1294 | struct bonding *bond = bond_get_bond_by_slave(slave); | 1297 | struct bonding *bond = bond_get_bond_by_slave(slave); |
1295 | 1298 | ||
1299 | cancel_delayed_work_sync(&slave->notify_work); | ||
1296 | if (BOND_MODE(bond) == BOND_MODE_8023AD) | 1300 | if (BOND_MODE(bond) == BOND_MODE_8023AD) |
1297 | kfree(SLAVE_AD_INFO(slave)); | 1301 | kfree(SLAVE_AD_INFO(slave)); |
1298 | 1302 | ||
@@ -1314,39 +1318,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) | |||
1314 | info->link_failure_count = slave->link_failure_count; | 1318 | info->link_failure_count = slave->link_failure_count; |
1315 | } | 1319 | } |
1316 | 1320 | ||
1317 | static void bond_netdev_notify(struct net_device *dev, | ||
1318 | struct netdev_bonding_info *info) | ||
1319 | { | ||
1320 | rtnl_lock(); | ||
1321 | netdev_bonding_info_change(dev, info); | ||
1322 | rtnl_unlock(); | ||
1323 | } | ||
1324 | |||
1325 | static void bond_netdev_notify_work(struct work_struct *_work) | 1321 | static void bond_netdev_notify_work(struct work_struct *_work) |
1326 | { | 1322 | { |
1327 | struct netdev_notify_work *w = | 1323 | struct slave *slave = container_of(_work, struct slave, |
1328 | container_of(_work, struct netdev_notify_work, work.work); | 1324 | notify_work.work); |
1325 | |||
1326 | if (rtnl_trylock()) { | ||
1327 | struct netdev_bonding_info binfo; | ||
1329 | 1328 | ||
1330 | bond_netdev_notify(w->dev, &w->bonding_info); | 1329 | bond_fill_ifslave(slave, &binfo.slave); |
1331 | dev_put(w->dev); | 1330 | bond_fill_ifbond(slave->bond, &binfo.master); |
1332 | kfree(w); | 1331 | netdev_bonding_info_change(slave->dev, &binfo); |
1332 | rtnl_unlock(); | ||
1333 | } else { | ||
1334 | queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); | ||
1335 | } | ||
1333 | } | 1336 | } |
1334 | 1337 | ||
1335 | void bond_queue_slave_event(struct slave *slave) | 1338 | void bond_queue_slave_event(struct slave *slave) |
1336 | { | 1339 | { |
1337 | struct bonding *bond = slave->bond; | 1340 | queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); |
1338 | struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); | ||
1339 | |||
1340 | if (!nnw) | ||
1341 | return; | ||
1342 | |||
1343 | dev_hold(slave->dev); | ||
1344 | nnw->dev = slave->dev; | ||
1345 | bond_fill_ifslave(slave, &nnw->bonding_info.slave); | ||
1346 | bond_fill_ifbond(bond, &nnw->bonding_info.master); | ||
1347 | INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); | ||
1348 | |||
1349 | queue_delayed_work(slave->bond->wq, &nnw->work, 0); | ||
1350 | } | 1341 | } |
1351 | 1342 | ||
1352 | void bond_lower_state_changed(struct slave *slave) | 1343 | void bond_lower_state_changed(struct slave *slave) |
diff --git a/include/net/bonding.h b/include/net/bonding.h
index a2d058170ea3..b46d68acf701 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -139,12 +139,6 @@ struct bond_parm_tbl { | |||
139 | int mode; | 139 | int mode; |
140 | }; | 140 | }; |
141 | 141 | ||
142 | struct netdev_notify_work { | ||
143 | struct delayed_work work; | ||
144 | struct net_device *dev; | ||
145 | struct netdev_bonding_info bonding_info; | ||
146 | }; | ||
147 | |||
148 | struct slave { | 142 | struct slave { |
149 | struct net_device *dev; /* first - useful for panic debug */ | 143 | struct net_device *dev; /* first - useful for panic debug */ |
150 | struct bonding *bond; /* our master */ | 144 | struct bonding *bond; /* our master */ |
@@ -172,6 +166,7 @@ struct slave { | |||
172 | #ifdef CONFIG_NET_POLL_CONTROLLER | 166 | #ifdef CONFIG_NET_POLL_CONTROLLER |
173 | struct netpoll *np; | 167 | struct netpoll *np; |
174 | #endif | 168 | #endif |
169 | struct delayed_work notify_work; | ||
175 | struct kobject kobj; | 170 | struct kobject kobj; |
176 | struct rtnl_link_stats64 slave_stats; | 171 | struct rtnl_link_stats64 slave_stats; |
177 | }; | 172 | }; |