aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mahesh Bandewar <maheshb@google.com> 2018-09-24 17:40:11 -0400
committer: David S. Miller <davem@davemloft.net> 2018-09-26 23:22:19 -0400
commit: d4859d749aa7090ffb743d15648adb962a1baeae (patch)
tree: 82b9964215a051d0fd95efacd5a792d9d2e661ba
parent: 6a9e461f6fe4434e6172304b69774daff9a3ac4c (diff)
bonding: avoid possible dead-lock
Syzkaller reported this on a slightly older kernel, but it is still applicable to the current kernel:

======================================================
WARNING: possible circular locking dependency detected
4.18.0-next-20180823+ #46 Not tainted
------------------------------------------------------
syz-executor4/26841 is trying to acquire lock:
00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652

but task is already holding lock:
00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline]
00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (rtnl_mutex){+.+.}:
       __mutex_lock_common kernel/locking/mutex.c:925 [inline]
       __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073
       mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088
       rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77
       bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline]
       bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320
       process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153
       worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
       kthread+0x35a/0x420 kernel/kthread.c:246
       ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415

-> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}:
       process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129
       worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
       kthread+0x35a/0x420 kernel/kthread.c:246
       ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415

-> #0 ((wq_completion)bond_dev->name){+.+.}:
       lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
       flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
       drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
       destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
       __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
       bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
       register_netdevice+0x337/0x1100 net/core/dev.c:8410
       bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
       rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
       rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
       netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
       rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
       netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
       netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
       netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
       sock_sendmsg_nosec net/socket.c:622 [inline]
       sock_sendmsg+0xd5/0x120 net/socket.c:632
       ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
       __sys_sendmsg+0x11d/0x290 net/socket.c:2153
       __do_sys_sendmsg net/socket.c:2162 [inline]
       __se_sys_sendmsg net/socket.c:2160 [inline]
       __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
       do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
  (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(rtnl_mutex);
                               lock((work_completion)(&(&nnw->work)->work));
                               lock(rtnl_mutex);
  lock((wq_completion)bond_dev->name);

 *** DEADLOCK ***

1 lock held by syz-executor4/26841:

stack backtrace:
CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222
 check_prev_add kernel/locking/lockdep.c:1862 [inline]
 check_prevs_add kernel/locking/lockdep.c:1975 [inline]
 validate_chain kernel/locking/lockdep.c:2416 [inline]
 __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412
 lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
 register_netdevice+0x337/0x1100 net/core/dev.c:8410
 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
 netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:632
 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
 __sys_sendmsg+0x11d/0x290 net/socket.c:2153
 __do_sys_sendmsg net/socket.c:2162 [inline]
 __se_sys_sendmsg net/socket.c:2160 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457089
Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089
RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003
RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/bonding/bond_main.c 43
-rw-r--r-- include/net/bonding.h 7
2 files changed, 18 insertions, 32 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8c0a0908875d..c05c01a00755 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -210,6 +210,7 @@ static void bond_get_stats(struct net_device *bond_dev,
210static void bond_slave_arr_handler(struct work_struct *work); 210static void bond_slave_arr_handler(struct work_struct *work);
211static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, 211static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
212 int mod); 212 int mod);
213static void bond_netdev_notify_work(struct work_struct *work);
213 214
214/*---------------------------- General routines -----------------------------*/ 215/*---------------------------- General routines -----------------------------*/
215 216
@@ -1286,6 +1287,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond)
1286 return NULL; 1287 return NULL;
1287 } 1288 }
1288 } 1289 }
1290 INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);
1291
1289 return slave; 1292 return slave;
1290} 1293}
1291 1294
@@ -1293,6 +1296,7 @@ static void bond_free_slave(struct slave *slave)
1293{ 1296{
1294 struct bonding *bond = bond_get_bond_by_slave(slave); 1297 struct bonding *bond = bond_get_bond_by_slave(slave);
1295 1298
1299 cancel_delayed_work_sync(&slave->notify_work);
1296 if (BOND_MODE(bond) == BOND_MODE_8023AD) 1300 if (BOND_MODE(bond) == BOND_MODE_8023AD)
1297 kfree(SLAVE_AD_INFO(slave)); 1301 kfree(SLAVE_AD_INFO(slave));
1298 1302
@@ -1314,39 +1318,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info)
1314 info->link_failure_count = slave->link_failure_count; 1318 info->link_failure_count = slave->link_failure_count;
1315} 1319}
1316 1320
1317static void bond_netdev_notify(struct net_device *dev,
1318 struct netdev_bonding_info *info)
1319{
1320 rtnl_lock();
1321 netdev_bonding_info_change(dev, info);
1322 rtnl_unlock();
1323}
1324
1325static void bond_netdev_notify_work(struct work_struct *_work) 1321static void bond_netdev_notify_work(struct work_struct *_work)
1326{ 1322{
1327 struct netdev_notify_work *w = 1323 struct slave *slave = container_of(_work, struct slave,
1328 container_of(_work, struct netdev_notify_work, work.work); 1324 notify_work.work);
1325
1326 if (rtnl_trylock()) {
1327 struct netdev_bonding_info binfo;
1329 1328
1330 bond_netdev_notify(w->dev, &w->bonding_info); 1329 bond_fill_ifslave(slave, &binfo.slave);
1331 dev_put(w->dev); 1330 bond_fill_ifbond(slave->bond, &binfo.master);
1332 kfree(w); 1331 netdev_bonding_info_change(slave->dev, &binfo);
1332 rtnl_unlock();
1333 } else {
1334 queue_delayed_work(slave->bond->wq, &slave->notify_work, 1);
1335 }
1333} 1336}
1334 1337
1335void bond_queue_slave_event(struct slave *slave) 1338void bond_queue_slave_event(struct slave *slave)
1336{ 1339{
1337 struct bonding *bond = slave->bond; 1340 queue_delayed_work(slave->bond->wq, &slave->notify_work, 0);
1338 struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC);
1339
1340 if (!nnw)
1341 return;
1342
1343 dev_hold(slave->dev);
1344 nnw->dev = slave->dev;
1345 bond_fill_ifslave(slave, &nnw->bonding_info.slave);
1346 bond_fill_ifbond(bond, &nnw->bonding_info.master);
1347 INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work);
1348
1349 queue_delayed_work(slave->bond->wq, &nnw->work, 0);
1350} 1341}
1351 1342
1352void bond_lower_state_changed(struct slave *slave) 1343void bond_lower_state_changed(struct slave *slave)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index a2d058170ea3..b46d68acf701 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -139,12 +139,6 @@ struct bond_parm_tbl {
139 int mode; 139 int mode;
140}; 140};
141 141
142struct netdev_notify_work {
143 struct delayed_work work;
144 struct net_device *dev;
145 struct netdev_bonding_info bonding_info;
146};
147
148struct slave { 142struct slave {
149 struct net_device *dev; /* first - useful for panic debug */ 143 struct net_device *dev; /* first - useful for panic debug */
150 struct bonding *bond; /* our master */ 144 struct bonding *bond; /* our master */
@@ -172,6 +166,7 @@ struct slave {
172#ifdef CONFIG_NET_POLL_CONTROLLER 166#ifdef CONFIG_NET_POLL_CONTROLLER
173 struct netpoll *np; 167 struct netpoll *np;
174#endif 168#endif
169 struct delayed_work notify_work;
175 struct kobject kobj; 170 struct kobject kobj;
176 struct rtnl_link_stats64 slave_stats; 171 struct rtnl_link_stats64 slave_stats;
177}; 172};