aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobert Love <robert.w.love@intel.com>2013-03-25 14:00:27 -0400
committerRobert Love <robert.w.love@intel.com>2013-03-25 18:55:56 -0400
commitf9c4358edb285cead00a0d6cf0644c84ee773026 (patch)
tree7ba4fd79620f04e1af563d573b57b8b94a28cd43
parent01bdcb626f80838f55ec2af993daef50b9c59c49 (diff)
fcoe: Fix deadlock between create and destroy paths
We can deadlock (s_active and fcoe_config_mutex) if a port is being destroyed at the same time one is being created. [ 4200.503113] ====================================================== [ 4200.503114] [ INFO: possible circular locking dependency detected ] [ 4200.503116] 3.8.0-rc5+ #8 Not tainted [ 4200.503117] ------------------------------------------------------- [ 4200.503118] kworker/3:2/2492 is trying to acquire lock: [ 4200.503119] (s_active#292){++++.+}, at: [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70 [ 4200.503127] but task is already holding lock: [ 4200.503128] (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02f3338>] fcoe_destroy_work+0xe8/0x120 [fcoe] [ 4200.503133] which lock already depends on the new lock. [ 4200.503135] the existing dependency chain (in reverse order) is: [ 4200.503136] -> #1 (fcoe_config_mutex){+.+.+.}: [ 4200.503139] [<ffffffff810c7711>] lock_acquire+0xa1/0x140 [ 4200.503143] [<ffffffff816ca7be>] mutex_lock_nested+0x6e/0x360 [ 4200.503146] [<ffffffffa02f11bd>] fcoe_enable+0x1d/0xb0 [fcoe] [ 4200.503148] [<ffffffffa02f127d>] fcoe_ctlr_enabled+0x2d/0x50 [fcoe] [ 4200.503151] [<ffffffffa02ffbe8>] store_ctlr_enabled+0x38/0x90 [libfcoe] [ 4200.503154] [<ffffffff81424878>] dev_attr_store+0x18/0x30 [ 4200.503157] [<ffffffff8122b750>] sysfs_write_file+0xe0/0x150 [ 4200.503160] [<ffffffff811b334c>] vfs_write+0xac/0x180 [ 4200.503162] [<ffffffff811b3692>] sys_write+0x52/0xa0 [ 4200.503164] [<ffffffff816d7159>] system_call_fastpath+0x16/0x1b [ 4200.503167] -> #0 (s_active#292){++++.+}: [ 4200.503170] [<ffffffff810c680f>] __lock_acquire+0x135f/0x1c90 [ 4200.503172] [<ffffffff810c7711>] lock_acquire+0xa1/0x140 [ 4200.503174] [<ffffffff8122c626>] sysfs_deactivate+0x116/0x160 [ 4200.503176] [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70 [ 4200.503178] [<ffffffff8122b2eb>] sysfs_hash_and_remove+0x5b/0xb0 [ 4200.503180] [<ffffffff8122f3d1>] sysfs_remove_group+0x61/0x100 [ 4200.503183] [<ffffffff814251eb>] device_remove_groups+0x3b/0x60 [ 4200.503185] [<ffffffff81425534>] device_remove_attrs+0x44/0x80 [ 4200.503187] [<ffffffff81425e97>] device_del+0x127/0x1c0 [ 4200.503189] [<ffffffff81425f52>] device_unregister+0x22/0x60 [ 4200.503191] [<ffffffffa0300970>] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe] [ 4200.503194] [<ffffffffa02f1b5c>] fcoe_interface_cleanup+0x6c/0xa0 [fcoe] [ 4200.503196] [<ffffffffa02f3355>] fcoe_destroy_work+0x105/0x120 [fcoe] [ 4200.503198] [<ffffffff8107ee91>] process_one_work+0x1a1/0x580 [ 4200.503203] [<ffffffff81080c6e>] worker_thread+0x15e/0x440 [ 4200.503205] [<ffffffff8108715a>] kthread+0xea/0xf0 [ 4200.503207] [<ffffffff816d70ac>] ret_from_fork+0x7c/0xb0 [ 4200.503209] other info that might help us debug this: [ 4200.503211] Possible unsafe locking scenario: [ 4200.503212] CPU0 CPU1 [ 4200.503213] ---- ---- [ 4200.503214] lock(fcoe_config_mutex); [ 4200.503215] lock(s_active#292); [ 4200.503218] lock(fcoe_config_mutex); [ 4200.503219] lock(s_active#292); [ 4200.503221] *** DEADLOCK *** [ 4200.503223] 3 locks held by kworker/3:2/2492: [ 4200.503224] #0: (fcoe){.+.+.+}, at: [<ffffffff8107ee2b>] process_one_work+0x13b/0x580 [ 4200.503228] #1: ((&port->destroy_work)){+.+.+.}, at: [<ffffffff8107ee2b>] process_one_work+0x13b/0x580 [ 4200.503232] #2: (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02f3338>] fcoe_destroy_work+0xe8/0x120 [fcoe] [ 4200.503236] stack backtrace: [ 4200.503238] Pid: 2492, comm: kworker/3:2 Not tainted 3.8.0-rc5+ #8 [ 4200.503240] Call Trace: [ 4200.503243] [<ffffffff816c2f09>] print_circular_bug+0x1fb/0x20c [ 4200.503246] [<ffffffff810c680f>] __lock_acquire+0x135f/0x1c90 [ 4200.503248] [<ffffffff810c463a>] ? debug_check_no_locks_freed+0x9a/0x180 [ 4200.503250] [<ffffffff810c7711>] lock_acquire+0xa1/0x140 [ 4200.503253] [<ffffffff8122d20b>] ? sysfs_addrm_finish+0x3b/0x70 [ 4200.503255] [<ffffffff8122c626>] sysfs_deactivate+0x116/0x160 [ 4200.503258] [<ffffffff8122d20b>] ? sysfs_addrm_finish+0x3b/0x70 [ 4200.503260] [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70 [ 4200.503262] [<ffffffff8122b2eb>] sysfs_hash_and_remove+0x5b/0xb0 [ 4200.503265] [<ffffffff8122f3d1>] sysfs_remove_group+0x61/0x100 [ 4200.503273] [<ffffffff814251eb>] device_remove_groups+0x3b/0x60 [ 4200.503275] [<ffffffff81425534>] device_remove_attrs+0x44/0x80 [ 4200.503277] [<ffffffff81425e97>] device_del+0x127/0x1c0 [ 4200.503279] [<ffffffff81425f52>] device_unregister+0x22/0x60 [ 4200.503282] [<ffffffffa0300970>] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe] [ 4200.503285] [<ffffffffa02f1b5c>] fcoe_interface_cleanup+0x6c/0xa0 [fcoe] [ 4200.503287] [<ffffffffa02f3355>] fcoe_destroy_work+0x105/0x120 [fcoe] [ 4200.503290] [<ffffffff8107ee91>] process_one_work+0x1a1/0x580 [ 4200.503292] [<ffffffff8107ee2b>] ? process_one_work+0x13b/0x580 [ 4200.503295] [<ffffffffa02f3250>] ? fcoe_if_destroy+0x230/0x230 [fcoe] [ 4200.503297] [<ffffffff81080c6e>] worker_thread+0x15e/0x440 [ 4200.503299] [<ffffffff81080b10>] ? busy_worker_rebind_fn+0x100/0x100 [ 4200.503301] [<ffffffff8108715a>] kthread+0xea/0xf0 [ 4200.503304] [<ffffffff81087070>] ? kthread_create_on_node+0x160/0x160 [ 4200.503306] [<ffffffff816d70ac>] ret_from_fork+0x7c/0xb0 [ 4200.503308] [<ffffffff81087070>] ? kthread_create_on_node+0x160/0x160 Signed-off-by: Robert Love <robert.w.love@intel.com> Tested-by: Jack Morgan <jack.morgan@intel.com>
-rw-r--r--drivers/scsi/fcoe/fcoe.c15
1 files changed, 11 insertions, 4 deletions
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index b5d92fc93c70..9bfdc9a3f897 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -490,7 +490,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
490{ 490{
491 struct net_device *netdev = fcoe->netdev; 491 struct net_device *netdev = fcoe->netdev;
492 struct fcoe_ctlr *fip = fcoe_to_ctlr(fcoe); 492 struct fcoe_ctlr *fip = fcoe_to_ctlr(fcoe);
493 struct fcoe_ctlr_device *ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip);
494 493
495 rtnl_lock(); 494 rtnl_lock();
496 if (!fcoe->removed) 495 if (!fcoe->removed)
@@ -501,7 +500,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
501 /* tear-down the FCoE controller */ 500 /* tear-down the FCoE controller */
502 fcoe_ctlr_destroy(fip); 501 fcoe_ctlr_destroy(fip);
503 scsi_host_put(fip->lp->host); 502 scsi_host_put(fip->lp->host);
504 fcoe_ctlr_device_delete(ctlr_dev);
505 dev_put(netdev); 503 dev_put(netdev);
506 module_put(THIS_MODULE); 504 module_put(THIS_MODULE);
507} 505}
@@ -2194,6 +2192,8 @@ out_nodev:
2194 */ 2192 */
2195static void fcoe_destroy_work(struct work_struct *work) 2193static void fcoe_destroy_work(struct work_struct *work)
2196{ 2194{
2195 struct fcoe_ctlr_device *cdev;
2196 struct fcoe_ctlr *ctlr;
2197 struct fcoe_port *port; 2197 struct fcoe_port *port;
2198 struct fcoe_interface *fcoe; 2198 struct fcoe_interface *fcoe;
2199 struct Scsi_Host *shost; 2199 struct Scsi_Host *shost;
@@ -2224,10 +2224,15 @@ static void fcoe_destroy_work(struct work_struct *work)
2224 mutex_lock(&fcoe_config_mutex); 2224 mutex_lock(&fcoe_config_mutex);
2225 2225
2226 fcoe = port->priv; 2226 fcoe = port->priv;
2227 ctlr = fcoe_to_ctlr(fcoe);
2228 cdev = fcoe_ctlr_to_ctlr_dev(ctlr);
2229
2227 fcoe_if_destroy(port->lport); 2230 fcoe_if_destroy(port->lport);
2228 fcoe_interface_cleanup(fcoe); 2231 fcoe_interface_cleanup(fcoe);
2229 2232
2230 mutex_unlock(&fcoe_config_mutex); 2233 mutex_unlock(&fcoe_config_mutex);
2234
2235 fcoe_ctlr_device_delete(cdev);
2231} 2236}
2232 2237
2233/** 2238/**
@@ -2335,7 +2340,9 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode,
2335 rc = -EIO; 2340 rc = -EIO;
2336 rtnl_unlock(); 2341 rtnl_unlock();
2337 fcoe_interface_cleanup(fcoe); 2342 fcoe_interface_cleanup(fcoe);
2338 goto out_nortnl; 2343 mutex_unlock(&fcoe_config_mutex);
2344 fcoe_ctlr_device_delete(ctlr_dev);
2345 goto out;
2339 } 2346 }
2340 2347
2341 /* Make this the "master" N_Port */ 2348 /* Make this the "master" N_Port */
@@ -2375,8 +2382,8 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode,
2375 2382
2376out_nodev: 2383out_nodev:
2377 rtnl_unlock(); 2384 rtnl_unlock();
2378out_nortnl:
2379 mutex_unlock(&fcoe_config_mutex); 2385 mutex_unlock(&fcoe_config_mutex);
2386out:
2380 return rc; 2387 return rc;
2381} 2388}
2382 2389