diff options
| author | Robert Love <robert.w.love@intel.com> | 2013-03-25 14:00:27 -0400 |
|---|---|---|
| committer | Robert Love <robert.w.love@intel.com> | 2013-03-25 18:55:56 -0400 |
| commit | f9c4358edb285cead00a0d6cf0644c84ee773026 (patch) | |
| tree | 7ba4fd79620f04e1af563d573b57b8b94a28cd43 | |
| parent | 01bdcb626f80838f55ec2af993daef50b9c59c49 (diff) | |
fcoe: Fix deadlock between create and destroy paths
We can deadlock (s_active and fcoe_config_mutex) if a
port is being destroyed at the same time one is being created.
[ 4200.503113] ======================================================
[ 4200.503114] [ INFO: possible circular locking dependency detected ]
[ 4200.503116] 3.8.0-rc5+ #8 Not tainted
[ 4200.503117] -------------------------------------------------------
[ 4200.503118] kworker/3:2/2492 is trying to acquire lock:
[ 4200.503119] (s_active#292){++++.+}, at: [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70
[ 4200.503127]
but task is already holding lock:
[ 4200.503128] (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02f3338>] fcoe_destroy_work+0xe8/0x120 [fcoe]
[ 4200.503133]
which lock already depends on the new lock.
[ 4200.503135]
the existing dependency chain (in reverse order) is:
[ 4200.503136]
-> #1 (fcoe_config_mutex){+.+.+.}:
[ 4200.503139] [<ffffffff810c7711>] lock_acquire+0xa1/0x140
[ 4200.503143] [<ffffffff816ca7be>] mutex_lock_nested+0x6e/0x360
[ 4200.503146] [<ffffffffa02f11bd>] fcoe_enable+0x1d/0xb0 [fcoe]
[ 4200.503148] [<ffffffffa02f127d>] fcoe_ctlr_enabled+0x2d/0x50 [fcoe]
[ 4200.503151] [<ffffffffa02ffbe8>] store_ctlr_enabled+0x38/0x90 [libfcoe]
[ 4200.503154] [<ffffffff81424878>] dev_attr_store+0x18/0x30
[ 4200.503157] [<ffffffff8122b750>] sysfs_write_file+0xe0/0x150
[ 4200.503160] [<ffffffff811b334c>] vfs_write+0xac/0x180
[ 4200.503162] [<ffffffff811b3692>] sys_write+0x52/0xa0
[ 4200.503164] [<ffffffff816d7159>] system_call_fastpath+0x16/0x1b
[ 4200.503167]
-> #0 (s_active#292){++++.+}:
[ 4200.503170] [<ffffffff810c680f>] __lock_acquire+0x135f/0x1c90
[ 4200.503172] [<ffffffff810c7711>] lock_acquire+0xa1/0x140
[ 4200.503174] [<ffffffff8122c626>] sysfs_deactivate+0x116/0x160
[ 4200.503176] [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70
[ 4200.503178] [<ffffffff8122b2eb>] sysfs_hash_and_remove+0x5b/0xb0
[ 4200.503180] [<ffffffff8122f3d1>] sysfs_remove_group+0x61/0x100
[ 4200.503183] [<ffffffff814251eb>] device_remove_groups+0x3b/0x60
[ 4200.503185] [<ffffffff81425534>] device_remove_attrs+0x44/0x80
[ 4200.503187] [<ffffffff81425e97>] device_del+0x127/0x1c0
[ 4200.503189] [<ffffffff81425f52>] device_unregister+0x22/0x60
[ 4200.503191] [<ffffffffa0300970>] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe]
[ 4200.503194] [<ffffffffa02f1b5c>] fcoe_interface_cleanup+0x6c/0xa0 [fcoe]
[ 4200.503196] [<ffffffffa02f3355>] fcoe_destroy_work+0x105/0x120 [fcoe]
[ 4200.503198] [<ffffffff8107ee91>] process_one_work+0x1a1/0x580
[ 4200.503203] [<ffffffff81080c6e>] worker_thread+0x15e/0x440
[ 4200.503205] [<ffffffff8108715a>] kthread+0xea/0xf0
[ 4200.503207] [<ffffffff816d70ac>] ret_from_fork+0x7c/0xb0
[ 4200.503209]
other info that might help us debug this:
[ 4200.503211] Possible unsafe locking scenario:
[ 4200.503212] CPU0 CPU1
[ 4200.503213] ---- ----
[ 4200.503214] lock(fcoe_config_mutex);
[ 4200.503215] lock(s_active#292);
[ 4200.503218] lock(fcoe_config_mutex);
[ 4200.503219] lock(s_active#292);
[ 4200.503221]
*** DEADLOCK ***
[ 4200.503223] 3 locks held by kworker/3:2/2492:
[ 4200.503224] #0: (fcoe){.+.+.+}, at: [<ffffffff8107ee2b>] process_one_work+0x13b/0x580
[ 4200.503228] #1: ((&port->destroy_work)){+.+.+.}, at: [<ffffffff8107ee2b>] process_one_work+0x13b/0x580
[ 4200.503232] #2: (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02f3338>] fcoe_destroy_work+0xe8/0x120 [fcoe]
[ 4200.503236]
stack backtrace:
[ 4200.503238] Pid: 2492, comm: kworker/3:2 Not tainted 3.8.0-rc5+ #8
[ 4200.503240] Call Trace:
[ 4200.503243] [<ffffffff816c2f09>] print_circular_bug+0x1fb/0x20c
[ 4200.503246] [<ffffffff810c680f>] __lock_acquire+0x135f/0x1c90
[ 4200.503248] [<ffffffff810c463a>] ? debug_check_no_locks_freed+0x9a/0x180
[ 4200.503250] [<ffffffff810c7711>] lock_acquire+0xa1/0x140
[ 4200.503253] [<ffffffff8122d20b>] ? sysfs_addrm_finish+0x3b/0x70
[ 4200.503255] [<ffffffff8122c626>] sysfs_deactivate+0x116/0x160
[ 4200.503258] [<ffffffff8122d20b>] ? sysfs_addrm_finish+0x3b/0x70
[ 4200.503260] [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70
[ 4200.503262] [<ffffffff8122b2eb>] sysfs_hash_and_remove+0x5b/0xb0
[ 4200.503265] [<ffffffff8122f3d1>] sysfs_remove_group+0x61/0x100
[ 4200.503273] [<ffffffff814251eb>] device_remove_groups+0x3b/0x60
[ 4200.503275] [<ffffffff81425534>] device_remove_attrs+0x44/0x80
[ 4200.503277] [<ffffffff81425e97>] device_del+0x127/0x1c0
[ 4200.503279] [<ffffffff81425f52>] device_unregister+0x22/0x60
[ 4200.503282] [<ffffffffa0300970>] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe]
[ 4200.503285] [<ffffffffa02f1b5c>] fcoe_interface_cleanup+0x6c/0xa0 [fcoe]
[ 4200.503287] [<ffffffffa02f3355>] fcoe_destroy_work+0x105/0x120 [fcoe]
[ 4200.503290] [<ffffffff8107ee91>] process_one_work+0x1a1/0x580
[ 4200.503292] [<ffffffff8107ee2b>] ? process_one_work+0x13b/0x580
[ 4200.503295] [<ffffffffa02f3250>] ? fcoe_if_destroy+0x230/0x230 [fcoe]
[ 4200.503297] [<ffffffff81080c6e>] worker_thread+0x15e/0x440
[ 4200.503299] [<ffffffff81080b10>] ? busy_worker_rebind_fn+0x100/0x100
[ 4200.503301] [<ffffffff8108715a>] kthread+0xea/0xf0
[ 4200.503304] [<ffffffff81087070>] ? kthread_create_on_node+0x160/0x160
[ 4200.503306] [<ffffffff816d70ac>] ret_from_fork+0x7c/0xb0
[ 4200.503308] [<ffffffff81087070>] ? kthread_create_on_node+0x160/0x160
Signed-off-by: Robert Love <robert.w.love@intel.com>
Tested-by: Jack Morgan <jack.morgan@intel.com>
| -rw-r--r-- | drivers/scsi/fcoe/fcoe.c | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index b5d92fc93c70..9bfdc9a3f897 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c | |||
| @@ -490,7 +490,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) | |||
| 490 | { | 490 | { |
| 491 | struct net_device *netdev = fcoe->netdev; | 491 | struct net_device *netdev = fcoe->netdev; |
| 492 | struct fcoe_ctlr *fip = fcoe_to_ctlr(fcoe); | 492 | struct fcoe_ctlr *fip = fcoe_to_ctlr(fcoe); |
| 493 | struct fcoe_ctlr_device *ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip); | ||
| 494 | 493 | ||
| 495 | rtnl_lock(); | 494 | rtnl_lock(); |
| 496 | if (!fcoe->removed) | 495 | if (!fcoe->removed) |
| @@ -501,7 +500,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) | |||
| 501 | /* tear-down the FCoE controller */ | 500 | /* tear-down the FCoE controller */ |
| 502 | fcoe_ctlr_destroy(fip); | 501 | fcoe_ctlr_destroy(fip); |
| 503 | scsi_host_put(fip->lp->host); | 502 | scsi_host_put(fip->lp->host); |
| 504 | fcoe_ctlr_device_delete(ctlr_dev); | ||
| 505 | dev_put(netdev); | 503 | dev_put(netdev); |
| 506 | module_put(THIS_MODULE); | 504 | module_put(THIS_MODULE); |
| 507 | } | 505 | } |
| @@ -2194,6 +2192,8 @@ out_nodev: | |||
| 2194 | */ | 2192 | */ |
| 2195 | static void fcoe_destroy_work(struct work_struct *work) | 2193 | static void fcoe_destroy_work(struct work_struct *work) |
| 2196 | { | 2194 | { |
| 2195 | struct fcoe_ctlr_device *cdev; | ||
| 2196 | struct fcoe_ctlr *ctlr; | ||
| 2197 | struct fcoe_port *port; | 2197 | struct fcoe_port *port; |
| 2198 | struct fcoe_interface *fcoe; | 2198 | struct fcoe_interface *fcoe; |
| 2199 | struct Scsi_Host *shost; | 2199 | struct Scsi_Host *shost; |
| @@ -2224,10 +2224,15 @@ static void fcoe_destroy_work(struct work_struct *work) | |||
| 2224 | mutex_lock(&fcoe_config_mutex); | 2224 | mutex_lock(&fcoe_config_mutex); |
| 2225 | 2225 | ||
| 2226 | fcoe = port->priv; | 2226 | fcoe = port->priv; |
| 2227 | ctlr = fcoe_to_ctlr(fcoe); | ||
| 2228 | cdev = fcoe_ctlr_to_ctlr_dev(ctlr); | ||
| 2229 | |||
| 2227 | fcoe_if_destroy(port->lport); | 2230 | fcoe_if_destroy(port->lport); |
| 2228 | fcoe_interface_cleanup(fcoe); | 2231 | fcoe_interface_cleanup(fcoe); |
| 2229 | 2232 | ||
| 2230 | mutex_unlock(&fcoe_config_mutex); | 2233 | mutex_unlock(&fcoe_config_mutex); |
| 2234 | |||
| 2235 | fcoe_ctlr_device_delete(cdev); | ||
| 2231 | } | 2236 | } |
| 2232 | 2237 | ||
| 2233 | /** | 2238 | /** |
| @@ -2335,7 +2340,9 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, | |||
| 2335 | rc = -EIO; | 2340 | rc = -EIO; |
| 2336 | rtnl_unlock(); | 2341 | rtnl_unlock(); |
| 2337 | fcoe_interface_cleanup(fcoe); | 2342 | fcoe_interface_cleanup(fcoe); |
| 2338 | goto out_nortnl; | 2343 | mutex_unlock(&fcoe_config_mutex); |
| 2344 | fcoe_ctlr_device_delete(ctlr_dev); | ||
| 2345 | goto out; | ||
| 2339 | } | 2346 | } |
| 2340 | 2347 | ||
| 2341 | /* Make this the "master" N_Port */ | 2348 | /* Make this the "master" N_Port */ |
| @@ -2375,8 +2382,8 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, | |||
| 2375 | 2382 | ||
| 2376 | out_nodev: | 2383 | out_nodev: |
| 2377 | rtnl_unlock(); | 2384 | rtnl_unlock(); |
| 2378 | out_nortnl: | ||
| 2379 | mutex_unlock(&fcoe_config_mutex); | 2385 | mutex_unlock(&fcoe_config_mutex); |
| 2386 | out: | ||
| 2380 | return rc; | 2387 | return rc; |
| 2381 | } | 2388 | } |
| 2382 | 2389 | ||
