diff options
author | Robert Love <robert.w.love@intel.com> | 2011-08-25 15:40:47 -0400 |
---|---|---|
committer | James Bottomley <JBottomley@Parallels.com> | 2011-08-28 22:38:43 -0400 |
commit | 848e7d5b46b9b0ee613a106bc460acf6a09a8546 (patch) | |
tree | 0695610dd1bc9ae207aa12c76013ab788b06d8ae | |
parent | 7ca3c803e85080afdff4097e60fefec865027809 (diff) |
[SCSI] fcoe: Fix deadlock between fip's recv_work and rtnl
The rtnl cannot be held durrng the fcoe_interface_put.
If it is the last reference on the fcoe_interface the
fcoe_ctlr_destroy will be called as a part of the
cleanup, ultimately calling cancel_work_sync(&fip->recv_work);
If we are processing a flogi response we will be in
the recv_work context and we will lock the rtnl to
add a new unicast MAC address. This is how the deadlock
can occur.
The fix is simply to move the rtnl_lock/unlock into
fcoe_interface_cleanup so that it can be unlocked before
fcoe_interface_put is called.
Here is the lockdep report:
Jul 21 11:26:35 bubba [ 223.870702]
ul 21 11:26:35 bubba [ 223.870704] =======================================================
Jul 21 11:26:35 bubba [ 223.871255] [ INFO: possible circular locking dependency detected ]
Jul 21 11:26:35 bubba [ 223.871530] 3.0.0-rc7+ #1
Jul 21 11:26:35 bubba [ 223.871797] -------------------------------------------------------
Jul 21 11:26:35 bubba [ 223.872072] lockdeptest.sh/3464 is trying to acquire lock:
Jul 21 11:26:35 bubba [ 223.872345] ((&fip->recv_work)
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffff810531f1>] wait_on_work+0x0/0xbd
Jul 21 11:26:35 bubba [ 223.873022]
Jul 21 11:26:35 bubba [ 223.873023] but task is already holding lock:
Jul 21 11:26:35 bubba [ 223.873555] (rtnl_mutex
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14
Jul 21 11:26:35 bubba [ 223.874229]
Jul 21 11:26:35 bubba [ 223.874230] which lock already depends on the new lock.
Jul 21 11:26:35 bubba [ 223.874231]
Jul 21 11:26:35 bubba [ 223.875032]
Jul 21 11:26:35 bubba [ 223.875033] the existing dependency chain (in reverse order) is:
Jul 21 11:26:35 bubba [ 223.875573]
Jul 21 11:26:35 bubba [ 223.875573] -> #1
Jul 21 11:26:35 bubba (rtnl_mutex
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba :
Jul 21 11:26:35 bubba [ 223.876301]
Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7
Jul 21 11:26:35 bubba [ 223.876645]
Jul 21 11:26:35 bubba [<ffffffff8151d975>] __mutex_lock_common+0x47/0x30d
Jul 21 11:26:35 bubba [ 223.876991]
Jul 21 11:26:35 bubba [<ffffffff8151dd36>] mutex_lock_nested+0x3b/0x40
Jul 21 11:26:35 bubba [ 223.877334]
Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14
Jul 21 11:26:35 bubba [ 223.877675]
Jul 21 11:26:35 bubba [<ffffffffa003d5a0>] fcoe_update_src_mac+0x2b/0x80 [fcoe]
Jul 21 11:26:35 bubba [ 223.878022]
Jul 21 11:26:35 bubba [<ffffffffa003d698>] fcoe_flogi_resp+0x5e/0x79 [fcoe]
Jul 21 11:26:35 bubba [ 223.878366]
Jul 21 11:26:35 bubba [<ffffffffa001566f>] fc_exch_recv+0x7f5/0x9da [libfc]
Jul 21 11:26:35 bubba [ 223.878713]
Jul 21 11:26:35 bubba [<ffffffffa00327d8>] fcoe_ctlr_recv_work+0x71f/0x10dc [libfcoe]
Jul 21 11:26:35 bubba [ 223.879258]
Jul 21 11:26:35 bubba [<ffffffff81053761>] process_one_work+0x1d7/0x347
Jul 21 11:26:35 bubba [ 223.879601]
Jul 21 11:26:35 bubba [<ffffffff81054ade>] worker_thread+0xf8/0x17c
Jul 21 11:26:35 bubba [ 223.879944]
Jul 21 11:26:35 bubba [<ffffffff81058184>] kthread+0x7d/0x85
Jul 21 11:26:35 bubba [ 223.880287]
Jul 21 11:26:35 bubba [<ffffffff81526414>] kernel_thread_helper+0x4/0x10
Jul 21 11:26:35 bubba [ 223.880634]
Jul 21 11:26:35 bubba [ 223.880635] -> #0
Jul 21 11:26:35 bubba ((&fip->recv_work)
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba :
Jul 21 11:26:35 bubba [ 223.881357]
Jul 21 11:26:35 bubba [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c
Jul 21 11:26:35 bubba [ 223.881695]
Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7
Jul 21 11:26:35 bubba [ 223.882033]
Jul 21 11:26:35 bubba [<ffffffff81053241>] wait_on_work+0x50/0xbd
Jul 21 11:26:35 bubba [ 223.882378]
Jul 21 11:26:35 bubba [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4
Jul 21 11:26:35 bubba [ 223.882718]
Jul 21 11:26:35 bubba [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd
Jul 21 11:26:35 bubba [ 223.883057]
Jul 21 11:26:35 bubba [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe]
Jul 21 11:26:35 bubba [ 223.883399]
Jul 21 11:26:35 bubba [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe]
Jul 21 11:26:35 bubba [ 223.883940]
Jul 21 11:26:35 bubba [<ffffffff811fbbe6>] kref_put+0x43/0x4d
Jul 21 11:26:35 bubba [ 223.884280]
Jul 21 11:26:35 bubba [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe]
Jul 21 11:26:35 bubba [ 223.884624]
Jul 21 11:26:35 bubba [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe]
Jul 21 11:26:35 bubba [ 223.885163]
Jul 21 11:26:35 bubba [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe]
Jul 21 11:26:35 bubba [ 223.885502]
Jul 21 11:26:35 bubba [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe]
Jul 21 11:26:35 bubba [ 223.886045]
Jul 21 11:26:35 bubba [<ffffffff81056153>] param_attr_store+0x43/0x62
Jul 21 11:26:35 bubba [ 223.886385]
Jul 21 11:26:35 bubba [<ffffffff8105602d>] module_attr_store+0x21/0x25
Jul 21 11:26:35 bubba [ 223.886728]
Jul 21 11:26:35 bubba [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f
Jul 21 11:26:35 bubba [ 223.887068]
Jul 21 11:26:35 bubba [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa
Jul 21 11:26:35 bubba [ 223.887406]
Jul 21 11:26:35 bubba [<ffffffff810f4073>] sys_write+0x45/0x69
Jul 21 11:26:35 bubba [ 223.887742]
Jul 21 11:26:35 bubba [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b
Jul 21 11:26:35 bubba [ 223.888083]
Jul 21 11:26:35 bubba [ 223.888084] other info that might help us debug this:
Jul 21 11:26:35 bubba [ 223.888085]
Jul 21 11:26:35 bubba [ 223.888879] Possible unsafe locking scenario:
Jul 21 11:26:35 bubba [ 223.888881]
Jul 21 11:26:35 bubba [ 223.889411] CPU0 CPU1
Jul 21 11:26:35 bubba [ 223.889683] ---- ----
Jul 21 11:26:35 bubba [ 223.889955] lock(
Jul 21 11:26:35 bubba rtnl_mutex
Jul 21 11:26:35 bubba );
Jul 21 11:26:35 bubba [ 223.890349] lock(
Jul 21 11:26:35 bubba (&fip->recv_work)
Jul 21 11:26:35 bubba );
Jul 21 11:26:35 bubba [ 223.890751] lock(
Jul 21 11:26:35 bubba rtnl_mutex
Jul 21 11:26:35 bubba );
Jul 21 11:26:35 bubba [ 223.891154] lock(
Jul 21 11:26:35 bubba (&fip->recv_work)
Jul 21 11:26:35 bubba );
Jul 21 11:26:35 bubba [ 223.891549]
Jul 21 11:26:35 bubba [ 223.891550] *** DEADLOCK ***
Jul 21 11:26:35 bubba [ 223.891551]
Jul 21 11:26:35 bubba [ 223.892347] 6 locks held by lockdeptest.sh/3464:
Jul 21 11:26:35 bubba [ 223.892621] #0:
Jul 21 11:26:35 bubba (&buffer->mutex
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffff8114c171>] sysfs_write_file+0x37/0x13f
Jul 21 11:26:35 bubba [ 223.893359] #1:
Jul 21 11:26:35 bubba (s_active
Jul 21 11:26:35 bubba ){++++.+}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffff8114c21c>] sysfs_write_file+0xe2/0x13f
Jul 21 11:26:35 bubba [ 223.894094] #2:
Jul 21 11:26:35 bubba (param_lock
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffff81056146>] param_attr_store+0x36/0x62
Jul 21 11:26:35 bubba [ 223.894835] #3:
Jul 21 11:26:35 bubba (ft_mutex
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffffa0034017>] fcoe_transport_destroy+0x1e/0x110 [libfcoe]
Jul 21 11:26:35 bubba [ 223.895574] #4:
Jul 21 11:26:35 bubba (fcoe_config_mutex
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffffa003f2c9>] fcoe_destroy+0x18/0x72 [fcoe]
Jul 21 11:26:35 bubba [ 223.896314] #5:
Jul 21 11:26:35 bubba (rtnl_mutex
Jul 21 11:26:35 bubba ){+.+.+.}
Jul 21 11:26:35 bubba , at:
Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14
Jul 21 11:26:35 bubba [ 223.897047]
Jul 21 11:26:35 bubba [ 223.897048] stack backtrace:
Jul 21 11:26:35 bubba [ 223.897578] Pid: 3464, comm: lockdeptest.sh Not tainted 3.0.0-rc7+ #1
Jul 21 11:26:35 bubba [ 223.897853] Call Trace:
Jul 21 11:26:35 bubba [ 223.898128] [<ffffffff81068e16>] print_circular_bug+0x1f8/0x209
Jul 21 11:26:35 bubba [ 223.898416] [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c
Jul 21 11:26:35 bubba [ 223.898699] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6
Jul 21 11:26:35 bubba [ 223.898982] [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7
Jul 21 11:26:35 bubba [ 223.899263] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6
Jul 21 11:26:35 bubba [ 223.899547] [<ffffffff8104a097>] ? mod_timer+0x8f/0x98
Jul 21 11:26:35 bubba [ 223.899827] [<ffffffff81053241>] wait_on_work+0x50/0xbd
Jul 21 11:26:35 bubba [ 223.900108] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6
Jul 21 11:26:35 bubba [ 223.900390] [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4
Jul 21 11:26:35 bubba [ 223.900671] [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd
Jul 21 11:26:35 bubba [ 223.900953] [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe]
Jul 21 11:26:35 bubba [ 223.901237] [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe]
Jul 21 11:26:35 bubba [ 223.901522] [<ffffffffa003e4fd>] ? fcoe_enable+0x6b/0x6b [fcoe]
Jul 21 11:26:35 bubba [ 223.901803] [<ffffffff811fbbe6>] kref_put+0x43/0x4d
Jul 21 11:26:35 bubba [ 223.902083] [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe]
Jul 21 11:26:35 bubba [ 223.902367] [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe]
Jul 21 11:26:35 bubba [ 223.902653] [<ffffffff8151dd36>] ? mutex_lock_nested+0x3b/0x40
Jul 21 11:26:35 bubba [ 223.902939] [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe]
Jul 21 11:26:35 bubba [ 223.903223] [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe]
Jul 21 11:26:35 bubba [ 223.903508] [<ffffffff81056153>] param_attr_store+0x43/0x62
Jul 21 11:26:35 bubba [ 223.903792] [<ffffffff8105602d>] module_attr_store+0x21/0x25
Jul 21 11:26:35 bubba [ 223.904075] [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f
Jul 21 11:26:35 bubba [ 223.904357] [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa
Jul 21 11:26:35 bubba [ 223.904642] [<ffffffff810f51d6>] ? fget_light+0x35/0x96
Jul 21 11:26:35 bubba [ 223.904923] [<ffffffff810f4073>] sys_write+0x45/0x69
Jul 21 11:26:35 bubba [ 223.905204] [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b
Jul 21 11:26:36 bubba [ 223.964438] ixgbe 0000:05:00.0: eth3: detected SFP+: 5
Jul 21 11:26:37 bubba [ 225.196702] ixgbe 0000:05:00.0: eth3: NIC Link is Up 10 Gbps, Flow Control: None
Signed-off-by: Robert Love <robert.w.love@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Reviewed-by: Yi Zou <yi.zou@intel.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r-- | drivers/scsi/fcoe/fcoe.c | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index ba710e350ac5..5d0e9a24ae94 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c | |||
@@ -432,6 +432,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe) | |||
432 | u8 flogi_maddr[ETH_ALEN]; | 432 | u8 flogi_maddr[ETH_ALEN]; |
433 | const struct net_device_ops *ops; | 433 | const struct net_device_ops *ops; |
434 | 434 | ||
435 | rtnl_lock(); | ||
436 | |||
435 | /* | 437 | /* |
436 | * Don't listen for Ethernet packets anymore. | 438 | * Don't listen for Ethernet packets anymore. |
437 | * synchronize_net() ensures that the packet handlers are not running | 439 | * synchronize_net() ensures that the packet handlers are not running |
@@ -461,6 +463,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe) | |||
461 | " specific feature for LLD.\n"); | 463 | " specific feature for LLD.\n"); |
462 | } | 464 | } |
463 | 465 | ||
466 | rtnl_unlock(); | ||
467 | |||
464 | /* Release the self-reference taken during fcoe_interface_create() */ | 468 | /* Release the self-reference taken during fcoe_interface_create() */ |
465 | fcoe_interface_put(fcoe); | 469 | fcoe_interface_put(fcoe); |
466 | } | 470 | } |
@@ -1951,11 +1955,8 @@ static void fcoe_destroy_work(struct work_struct *work) | |||
1951 | fcoe_if_destroy(port->lport); | 1955 | fcoe_if_destroy(port->lport); |
1952 | 1956 | ||
1953 | /* Do not tear down the fcoe interface for NPIV port */ | 1957 | /* Do not tear down the fcoe interface for NPIV port */ |
1954 | if (!npiv) { | 1958 | if (!npiv) |
1955 | rtnl_lock(); | ||
1956 | fcoe_interface_cleanup(fcoe); | 1959 | fcoe_interface_cleanup(fcoe); |
1957 | rtnl_unlock(); | ||
1958 | } | ||
1959 | 1960 | ||
1960 | mutex_unlock(&fcoe_config_mutex); | 1961 | mutex_unlock(&fcoe_config_mutex); |
1961 | } | 1962 | } |
@@ -2009,8 +2010,9 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) | |||
2009 | printk(KERN_ERR "fcoe: Failed to create interface (%s)\n", | 2010 | printk(KERN_ERR "fcoe: Failed to create interface (%s)\n", |
2010 | netdev->name); | 2011 | netdev->name); |
2011 | rc = -EIO; | 2012 | rc = -EIO; |
2013 | rtnl_unlock(); | ||
2012 | fcoe_interface_cleanup(fcoe); | 2014 | fcoe_interface_cleanup(fcoe); |
2013 | goto out_nodev; | 2015 | goto out_nortnl; |
2014 | } | 2016 | } |
2015 | 2017 | ||
2016 | /* Make this the "master" N_Port */ | 2018 | /* Make this the "master" N_Port */ |
@@ -2027,6 +2029,7 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) | |||
2027 | 2029 | ||
2028 | out_nodev: | 2030 | out_nodev: |
2029 | rtnl_unlock(); | 2031 | rtnl_unlock(); |
2032 | out_nortnl: | ||
2030 | mutex_unlock(&fcoe_config_mutex); | 2033 | mutex_unlock(&fcoe_config_mutex); |
2031 | return rc; | 2034 | return rc; |
2032 | } | 2035 | } |