aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/fcoe/fcoe.c
diff options
context:
space:
mode:
authorVasu Dev <vasu.dev@intel.com>2010-05-07 18:18:46 -0400
committerJames Bottomley <James.Bottomley@suse.de>2010-05-16 22:22:35 -0400
commit34ce27bcf96f5f366e1fa8c4729ffc8a55de4cc3 (patch)
treef4650760844e41851b1f4dc301a35601025d2699 /drivers/scsi/fcoe/fcoe.c
parent7b2787ec15b9d1c2f716da61b0eec21a3f5e6520 (diff)
[SCSI] fcoe: fix a circular locking issue with rtnl and sysfs mutex
Currently the rtnl mutex is grabbed during fcoe create, destroy, enable and disable operations while the sysfs s_active read mutex is already held, but simultaneously other networking events could try grabbing the write s_active mutex while rtnl is already held, and that is causing a circular lock warning; its detailed log is pasted at the end. In this log, the rtnl was held before write s_active during device renaming, but there are more such cases, as Joe reported another instance with tg3 open at:- http://www.open-fcoe.org/pipermail/devel/2010-February/008263.html This patch fixes this issue by not waiting for the rtnl mutex during fcoe ops; that means if the rtnl mutex is not immediately available then restart_syscall() is used to allow others waiting in line to grab s_active along with the rtnl mutex to finish their work first under these mutexes. Currently the rtnl mutex was grabbed twice during the fcoe_destroy call flow; the second grab was from fcoe_if_destroy, called from fcoe_destroy after dropping the rtnl mutex before calling fcoe_if_destroy, so instead fcoe_if_destroy is now always called with the rtnl mutex held to have this mutex grabbed only once in this code path. However, the matching rtnl_unlock is left as-is in its original place, as it was dropped there for good reason: the very next call causes a synchronous fip worker flush, and if the rtnl mutex is still held before the flush then that would cause a new circular warning between fip->recv_work and the rtnl mutex. I've added a detailed comment for this on the fcoe_if_destroy calling and rtnl mutex unlocking. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.33.1linux-stable-2.6.33 #1 ------------------------------------------------------- fcoemon/18823 is trying to acquire lock: (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] but task is already holding lock: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #2 (s_active){++++.+}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8115e5df>] sysfs_deactivate+0x8b/0xe0 [<ffffffff8115edfb>] sysfs_addrm_finish+0x36/0x55 [<ffffffff8115d0cc>] sysfs_hash_and_remove+0x53/0x6a [<ffffffff8115f353>] sysfs_remove_link+0x21/0x23 [<ffffffff812b6c93>] device_rename+0x99/0xcb [<ffffffff8138dbf0>] dev_change_name+0xd5/0x1d2 [<ffffffff8138deee>] dev_ifsioc+0x201/0x2ac [<ffffffff8138e4ba>] dev_ioctl+0x521/0x632 [<ffffffff81379e43>] sock_do_ioctl+0x3d/0x47 [<ffffffff8137a254>] sock_ioctl+0x213/0x222 [<ffffffff81114614>] vfs_ioctl+0x32/0xa6 [<ffffffff81114b94>] do_vfs_ioctl+0x490/0x4d6 [<ffffffff81114c30>] sys_ioctl+0x56/0x79 [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #1 (rtnl_mutex){+.+.+.}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffff813959f9>] rtnl_lock+0x17/0x19 [<ffffffff8138ccae>] register_netdevice_notifier+0x1e/0x19b [<ffffffffa02580c1>] 0xffffffffa02580c1 [<ffffffff81002069>] do_one_initcall+0x5e/0x15e [<ffffffff81084094>] sys_init_module+0xd8/0x23a [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #0 (fcoe_config_mutex){+.+.+.}: [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b other info that might help us debug this: 3 locks held by fcoemon/18823: #0: (&buffer->mutex){+.+.+.}, at: 
[<ffffffff8115da17>] sysfs_write_file+0x3c/0x144 #1: (s_active){++++.+}, at: [<ffffffff8115ef86>] sysfs_get_active_two+0x24/0x48 #2: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 stack backtrace: Pid: 18823, comm: fcoemon Tainted: G W 2.6.33.1linux-stable-2.6.33 #1 Call Trace: [<ffffffff81076c38>] print_circular_bug+0xa8/0xb6 [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8106ac70>] ? cpu_clock+0x43/0x5e [<ffffffff81074e12>] ? lockstat_clock+0x11/0x13 [<ffffffff81074e40>] ? lock_release_holdtime+0x2c/0x127 [<ffffffff8115ef93>] ? sysfs_get_active_two+0x31/0x48 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81076596>] ? trace_hardirqs_on_caller+0x125/0x150 [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b Signed-off-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/fcoe/fcoe.c')
-rw-r--r--drivers/scsi/fcoe/fcoe.c41
1 files changed, 34 insertions, 7 deletions
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 4834d3c130d6..0c825c0944f7 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -801,6 +801,12 @@ skip_oem:
801/** 801/**
802 * fcoe_if_destroy() - Tear down a SW FCoE instance 802 * fcoe_if_destroy() - Tear down a SW FCoE instance
803 * @lport: The local port to be destroyed 803 * @lport: The local port to be destroyed
804 *
805 * Locking: must be called with the RTNL mutex held and RTNL mutex
806 * needed to be dropped by this function since not dropping RTNL
807 * would cause circular locking warning on synchronous fip worker
808 * cancelling thru fcoe_interface_put invoked by this function.
809 *
804 */ 810 */
805static void fcoe_if_destroy(struct fc_lport *lport) 811static void fcoe_if_destroy(struct fc_lport *lport)
806{ 812{
@@ -823,7 +829,6 @@ static void fcoe_if_destroy(struct fc_lport *lport)
823 /* Free existing transmit skbs */ 829 /* Free existing transmit skbs */
824 fcoe_clean_pending_queue(lport); 830 fcoe_clean_pending_queue(lport);
825 831
826 rtnl_lock();
827 if (!is_zero_ether_addr(port->data_src_addr)) 832 if (!is_zero_ether_addr(port->data_src_addr))
828 dev_unicast_delete(netdev, port->data_src_addr); 833 dev_unicast_delete(netdev, port->data_src_addr);
829 rtnl_unlock(); 834 rtnl_unlock();
@@ -1902,7 +1907,12 @@ static int fcoe_disable(const char *buffer, struct kernel_param *kp)
1902 goto out_nodev; 1907 goto out_nodev;
1903 } 1908 }
1904 1909
1905 rtnl_lock(); 1910 if (!rtnl_trylock()) {
1911 dev_put(netdev);
1912 mutex_unlock(&fcoe_config_mutex);
1913 return restart_syscall();
1914 }
1915
1906 fcoe = fcoe_hostlist_lookup_port(netdev); 1916 fcoe = fcoe_hostlist_lookup_port(netdev);
1907 rtnl_unlock(); 1917 rtnl_unlock();
1908 1918
@@ -1952,7 +1962,12 @@ static int fcoe_enable(const char *buffer, struct kernel_param *kp)
1952 goto out_nodev; 1962 goto out_nodev;
1953 } 1963 }
1954 1964
1955 rtnl_lock(); 1965 if (!rtnl_trylock()) {
1966 dev_put(netdev);
1967 mutex_unlock(&fcoe_config_mutex);
1968 return restart_syscall();
1969 }
1970
1956 fcoe = fcoe_hostlist_lookup_port(netdev); 1971 fcoe = fcoe_hostlist_lookup_port(netdev);
1957 rtnl_unlock(); 1972 rtnl_unlock();
1958 1973
@@ -2003,7 +2018,12 @@ static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
2003 goto out_nodev; 2018 goto out_nodev;
2004 } 2019 }
2005 2020
2006 rtnl_lock(); 2021 if (!rtnl_trylock()) {
2022 dev_put(netdev);
2023 mutex_unlock(&fcoe_config_mutex);
2024 return restart_syscall();
2025 }
2026
2007 fcoe = fcoe_hostlist_lookup_port(netdev); 2027 fcoe = fcoe_hostlist_lookup_port(netdev);
2008 if (!fcoe) { 2028 if (!fcoe) {
2009 rtnl_unlock(); 2029 rtnl_unlock();
@@ -2012,7 +2032,7 @@ static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
2012 } 2032 }
2013 list_del(&fcoe->list); 2033 list_del(&fcoe->list);
2014 fcoe_interface_cleanup(fcoe); 2034 fcoe_interface_cleanup(fcoe);
2015 rtnl_unlock(); 2035 /* RTNL mutex is dropped by fcoe_if_destroy */
2016 fcoe_if_destroy(fcoe->ctlr.lp); 2036 fcoe_if_destroy(fcoe->ctlr.lp);
2017 module_put(THIS_MODULE); 2037 module_put(THIS_MODULE);
2018 2038
@@ -2033,6 +2053,8 @@ static void fcoe_destroy_work(struct work_struct *work)
2033 2053
2034 port = container_of(work, struct fcoe_port, destroy_work); 2054 port = container_of(work, struct fcoe_port, destroy_work);
2035 mutex_lock(&fcoe_config_mutex); 2055 mutex_lock(&fcoe_config_mutex);
2056 rtnl_lock();
2057 /* RTNL mutex is dropped by fcoe_if_destroy */
2036 fcoe_if_destroy(port->lport); 2058 fcoe_if_destroy(port->lport);
2037 mutex_unlock(&fcoe_config_mutex); 2059 mutex_unlock(&fcoe_config_mutex);
2038} 2060}
@@ -2054,6 +2076,12 @@ static int fcoe_create(const char *buffer, struct kernel_param *kp)
2054 struct net_device *netdev; 2076 struct net_device *netdev;
2055 2077
2056 mutex_lock(&fcoe_config_mutex); 2078 mutex_lock(&fcoe_config_mutex);
2079
2080 if (!rtnl_trylock()) {
2081 mutex_unlock(&fcoe_config_mutex);
2082 return restart_syscall();
2083 }
2084
2057#ifdef CONFIG_FCOE_MODULE 2085#ifdef CONFIG_FCOE_MODULE
2058 /* 2086 /*
2059 * Make sure the module has been initialized, and is not about to be 2087 * Make sure the module has been initialized, and is not about to be
@@ -2071,7 +2099,6 @@ static int fcoe_create(const char *buffer, struct kernel_param *kp)
2071 goto out_nomod; 2099 goto out_nomod;
2072 } 2100 }
2073 2101
2074 rtnl_lock();
2075 netdev = fcoe_if_to_netdev(buffer); 2102 netdev = fcoe_if_to_netdev(buffer);
2076 if (!netdev) { 2103 if (!netdev) {
2077 rc = -ENODEV; 2104 rc = -ENODEV;
@@ -2126,9 +2153,9 @@ out_free:
2126out_putdev: 2153out_putdev:
2127 dev_put(netdev); 2154 dev_put(netdev);
2128out_nodev: 2155out_nodev:
2129 rtnl_unlock();
2130 module_put(THIS_MODULE); 2156 module_put(THIS_MODULE);
2131out_nomod: 2157out_nomod:
2158 rtnl_unlock();
2132 mutex_unlock(&fcoe_config_mutex); 2159 mutex_unlock(&fcoe_config_mutex);
2133 return rc; 2160 return rc;
2134} 2161}