aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeil Horman <nhorman@tuxdriver.com>2007-09-11 05:28:26 -0400
committerDavid S. Miller <davem@davemloft.net>2007-09-11 05:28:26 -0400
commit16fcec35e7d7c4faaa4709f6434a4a25b06d25e3 (patch)
tree5febf4d688f2c32ed55e02bc20246388b74d85e4
parent0fb96701376874c9f1f80322f89a5bf4457c709f (diff)
[NETFILTER]: Fix/improve deadlock condition on module removal netfilter
So I've had a deadlock reported to me. I've found that the sequence of events goes like this: 1) process A (modprobe) runs to remove ip_tables.ko 2) process B (iptables-restore) runs and calls setsockopt on a netfilter socket, increasing the ip_tables socket_ops use count 3) process A acquires a file lock on the file ip_tables.ko, calls remove_module in the kernel, which in turn executes the ip_tables module cleanup routine, which calls nf_unregister_sockopt 4) nf_unregister_sockopt, seeing that the use count is non-zero, puts the calling process into uninterruptible sleep, expecting the process using the socket option code to wake it up when it exits the kernel 4) the user of the socket option code (process B) in do_ipt_get_ctl, calls ipt_find_table_lock, which in this case calls request_module to load ip_tables_nat.ko 5) request_module forks a copy of modprobe (process C) to load the module and blocks until modprobe exits. 6) Process C. forked by request_module process the dependencies of ip_tables_nat.ko, of which ip_tables.ko is one. 7) Process C attempts to lock the request module and all its dependencies, it blocks when it attempts to lock ip_tables.ko (which was previously locked in step 3) Theres not really any great permanent solution to this that I can see, but I've developed a two part solution that corrects the problem Part 1) Modifies the nf_sockopt registration code so that, instead of using a use counter internal to the nf_sockopt_ops structure, we instead use a pointer to the registering modules owner to do module reference counting when nf_sockopt calls a modules set/get routine. This prevents the deadlock by preventing set 4 from happening. Part 2) Enhances the modprobe utilty so that by default it preforms non-blocking remove operations (the same way rmmod does), and add an option to explicity request blocking operation. So if you select blocking operation in modprobe you can still cause the above deadlock, but only if you explicity try (and since root can do any old stupid thing it would like.... :) ). Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netfilter.h5
-rw-r--r--net/bridge/netfilter/ebtables.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c1
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/netfilter/nf_sockopt.c36
8 files changed, 19 insertions, 28 deletions
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 0eed0b7ab2df..1dd075eda595 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -88,9 +88,8 @@ struct nf_sockopt_ops
88 int (*compat_get)(struct sock *sk, int optval, 88 int (*compat_get)(struct sock *sk, int optval,
89 void __user *user, int *len); 89 void __user *user, int *len);
90 90
91 /* Number of users inside set() or get(). */ 91 /* Use the module struct to lock set/get code in place */
92 unsigned int use; 92 struct module *owner;
93 struct task_struct *cleanup_task;
94}; 93};
95 94
96/* Each queued (to userspace) skbuff has one of these. */ 95/* Each queued (to userspace) skbuff has one of these. */
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 4169a2a89a39..6018d0e51938 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1513,6 +1513,7 @@ static struct nf_sockopt_ops ebt_sockopts =
1513 .get_optmin = EBT_BASE_CTL, 1513 .get_optmin = EBT_BASE_CTL,
1514 .get_optmax = EBT_SO_GET_MAX + 1, 1514 .get_optmax = EBT_SO_GET_MAX + 1,
1515 .get = do_ebt_get_ctl, 1515 .get = do_ebt_get_ctl,
1516 .owner = THIS_MODULE,
1516}; 1517};
1517 1518
1518static int __init ebtables_init(void) 1519static int __init ebtables_init(void)
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 902fd578aa3c..f656d41d8d41 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -2339,6 +2339,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
2339 .get_optmin = IP_VS_BASE_CTL, 2339 .get_optmin = IP_VS_BASE_CTL,
2340 .get_optmax = IP_VS_SO_GET_MAX+1, 2340 .get_optmax = IP_VS_SO_GET_MAX+1,
2341 .get = do_ip_vs_get_ctl, 2341 .get = do_ip_vs_get_ctl,
2342 .owner = THIS_MODULE,
2342}; 2343};
2343 2344
2344 2345
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d1149aba9351..29114a9ccd1d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1161,6 +1161,7 @@ static struct nf_sockopt_ops arpt_sockopts = {
1161 .get_optmin = ARPT_BASE_CTL, 1161 .get_optmin = ARPT_BASE_CTL,
1162 .get_optmax = ARPT_SO_GET_MAX+1, 1162 .get_optmax = ARPT_SO_GET_MAX+1,
1163 .get = do_arpt_get_ctl, 1163 .get = do_arpt_get_ctl,
1164 .owner = THIS_MODULE,
1164}; 1165};
1165 1166
1166static int __init arp_tables_init(void) 1167static int __init arp_tables_init(void)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e1b402c6b855..6486894f450c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2296,6 +2296,7 @@ static struct nf_sockopt_ops ipt_sockopts = {
2296#ifdef CONFIG_COMPAT 2296#ifdef CONFIG_COMPAT
2297 .compat_get = compat_do_ipt_get_ctl, 2297 .compat_get = compat_do_ipt_get_ctl,
2298#endif 2298#endif
2299 .owner = THIS_MODULE,
2299}; 2300};
2300 2301
2301static struct xt_match icmp_matchstruct __read_mostly = { 2302static struct xt_match icmp_matchstruct __read_mostly = {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 53cb1772f38f..f813e02aab30 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -399,6 +399,7 @@ static struct nf_sockopt_ops so_getorigdst = {
399 .get_optmin = SO_ORIGINAL_DST, 399 .get_optmin = SO_ORIGINAL_DST,
400 .get_optmax = SO_ORIGINAL_DST+1, 400 .get_optmax = SO_ORIGINAL_DST+1,
401 .get = &getorigdst, 401 .get = &getorigdst,
402 .owner = THIS_MODULE,
402}; 403};
403 404
404struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { 405struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index aeda617246b7..cd9df02bb85c 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1462,6 +1462,7 @@ static struct nf_sockopt_ops ip6t_sockopts = {
1462 .get_optmin = IP6T_BASE_CTL, 1462 .get_optmin = IP6T_BASE_CTL,
1463 .get_optmax = IP6T_SO_GET_MAX+1, 1463 .get_optmax = IP6T_SO_GET_MAX+1,
1464 .get = do_ip6t_get_ctl, 1464 .get = do_ip6t_get_ctl,
1465 .owner = THIS_MODULE,
1465}; 1466};
1466 1467
1467static struct xt_match icmp6_matchstruct __read_mostly = { 1468static struct xt_match icmp6_matchstruct __read_mostly = {
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 8b8ece750313..e32761ce260c 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -55,18 +55,7 @@ EXPORT_SYMBOL(nf_register_sockopt);
55 55
56void nf_unregister_sockopt(struct nf_sockopt_ops *reg) 56void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
57{ 57{
58 /* No point being interruptible: we're probably in cleanup_module() */
59 restart:
60 mutex_lock(&nf_sockopt_mutex); 58 mutex_lock(&nf_sockopt_mutex);
61 if (reg->use != 0) {
62 /* To be woken by nf_sockopt call... */
63 /* FIXME: Stuart Young's name appears gratuitously. */
64 set_current_state(TASK_UNINTERRUPTIBLE);
65 reg->cleanup_task = current;
66 mutex_unlock(&nf_sockopt_mutex);
67 schedule();
68 goto restart;
69 }
70 list_del(&reg->list); 59 list_del(&reg->list);
71 mutex_unlock(&nf_sockopt_mutex); 60 mutex_unlock(&nf_sockopt_mutex);
72} 61}
@@ -86,10 +75,11 @@ static int nf_sockopt(struct sock *sk, int pf, int val,
86 list_for_each(i, &nf_sockopts) { 75 list_for_each(i, &nf_sockopts) {
87 ops = (struct nf_sockopt_ops *)i; 76 ops = (struct nf_sockopt_ops *)i;
88 if (ops->pf == pf) { 77 if (ops->pf == pf) {
78 if (!try_module_get(ops->owner))
79 goto out_nosup;
89 if (get) { 80 if (get) {
90 if (val >= ops->get_optmin 81 if (val >= ops->get_optmin
91 && val < ops->get_optmax) { 82 && val < ops->get_optmax) {
92 ops->use++;
93 mutex_unlock(&nf_sockopt_mutex); 83 mutex_unlock(&nf_sockopt_mutex);
94 ret = ops->get(sk, val, opt, len); 84 ret = ops->get(sk, val, opt, len);
95 goto out; 85 goto out;
@@ -97,23 +87,20 @@ static int nf_sockopt(struct sock *sk, int pf, int val,
97 } else { 87 } else {
98 if (val >= ops->set_optmin 88 if (val >= ops->set_optmin
99 && val < ops->set_optmax) { 89 && val < ops->set_optmax) {
100 ops->use++;
101 mutex_unlock(&nf_sockopt_mutex); 90 mutex_unlock(&nf_sockopt_mutex);
102 ret = ops->set(sk, val, opt, *len); 91 ret = ops->set(sk, val, opt, *len);
103 goto out; 92 goto out;
104 } 93 }
105 } 94 }
95 module_put(ops->owner);
106 } 96 }
107 } 97 }
98 out_nosup:
108 mutex_unlock(&nf_sockopt_mutex); 99 mutex_unlock(&nf_sockopt_mutex);
109 return -ENOPROTOOPT; 100 return -ENOPROTOOPT;
110 101
111 out: 102 out:
112 mutex_lock(&nf_sockopt_mutex); 103 module_put(ops->owner);
113 ops->use--;
114 if (ops->cleanup_task)
115 wake_up_process(ops->cleanup_task);
116 mutex_unlock(&nf_sockopt_mutex);
117 return ret; 104 return ret;
118} 105}
119 106
@@ -144,10 +131,12 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val,
144 list_for_each(i, &nf_sockopts) { 131 list_for_each(i, &nf_sockopts) {
145 ops = (struct nf_sockopt_ops *)i; 132 ops = (struct nf_sockopt_ops *)i;
146 if (ops->pf == pf) { 133 if (ops->pf == pf) {
134 if (!try_module_get(ops->owner))
135 goto out_nosup;
136
147 if (get) { 137 if (get) {
148 if (val >= ops->get_optmin 138 if (val >= ops->get_optmin
149 && val < ops->get_optmax) { 139 && val < ops->get_optmax) {
150 ops->use++;
151 mutex_unlock(&nf_sockopt_mutex); 140 mutex_unlock(&nf_sockopt_mutex);
152 if (ops->compat_get) 141 if (ops->compat_get)
153 ret = ops->compat_get(sk, 142 ret = ops->compat_get(sk,
@@ -160,7 +149,6 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val,
160 } else { 149 } else {
161 if (val >= ops->set_optmin 150 if (val >= ops->set_optmin
162 && val < ops->set_optmax) { 151 && val < ops->set_optmax) {
163 ops->use++;
164 mutex_unlock(&nf_sockopt_mutex); 152 mutex_unlock(&nf_sockopt_mutex);
165 if (ops->compat_set) 153 if (ops->compat_set)
166 ret = ops->compat_set(sk, 154 ret = ops->compat_set(sk,
@@ -171,17 +159,15 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val,
171 goto out; 159 goto out;
172 } 160 }
173 } 161 }
162 module_put(ops->owner);
174 } 163 }
175 } 164 }
165 out_nosup:
176 mutex_unlock(&nf_sockopt_mutex); 166 mutex_unlock(&nf_sockopt_mutex);
177 return -ENOPROTOOPT; 167 return -ENOPROTOOPT;
178 168
179 out: 169 out:
180 mutex_lock(&nf_sockopt_mutex); 170 module_put(ops->owner);
181 ops->use--;
182 if (ops->cleanup_task)
183 wake_up_process(ops->cleanup_task);
184 mutex_unlock(&nf_sockopt_mutex);
185 return ret; 171 return ret;
186} 172}
187 173