diff options
| author | Julian Anastasov <ja@ssi.bg> | 2015-06-29 14:51:40 -0400 |
|---|---|---|
| committer | Simon Horman <horms@verge.net.au> | 2015-07-14 03:41:27 -0400 |
| commit | 05f00505a89acd21f5d0d20f5797dfbc4cf85243 (patch) | |
| tree | 9e4948698b71fcec2dc3b233c22893efded4c16b /net/netfilter | |
| parent | 4754957f04f5f368792a0eb7dab0ae89fb93dcfd (diff) | |
ipvs: fix crash if scheduler is changed
I overlooked the svc->sched_data usage from schedulers
when the services were converted to RCU in 3.10. Now
the rare ipvsadm -E command can change the scheduler,
but due to the reverse order of ip_vs_bind_scheduler
and ip_vs_unbind_scheduler we provide the new sched_data
to the old scheduler, resulting in a crash.
To fix it without changing the scheduler methods we
have to use synchronize_rcu() only for the editing case.
It means all svc->scheduler readers should expect a
NULL value. To avoid breakage for the service listing
and ipvsadm -R we can use the "none" name to indicate
that no scheduler is assigned, a state in which we drop
new connections.
Reported-by: Alexander Vasiliev <a.vasylev@404-group.com>
Fixes: ceec4c381681 ("ipvs: convert services to rcu")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net/netfilter')
| -rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 16 | ||||
| -rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 78 | ||||
| -rw-r--r-- | net/netfilter/ipvs/ip_vs_sched.c | 12 |
3 files changed, 69 insertions, 37 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5d2b806a862e..38fbc194b9cb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
| @@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
| 319 | * return *ignored=0 i.e. ICMP and NF_DROP | 319 | * return *ignored=0 i.e. ICMP and NF_DROP |
| 320 | */ | 320 | */ |
| 321 | sched = rcu_dereference(svc->scheduler); | 321 | sched = rcu_dereference(svc->scheduler); |
| 322 | dest = sched->schedule(svc, skb, iph); | 322 | if (sched) { |
| 323 | /* read svc->sched_data after svc->scheduler */ | ||
| 324 | smp_rmb(); | ||
| 325 | dest = sched->schedule(svc, skb, iph); | ||
| 326 | } else { | ||
| 327 | dest = NULL; | ||
| 328 | } | ||
| 323 | if (!dest) { | 329 | if (!dest) { |
| 324 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); | 330 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); |
| 325 | kfree(param.pe_data); | 331 | kfree(param.pe_data); |
| @@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
| 467 | } | 473 | } |
| 468 | 474 | ||
| 469 | sched = rcu_dereference(svc->scheduler); | 475 | sched = rcu_dereference(svc->scheduler); |
| 470 | dest = sched->schedule(svc, skb, iph); | 476 | if (sched) { |
| 477 | /* read svc->sched_data after svc->scheduler */ | ||
| 478 | smp_rmb(); | ||
| 479 | dest = sched->schedule(svc, skb, iph); | ||
| 480 | } else { | ||
| 481 | dest = NULL; | ||
| 482 | } | ||
| 471 | if (dest == NULL) { | 483 | if (dest == NULL) { |
| 472 | IP_VS_DBG(1, "Schedule: no dest found.\n"); | 484 | IP_VS_DBG(1, "Schedule: no dest found.\n"); |
| 473 | return NULL; | 485 | return NULL; |
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 285eae3a1454..24c554201a76 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
| @@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
| 842 | __ip_vs_dst_cache_reset(dest); | 842 | __ip_vs_dst_cache_reset(dest); |
| 843 | spin_unlock_bh(&dest->dst_lock); | 843 | spin_unlock_bh(&dest->dst_lock); |
| 844 | 844 | ||
| 845 | sched = rcu_dereference_protected(svc->scheduler, 1); | ||
| 846 | if (add) { | 845 | if (add) { |
| 847 | ip_vs_start_estimator(svc->net, &dest->stats); | 846 | ip_vs_start_estimator(svc->net, &dest->stats); |
| 848 | list_add_rcu(&dest->n_list, &svc->destinations); | 847 | list_add_rcu(&dest->n_list, &svc->destinations); |
| 849 | svc->num_dests++; | 848 | svc->num_dests++; |
| 850 | if (sched->add_dest) | 849 | sched = rcu_dereference_protected(svc->scheduler, 1); |
| 850 | if (sched && sched->add_dest) | ||
| 851 | sched->add_dest(svc, dest); | 851 | sched->add_dest(svc, dest); |
| 852 | } else { | 852 | } else { |
| 853 | if (sched->upd_dest) | 853 | sched = rcu_dereference_protected(svc->scheduler, 1); |
| 854 | if (sched && sched->upd_dest) | ||
| 854 | sched->upd_dest(svc, dest); | 855 | sched->upd_dest(svc, dest); |
| 855 | } | 856 | } |
| 856 | } | 857 | } |
| @@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, | |||
| 1084 | struct ip_vs_scheduler *sched; | 1085 | struct ip_vs_scheduler *sched; |
| 1085 | 1086 | ||
| 1086 | sched = rcu_dereference_protected(svc->scheduler, 1); | 1087 | sched = rcu_dereference_protected(svc->scheduler, 1); |
| 1087 | if (sched->del_dest) | 1088 | if (sched && sched->del_dest) |
| 1088 | sched->del_dest(svc, dest); | 1089 | sched->del_dest(svc, dest); |
| 1089 | } | 1090 | } |
| 1090 | } | 1091 | } |
| @@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, | |||
| 1175 | ip_vs_use_count_inc(); | 1176 | ip_vs_use_count_inc(); |
| 1176 | 1177 | ||
| 1177 | /* Lookup the scheduler by 'u->sched_name' */ | 1178 | /* Lookup the scheduler by 'u->sched_name' */ |
| 1178 | sched = ip_vs_scheduler_get(u->sched_name); | 1179 | if (strcmp(u->sched_name, "none")) { |
| 1179 | if (sched == NULL) { | 1180 | sched = ip_vs_scheduler_get(u->sched_name); |
| 1180 | pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); | 1181 | if (!sched) { |
| 1181 | ret = -ENOENT; | 1182 | pr_info("Scheduler module ip_vs_%s not found\n", |
| 1182 | goto out_err; | 1183 | u->sched_name); |
| 1184 | ret = -ENOENT; | ||
| 1185 | goto out_err; | ||
| 1186 | } | ||
| 1183 | } | 1187 | } |
| 1184 | 1188 | ||
| 1185 | if (u->pe_name && *u->pe_name) { | 1189 | if (u->pe_name && *u->pe_name) { |
| @@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, | |||
| 1240 | spin_lock_init(&svc->stats.lock); | 1244 | spin_lock_init(&svc->stats.lock); |
| 1241 | 1245 | ||
| 1242 | /* Bind the scheduler */ | 1246 | /* Bind the scheduler */ |
| 1243 | ret = ip_vs_bind_scheduler(svc, sched); | 1247 | if (sched) { |
| 1244 | if (ret) | 1248 | ret = ip_vs_bind_scheduler(svc, sched); |
| 1245 | goto out_err; | 1249 | if (ret) |
| 1246 | sched = NULL; | 1250 | goto out_err; |
| 1251 | sched = NULL; | ||
| 1252 | } | ||
| 1247 | 1253 | ||
| 1248 | /* Bind the ct retriever */ | 1254 | /* Bind the ct retriever */ |
| 1249 | RCU_INIT_POINTER(svc->pe, pe); | 1255 | RCU_INIT_POINTER(svc->pe, pe); |
| @@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, | |||
| 1291 | static int | 1297 | static int |
| 1292 | ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) | 1298 | ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) |
| 1293 | { | 1299 | { |
| 1294 | struct ip_vs_scheduler *sched, *old_sched; | 1300 | struct ip_vs_scheduler *sched = NULL, *old_sched; |
| 1295 | struct ip_vs_pe *pe = NULL, *old_pe = NULL; | 1301 | struct ip_vs_pe *pe = NULL, *old_pe = NULL; |
| 1296 | int ret = 0; | 1302 | int ret = 0; |
| 1297 | 1303 | ||
| 1298 | /* | 1304 | /* |
| 1299 | * Lookup the scheduler, by 'u->sched_name' | 1305 | * Lookup the scheduler, by 'u->sched_name' |
| 1300 | */ | 1306 | */ |
| 1301 | sched = ip_vs_scheduler_get(u->sched_name); | 1307 | if (strcmp(u->sched_name, "none")) { |
| 1302 | if (sched == NULL) { | 1308 | sched = ip_vs_scheduler_get(u->sched_name); |
| 1303 | pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); | 1309 | if (!sched) { |
| 1304 | return -ENOENT; | 1310 | pr_info("Scheduler module ip_vs_%s not found\n", |
| 1311 | u->sched_name); | ||
| 1312 | return -ENOENT; | ||
| 1313 | } | ||
| 1305 | } | 1314 | } |
| 1306 | old_sched = sched; | 1315 | old_sched = sched; |
| 1307 | 1316 | ||
| @@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) | |||
| 1329 | 1338 | ||
| 1330 | old_sched = rcu_dereference_protected(svc->scheduler, 1); | 1339 | old_sched = rcu_dereference_protected(svc->scheduler, 1); |
| 1331 | if (sched != old_sched) { | 1340 | if (sched != old_sched) { |
| 1341 | if (old_sched) { | ||
| 1342 | ip_vs_unbind_scheduler(svc, old_sched); | ||
| 1343 | RCU_INIT_POINTER(svc->scheduler, NULL); | ||
| 1344 | /* Wait all svc->sched_data users */ | ||
| 1345 | synchronize_rcu(); | ||
| 1346 | } | ||
| 1332 | /* Bind the new scheduler */ | 1347 | /* Bind the new scheduler */ |
| 1333 | ret = ip_vs_bind_scheduler(svc, sched); | 1348 | if (sched) { |
| 1334 | if (ret) { | 1349 | ret = ip_vs_bind_scheduler(svc, sched); |
| 1335 | old_sched = sched; | 1350 | if (ret) { |
| 1336 | goto out; | 1351 | ip_vs_scheduler_put(sched); |
| 1352 | goto out; | ||
| 1353 | } | ||
| 1337 | } | 1354 | } |
| 1338 | /* Unbind the old scheduler on success */ | ||
| 1339 | ip_vs_unbind_scheduler(svc, old_sched); | ||
| 1340 | } | 1355 | } |
| 1341 | 1356 | ||
| 1342 | /* | 1357 | /* |
| @@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) | |||
| 1982 | const struct ip_vs_iter *iter = seq->private; | 1997 | const struct ip_vs_iter *iter = seq->private; |
| 1983 | const struct ip_vs_dest *dest; | 1998 | const struct ip_vs_dest *dest; |
| 1984 | struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); | 1999 | struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); |
| 2000 | char *sched_name = sched ? sched->name : "none"; | ||
| 1985 | 2001 | ||
| 1986 | if (iter->table == ip_vs_svc_table) { | 2002 | if (iter->table == ip_vs_svc_table) { |
| 1987 | #ifdef CONFIG_IP_VS_IPV6 | 2003 | #ifdef CONFIG_IP_VS_IPV6 |
| @@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) | |||
| 1990 | ip_vs_proto_name(svc->protocol), | 2006 | ip_vs_proto_name(svc->protocol), |
| 1991 | &svc->addr.in6, | 2007 | &svc->addr.in6, |
| 1992 | ntohs(svc->port), | 2008 | ntohs(svc->port), |
| 1993 | sched->name); | 2009 | sched_name); |
| 1994 | else | 2010 | else |
| 1995 | #endif | 2011 | #endif |
| 1996 | seq_printf(seq, "%s %08X:%04X %s %s ", | 2012 | seq_printf(seq, "%s %08X:%04X %s %s ", |
| 1997 | ip_vs_proto_name(svc->protocol), | 2013 | ip_vs_proto_name(svc->protocol), |
| 1998 | ntohl(svc->addr.ip), | 2014 | ntohl(svc->addr.ip), |
| 1999 | ntohs(svc->port), | 2015 | ntohs(svc->port), |
| 2000 | sched->name, | 2016 | sched_name, |
| 2001 | (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); | 2017 | (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); |
| 2002 | } else { | 2018 | } else { |
| 2003 | seq_printf(seq, "FWM %08X %s %s", | 2019 | seq_printf(seq, "FWM %08X %s %s", |
| 2004 | svc->fwmark, sched->name, | 2020 | svc->fwmark, sched_name, |
| 2005 | (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); | 2021 | (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); |
| 2006 | } | 2022 | } |
| 2007 | 2023 | ||
| @@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) | |||
| 2427 | { | 2443 | { |
| 2428 | struct ip_vs_scheduler *sched; | 2444 | struct ip_vs_scheduler *sched; |
| 2429 | struct ip_vs_kstats kstats; | 2445 | struct ip_vs_kstats kstats; |
| 2446 | char *sched_name; | ||
| 2430 | 2447 | ||
| 2431 | sched = rcu_dereference_protected(src->scheduler, 1); | 2448 | sched = rcu_dereference_protected(src->scheduler, 1); |
| 2449 | sched_name = sched ? sched->name : "none"; | ||
| 2432 | dst->protocol = src->protocol; | 2450 | dst->protocol = src->protocol; |
| 2433 | dst->addr = src->addr.ip; | 2451 | dst->addr = src->addr.ip; |
| 2434 | dst->port = src->port; | 2452 | dst->port = src->port; |
| 2435 | dst->fwmark = src->fwmark; | 2453 | dst->fwmark = src->fwmark; |
| 2436 | strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); | 2454 | strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); |
| 2437 | dst->flags = src->flags; | 2455 | dst->flags = src->flags; |
| 2438 | dst->timeout = src->timeout / HZ; | 2456 | dst->timeout = src->timeout / HZ; |
| 2439 | dst->netmask = src->netmask; | 2457 | dst->netmask = src->netmask; |
| @@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, | |||
| 2892 | struct ip_vs_flags flags = { .flags = svc->flags, | 2910 | struct ip_vs_flags flags = { .flags = svc->flags, |
| 2893 | .mask = ~0 }; | 2911 | .mask = ~0 }; |
| 2894 | struct ip_vs_kstats kstats; | 2912 | struct ip_vs_kstats kstats; |
| 2913 | char *sched_name; | ||
| 2895 | 2914 | ||
| 2896 | nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); | 2915 | nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); |
| 2897 | if (!nl_service) | 2916 | if (!nl_service) |
| @@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, | |||
| 2910 | } | 2929 | } |
| 2911 | 2930 | ||
| 2912 | sched = rcu_dereference_protected(svc->scheduler, 1); | 2931 | sched = rcu_dereference_protected(svc->scheduler, 1); |
| 2932 | sched_name = sched ? sched->name : "none"; | ||
| 2913 | pe = rcu_dereference_protected(svc->pe, 1); | 2933 | pe = rcu_dereference_protected(svc->pe, 1); |
| 2914 | if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || | 2934 | if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || |
| 2915 | (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || | 2935 | (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || |
| 2916 | nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || | 2936 | nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || |
| 2917 | nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || | 2937 | nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || |
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 199760c71f39..7e8141647943 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c | |||
| @@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, | |||
| 74 | 74 | ||
| 75 | if (sched->done_service) | 75 | if (sched->done_service) |
| 76 | sched->done_service(svc); | 76 | sched->done_service(svc); |
| 77 | /* svc->scheduler can not be set to NULL */ | 77 | /* svc->scheduler can be set to NULL only by caller */ |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | 80 | ||
| @@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) | |||
| 147 | 147 | ||
| 148 | void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) | 148 | void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) |
| 149 | { | 149 | { |
| 150 | struct ip_vs_scheduler *sched; | 150 | struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); |
| 151 | char *sched_name = sched ? sched->name : "none"; | ||
| 151 | 152 | ||
| 152 | sched = rcu_dereference(svc->scheduler); | ||
| 153 | if (svc->fwmark) { | 153 | if (svc->fwmark) { |
| 154 | IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", | 154 | IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", |
| 155 | sched->name, svc->fwmark, svc->fwmark, msg); | 155 | sched_name, svc->fwmark, svc->fwmark, msg); |
| 156 | #ifdef CONFIG_IP_VS_IPV6 | 156 | #ifdef CONFIG_IP_VS_IPV6 |
| 157 | } else if (svc->af == AF_INET6) { | 157 | } else if (svc->af == AF_INET6) { |
| 158 | IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", | 158 | IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", |
| 159 | sched->name, ip_vs_proto_name(svc->protocol), | 159 | sched_name, ip_vs_proto_name(svc->protocol), |
| 160 | &svc->addr.in6, ntohs(svc->port), msg); | 160 | &svc->addr.in6, ntohs(svc->port), msg); |
| 161 | #endif | 161 | #endif |
| 162 | } else { | 162 | } else { |
| 163 | IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", | 163 | IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", |
| 164 | sched->name, ip_vs_proto_name(svc->protocol), | 164 | sched_name, ip_vs_proto_name(svc->protocol), |
| 165 | &svc->addr.ip, ntohs(svc->port), msg); | 165 | &svc->addr.ip, ntohs(svc->port), msg); |
| 166 | } | 166 | } |
| 167 | } | 167 | } |
