diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-11-23 08:12:15 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-11-24 14:16:42 -0500 |
commit | 456b61bca8ee324ab6c18b065e632c9a8c88aa39 (patch) | |
tree | d1d458d78b6fffa32558ffbf7cdfc49a316a660c | |
parent | 2757a15f08adbed9480c30bdb4e9a0bbf2b6f33a (diff) |
ipv6: mcast: RCU conversion
ipv6_sk_mc_lock rwlock becomes a spinlock.
readers (inet6_mc_check()) now takes rcu_read_lock() instead of read
lock. Writers dont need to disable BH anymore.
struct ipv6_mc_socklist objects are reclaimed after one RCU grace
period.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/ipv6.h | 2 | ||||
-rw-r--r-- | include/net/if_inet6.h | 3 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 75 |
3 files changed, 47 insertions, 33 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8e429d0e0405..0c997767429a 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h | |||
@@ -364,7 +364,7 @@ struct ipv6_pinfo { | |||
364 | 364 | ||
365 | __u32 dst_cookie; | 365 | __u32 dst_cookie; |
366 | 366 | ||
367 | struct ipv6_mc_socklist *ipv6_mc_list; | 367 | struct ipv6_mc_socklist __rcu *ipv6_mc_list; |
368 | struct ipv6_ac_socklist *ipv6_ac_list; | 368 | struct ipv6_ac_socklist *ipv6_ac_list; |
369 | struct ipv6_fl_socklist *ipv6_fl_list; | 369 | struct ipv6_fl_socklist *ipv6_fl_list; |
370 | 370 | ||
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f95ff8d9aa47..04977eefb0ee 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h | |||
@@ -89,10 +89,11 @@ struct ip6_sf_socklist { | |||
89 | struct ipv6_mc_socklist { | 89 | struct ipv6_mc_socklist { |
90 | struct in6_addr addr; | 90 | struct in6_addr addr; |
91 | int ifindex; | 91 | int ifindex; |
92 | struct ipv6_mc_socklist *next; | 92 | struct ipv6_mc_socklist __rcu *next; |
93 | rwlock_t sflock; | 93 | rwlock_t sflock; |
94 | unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ | 94 | unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ |
95 | struct ip6_sf_socklist *sflist; | 95 | struct ip6_sf_socklist *sflist; |
96 | struct rcu_head rcu; | ||
96 | }; | 97 | }; |
97 | 98 | ||
98 | struct ip6_sf_list { | 99 | struct ip6_sf_list { |
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9c5074528a71..49f986d626a0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c | |||
@@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { | |||
82 | static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; | 82 | static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; |
83 | 83 | ||
84 | /* Big mc list lock for all the sockets */ | 84 | /* Big mc list lock for all the sockets */ |
85 | static DEFINE_RWLOCK(ipv6_sk_mc_lock); | 85 | static DEFINE_SPINLOCK(ipv6_sk_mc_lock); |
86 | 86 | ||
87 | static void igmp6_join_group(struct ifmcaddr6 *ma); | 87 | static void igmp6_join_group(struct ifmcaddr6 *ma); |
88 | static void igmp6_leave_group(struct ifmcaddr6 *ma); | 88 | static void igmp6_leave_group(struct ifmcaddr6 *ma); |
@@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; | |||
123 | * socket join on multicast group | 123 | * socket join on multicast group |
124 | */ | 124 | */ |
125 | 125 | ||
126 | #define for_each_pmc_rcu(np, pmc) \ | ||
127 | for (pmc = rcu_dereference(np->ipv6_mc_list); \ | ||
128 | pmc != NULL; \ | ||
129 | pmc = rcu_dereference(pmc->next)) | ||
130 | |||
126 | int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) | 131 | int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) |
127 | { | 132 | { |
128 | struct net_device *dev = NULL; | 133 | struct net_device *dev = NULL; |
@@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) | |||
134 | if (!ipv6_addr_is_multicast(addr)) | 139 | if (!ipv6_addr_is_multicast(addr)) |
135 | return -EINVAL; | 140 | return -EINVAL; |
136 | 141 | ||
137 | read_lock_bh(&ipv6_sk_mc_lock); | 142 | rcu_read_lock(); |
138 | for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) { | 143 | for_each_pmc_rcu(np, mc_lst) { |
139 | if ((ifindex == 0 || mc_lst->ifindex == ifindex) && | 144 | if ((ifindex == 0 || mc_lst->ifindex == ifindex) && |
140 | ipv6_addr_equal(&mc_lst->addr, addr)) { | 145 | ipv6_addr_equal(&mc_lst->addr, addr)) { |
141 | read_unlock_bh(&ipv6_sk_mc_lock); | 146 | rcu_read_unlock(); |
142 | return -EADDRINUSE; | 147 | return -EADDRINUSE; |
143 | } | 148 | } |
144 | } | 149 | } |
145 | read_unlock_bh(&ipv6_sk_mc_lock); | 150 | rcu_read_unlock(); |
146 | 151 | ||
147 | mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); | 152 | mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); |
148 | 153 | ||
@@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) | |||
186 | return err; | 191 | return err; |
187 | } | 192 | } |
188 | 193 | ||
189 | write_lock_bh(&ipv6_sk_mc_lock); | 194 | spin_lock(&ipv6_sk_mc_lock); |
190 | mc_lst->next = np->ipv6_mc_list; | 195 | mc_lst->next = np->ipv6_mc_list; |
191 | np->ipv6_mc_list = mc_lst; | 196 | rcu_assign_pointer(np->ipv6_mc_list, mc_lst); |
192 | write_unlock_bh(&ipv6_sk_mc_lock); | 197 | spin_unlock(&ipv6_sk_mc_lock); |
193 | 198 | ||
194 | rcu_read_unlock(); | 199 | rcu_read_unlock(); |
195 | 200 | ||
196 | return 0; | 201 | return 0; |
197 | } | 202 | } |
198 | 203 | ||
204 | static void ipv6_mc_socklist_reclaim(struct rcu_head *head) | ||
205 | { | ||
206 | kfree(container_of(head, struct ipv6_mc_socklist, rcu)); | ||
207 | } | ||
199 | /* | 208 | /* |
200 | * socket leave on multicast group | 209 | * socket leave on multicast group |
201 | */ | 210 | */ |
202 | int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) | 211 | int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) |
203 | { | 212 | { |
204 | struct ipv6_pinfo *np = inet6_sk(sk); | 213 | struct ipv6_pinfo *np = inet6_sk(sk); |
205 | struct ipv6_mc_socklist *mc_lst, **lnk; | 214 | struct ipv6_mc_socklist *mc_lst; |
215 | struct ipv6_mc_socklist __rcu **lnk; | ||
206 | struct net *net = sock_net(sk); | 216 | struct net *net = sock_net(sk); |
207 | 217 | ||
208 | write_lock_bh(&ipv6_sk_mc_lock); | 218 | spin_lock(&ipv6_sk_mc_lock); |
209 | for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { | 219 | for (lnk = &np->ipv6_mc_list; |
220 | (mc_lst = rcu_dereference_protected(*lnk, | ||
221 | lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; | ||
222 | lnk = &mc_lst->next) { | ||
210 | if ((ifindex == 0 || mc_lst->ifindex == ifindex) && | 223 | if ((ifindex == 0 || mc_lst->ifindex == ifindex) && |
211 | ipv6_addr_equal(&mc_lst->addr, addr)) { | 224 | ipv6_addr_equal(&mc_lst->addr, addr)) { |
212 | struct net_device *dev; | 225 | struct net_device *dev; |
213 | 226 | ||
214 | *lnk = mc_lst->next; | 227 | *lnk = mc_lst->next; |
215 | write_unlock_bh(&ipv6_sk_mc_lock); | 228 | spin_unlock(&ipv6_sk_mc_lock); |
216 | 229 | ||
217 | rcu_read_lock(); | 230 | rcu_read_lock(); |
218 | dev = dev_get_by_index_rcu(net, mc_lst->ifindex); | 231 | dev = dev_get_by_index_rcu(net, mc_lst->ifindex); |
@@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) | |||
225 | } else | 238 | } else |
226 | (void) ip6_mc_leave_src(sk, mc_lst, NULL); | 239 | (void) ip6_mc_leave_src(sk, mc_lst, NULL); |
227 | rcu_read_unlock(); | 240 | rcu_read_unlock(); |
228 | sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); | 241 | atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); |
242 | call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); | ||
229 | return 0; | 243 | return 0; |
230 | } | 244 | } |
231 | } | 245 | } |
232 | write_unlock_bh(&ipv6_sk_mc_lock); | 246 | spin_unlock(&ipv6_sk_mc_lock); |
233 | 247 | ||
234 | return -EADDRNOTAVAIL; | 248 | return -EADDRNOTAVAIL; |
235 | } | 249 | } |
@@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk) | |||
272 | struct ipv6_mc_socklist *mc_lst; | 286 | struct ipv6_mc_socklist *mc_lst; |
273 | struct net *net = sock_net(sk); | 287 | struct net *net = sock_net(sk); |
274 | 288 | ||
275 | write_lock_bh(&ipv6_sk_mc_lock); | 289 | spin_lock(&ipv6_sk_mc_lock); |
276 | while ((mc_lst = np->ipv6_mc_list) != NULL) { | 290 | while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, |
291 | lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { | ||
277 | struct net_device *dev; | 292 | struct net_device *dev; |
278 | 293 | ||
279 | np->ipv6_mc_list = mc_lst->next; | 294 | np->ipv6_mc_list = mc_lst->next; |
280 | write_unlock_bh(&ipv6_sk_mc_lock); | 295 | spin_unlock(&ipv6_sk_mc_lock); |
281 | 296 | ||
282 | rcu_read_lock(); | 297 | rcu_read_lock(); |
283 | dev = dev_get_by_index_rcu(net, mc_lst->ifindex); | 298 | dev = dev_get_by_index_rcu(net, mc_lst->ifindex); |
@@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk) | |||
290 | } else | 305 | } else |
291 | (void) ip6_mc_leave_src(sk, mc_lst, NULL); | 306 | (void) ip6_mc_leave_src(sk, mc_lst, NULL); |
292 | rcu_read_unlock(); | 307 | rcu_read_unlock(); |
293 | sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); | ||
294 | 308 | ||
295 | write_lock_bh(&ipv6_sk_mc_lock); | 309 | atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); |
310 | call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); | ||
311 | |||
312 | spin_lock(&ipv6_sk_mc_lock); | ||
296 | } | 313 | } |
297 | write_unlock_bh(&ipv6_sk_mc_lock); | 314 | spin_unlock(&ipv6_sk_mc_lock); |
298 | } | 315 | } |
299 | 316 | ||
300 | int ip6_mc_source(int add, int omode, struct sock *sk, | 317 | int ip6_mc_source(int add, int omode, struct sock *sk, |
@@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, | |||
328 | 345 | ||
329 | err = -EADDRNOTAVAIL; | 346 | err = -EADDRNOTAVAIL; |
330 | 347 | ||
331 | read_lock(&ipv6_sk_mc_lock); | 348 | for_each_pmc_rcu(inet6, pmc) { |
332 | for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { | ||
333 | if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) | 349 | if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) |
334 | continue; | 350 | continue; |
335 | if (ipv6_addr_equal(&pmc->addr, group)) | 351 | if (ipv6_addr_equal(&pmc->addr, group)) |
@@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, | |||
428 | done: | 444 | done: |
429 | if (pmclocked) | 445 | if (pmclocked) |
430 | write_unlock(&pmc->sflock); | 446 | write_unlock(&pmc->sflock); |
431 | read_unlock(&ipv6_sk_mc_lock); | ||
432 | read_unlock_bh(&idev->lock); | 447 | read_unlock_bh(&idev->lock); |
433 | rcu_read_unlock(); | 448 | rcu_read_unlock(); |
434 | if (leavegroup) | 449 | if (leavegroup) |
@@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) | |||
466 | dev = idev->dev; | 481 | dev = idev->dev; |
467 | 482 | ||
468 | err = 0; | 483 | err = 0; |
469 | read_lock(&ipv6_sk_mc_lock); | ||
470 | 484 | ||
471 | if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { | 485 | if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { |
472 | leavegroup = 1; | 486 | leavegroup = 1; |
473 | goto done; | 487 | goto done; |
474 | } | 488 | } |
475 | 489 | ||
476 | for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { | 490 | for_each_pmc_rcu(inet6, pmc) { |
477 | if (pmc->ifindex != gsf->gf_interface) | 491 | if (pmc->ifindex != gsf->gf_interface) |
478 | continue; | 492 | continue; |
479 | if (ipv6_addr_equal(&pmc->addr, group)) | 493 | if (ipv6_addr_equal(&pmc->addr, group)) |
@@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) | |||
521 | write_unlock(&pmc->sflock); | 535 | write_unlock(&pmc->sflock); |
522 | err = 0; | 536 | err = 0; |
523 | done: | 537 | done: |
524 | read_unlock(&ipv6_sk_mc_lock); | ||
525 | read_unlock_bh(&idev->lock); | 538 | read_unlock_bh(&idev->lock); |
526 | rcu_read_unlock(); | 539 | rcu_read_unlock(); |
527 | if (leavegroup) | 540 | if (leavegroup) |
@@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, | |||
562 | * so reading the list is safe. | 575 | * so reading the list is safe. |
563 | */ | 576 | */ |
564 | 577 | ||
565 | for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { | 578 | for_each_pmc_rcu(inet6, pmc) { |
566 | if (pmc->ifindex != gsf->gf_interface) | 579 | if (pmc->ifindex != gsf->gf_interface) |
567 | continue; | 580 | continue; |
568 | if (ipv6_addr_equal(group, &pmc->addr)) | 581 | if (ipv6_addr_equal(group, &pmc->addr)) |
@@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, | |||
612 | struct ip6_sf_socklist *psl; | 625 | struct ip6_sf_socklist *psl; |
613 | int rv = 1; | 626 | int rv = 1; |
614 | 627 | ||
615 | read_lock(&ipv6_sk_mc_lock); | 628 | rcu_read_lock(); |
616 | for (mc = np->ipv6_mc_list; mc; mc = mc->next) { | 629 | for_each_pmc_rcu(np, mc) { |
617 | if (ipv6_addr_equal(&mc->addr, mc_addr)) | 630 | if (ipv6_addr_equal(&mc->addr, mc_addr)) |
618 | break; | 631 | break; |
619 | } | 632 | } |
620 | if (!mc) { | 633 | if (!mc) { |
621 | read_unlock(&ipv6_sk_mc_lock); | 634 | rcu_read_unlock(); |
622 | return 1; | 635 | return 1; |
623 | } | 636 | } |
624 | read_lock(&mc->sflock); | 637 | read_lock(&mc->sflock); |
@@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, | |||
638 | rv = 0; | 651 | rv = 0; |
639 | } | 652 | } |
640 | read_unlock(&mc->sflock); | 653 | read_unlock(&mc->sflock); |
641 | read_unlock(&ipv6_sk_mc_lock); | 654 | rcu_read_unlock(); |
642 | 655 | ||
643 | return rv; | 656 | return rv; |
644 | } | 657 | } |