diff options
author | Eric Dumazet <edumazet@google.com> | 2013-06-07 11:48:57 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-06-12 03:25:23 -0400 |
commit | e9897071350bd9d94a56b5b6f79c85b1a98fc7e7 (patch) | |
tree | d01026afb4450ef685722cd7a0fe1815336170ca | |
parent | 64153ce0a7b61b2a5cacb01805cbf670142339e9 (diff) |
igmp: add a hash table to speed up ip_check_mc_rcu()
After IP route cache removal, multicast applications using
a lot of multicast addresses hit an O(N) behavior in ip_check_mc_rcu().
Add a per-in_device hash table to get faster lookups.
This hash table is created only once the number of items in mc_list is
at least 4 (ip_mc_hash_add() skips creation while mc_count < 4).
Reported-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/igmp.h | 1 | ||||
-rw-r--r-- | include/linux/inetdevice.h | 5 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 1 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 73 |
4 files changed, 77 insertions, 3 deletions
diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 7f2bf1518480..e3362b5f13e8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h | |||
@@ -84,6 +84,7 @@ struct ip_mc_list { | |||
84 | struct ip_mc_list *next; | 84 | struct ip_mc_list *next; |
85 | struct ip_mc_list __rcu *next_rcu; | 85 | struct ip_mc_list __rcu *next_rcu; |
86 | }; | 86 | }; |
87 | struct ip_mc_list __rcu *next_hash; | ||
87 | struct timer_list timer; | 88 | struct timer_list timer; |
88 | int users; | 89 | int users; |
89 | atomic_t refcnt; | 90 | atomic_t refcnt; |
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ea1e3b863890..b99cd23f3474 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h | |||
@@ -50,12 +50,17 @@ struct ipv4_devconf { | |||
50 | DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); | 50 | DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); |
51 | }; | 51 | }; |
52 | 52 | ||
53 | #define MC_HASH_SZ_LOG 9 | ||
54 | |||
53 | struct in_device { | 55 | struct in_device { |
54 | struct net_device *dev; | 56 | struct net_device *dev; |
55 | atomic_t refcnt; | 57 | atomic_t refcnt; |
56 | int dead; | 58 | int dead; |
57 | struct in_ifaddr *ifa_list; /* IP ifaddr chain */ | 59 | struct in_ifaddr *ifa_list; /* IP ifaddr chain */ |
60 | |||
58 | struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ | 61 | struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ |
62 | struct ip_mc_list __rcu * __rcu *mc_hash; | ||
63 | |||
59 | int mc_count; /* Number of installed mcasts */ | 64 | int mc_count; /* Number of installed mcasts */ |
60 | spinlock_t mc_tomb_lock; | 65 | spinlock_t mc_tomb_lock; |
61 | struct ip_mc_list *mc_tomb; | 66 | struct ip_mc_list *mc_tomb; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b047e2d8a614..3469506c106d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev) | |||
215 | 215 | ||
216 | WARN_ON(idev->ifa_list); | 216 | WARN_ON(idev->ifa_list); |
217 | WARN_ON(idev->mc_list); | 217 | WARN_ON(idev->mc_list); |
218 | kfree(rcu_dereference_protected(idev->mc_hash, 1)); | ||
218 | #ifdef NET_REFCNT_DEBUG | 219 | #ifdef NET_REFCNT_DEBUG |
219 | pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); | 220 | pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); |
220 | #endif | 221 | #endif |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 450f625361e4..f72011df9c59 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im) | |||
1217 | * Multicast list managers | 1217 | * Multicast list managers |
1218 | */ | 1218 | */ |
1219 | 1219 | ||
1220 | static u32 ip_mc_hash(const struct ip_mc_list *im) | ||
1221 | { | ||
1222 | return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG); | ||
1223 | } | ||
1224 | |||
1225 | static void ip_mc_hash_add(struct in_device *in_dev, | ||
1226 | struct ip_mc_list *im) | ||
1227 | { | ||
1228 | struct ip_mc_list __rcu **mc_hash; | ||
1229 | u32 hash; | ||
1230 | |||
1231 | mc_hash = rtnl_dereference(in_dev->mc_hash); | ||
1232 | if (mc_hash) { | ||
1233 | hash = ip_mc_hash(im); | ||
1234 | im->next_hash = rtnl_dereference(mc_hash[hash]); | ||
1235 | rcu_assign_pointer(mc_hash[hash], im); | ||
1236 | return; | ||
1237 | } | ||
1238 | |||
1239 | /* do not use a hash table for small number of items */ | ||
1240 | if (in_dev->mc_count < 4) | ||
1241 | return; | ||
1242 | |||
1243 | mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, | ||
1244 | GFP_KERNEL); | ||
1245 | if (!mc_hash) | ||
1246 | return; | ||
1247 | |||
1248 | for_each_pmc_rtnl(in_dev, im) { | ||
1249 | hash = ip_mc_hash(im); | ||
1250 | im->next_hash = rtnl_dereference(mc_hash[hash]); | ||
1251 | RCU_INIT_POINTER(mc_hash[hash], im); | ||
1252 | } | ||
1253 | |||
1254 | rcu_assign_pointer(in_dev->mc_hash, mc_hash); | ||
1255 | } | ||
1256 | |||
1257 | static void ip_mc_hash_remove(struct in_device *in_dev, | ||
1258 | struct ip_mc_list *im) | ||
1259 | { | ||
1260 | struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); | ||
1261 | struct ip_mc_list *aux; | ||
1262 | |||
1263 | if (!mc_hash) | ||
1264 | return; | ||
1265 | mc_hash += ip_mc_hash(im); | ||
1266 | while ((aux = rtnl_dereference(*mc_hash)) != im) | ||
1267 | mc_hash = &aux->next_hash; | ||
1268 | *mc_hash = im->next_hash; | ||
1269 | } | ||
1270 | |||
1220 | 1271 | ||
1221 | /* | 1272 | /* |
1222 | * A socket has joined a multicast group on device dev. | 1273 | * A socket has joined a multicast group on device dev. |
@@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) | |||
1258 | in_dev->mc_count++; | 1309 | in_dev->mc_count++; |
1259 | rcu_assign_pointer(in_dev->mc_list, im); | 1310 | rcu_assign_pointer(in_dev->mc_list, im); |
1260 | 1311 | ||
1312 | ip_mc_hash_add(in_dev, im); | ||
1313 | |||
1261 | #ifdef CONFIG_IP_MULTICAST | 1314 | #ifdef CONFIG_IP_MULTICAST |
1262 | igmpv3_del_delrec(in_dev, im->multiaddr); | 1315 | igmpv3_del_delrec(in_dev, im->multiaddr); |
1263 | #endif | 1316 | #endif |
@@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) | |||
1314 | ip = &i->next_rcu) { | 1367 | ip = &i->next_rcu) { |
1315 | if (i->multiaddr == addr) { | 1368 | if (i->multiaddr == addr) { |
1316 | if (--i->users == 0) { | 1369 | if (--i->users == 0) { |
1370 | ip_mc_hash_remove(in_dev, i); | ||
1317 | *ip = i->next_rcu; | 1371 | *ip = i->next_rcu; |
1318 | in_dev->mc_count--; | 1372 | in_dev->mc_count--; |
1319 | igmp_group_dropped(i); | 1373 | igmp_group_dropped(i); |
@@ -2321,12 +2375,25 @@ void ip_mc_drop_socket(struct sock *sk) | |||
2321 | int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) | 2375 | int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) |
2322 | { | 2376 | { |
2323 | struct ip_mc_list *im; | 2377 | struct ip_mc_list *im; |
2378 | struct ip_mc_list __rcu **mc_hash; | ||
2324 | struct ip_sf_list *psf; | 2379 | struct ip_sf_list *psf; |
2325 | int rv = 0; | 2380 | int rv = 0; |
2326 | 2381 | ||
2327 | for_each_pmc_rcu(in_dev, im) { | 2382 | mc_hash = rcu_dereference(in_dev->mc_hash); |
2328 | if (im->multiaddr == mc_addr) | 2383 | if (mc_hash) { |
2329 | break; | 2384 | u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG); |
2385 | |||
2386 | for (im = rcu_dereference(mc_hash[hash]); | ||
2387 | im != NULL; | ||
2388 | im = rcu_dereference(im->next_hash)) { | ||
2389 | if (im->multiaddr == mc_addr) | ||
2390 | break; | ||
2391 | } | ||
2392 | } else { | ||
2393 | for_each_pmc_rcu(in_dev, im) { | ||
2394 | if (im->multiaddr == mc_addr) | ||
2395 | break; | ||
2396 | } | ||
2330 | } | 2397 | } |
2331 | if (im && proto == IPPROTO_IGMP) { | 2398 | if (im && proto == IPPROTO_IGMP) { |
2332 | rv = 1; | 2399 | rv = 1; |