aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2013-06-07 11:48:57 -0400
committer: David S. Miller <davem@davemloft.net> 2013-06-12 03:25:23 -0400
commit: e9897071350bd9d94a56b5b6f79c85b1a98fc7e7 (patch)
tree: d01026afb4450ef685722cd7a0fe1815336170ca
parent: 64153ce0a7b61b2a5cacb01805cbf670142339e9 (diff)
igmp: add a hash table to speed up ip_check_mc_rcu()
After IP route cache removal, multicast applications using a lot of multicast addresses hit O(N) behavior in ip_check_mc_rcu().

Add a per in_device hash table to get faster lookups. This hash table is created only once the number of items in mc_list reaches 4 (it is skipped while mc_count is below 4).

Reported-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/linux/igmp.h        1
-rw-r--r--  include/linux/inetdevice.h  5
-rw-r--r--  net/ipv4/devinet.c          1
-rw-r--r--  net/ipv4/igmp.c            73
4 files changed, 77 insertions, 3 deletions
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 7f2bf1518480..e3362b5f13e8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -84,6 +84,7 @@ struct ip_mc_list {
84 struct ip_mc_list *next; 84 struct ip_mc_list *next;
85 struct ip_mc_list __rcu *next_rcu; 85 struct ip_mc_list __rcu *next_rcu;
86 }; 86 };
87 struct ip_mc_list __rcu *next_hash;
87 struct timer_list timer; 88 struct timer_list timer;
88 int users; 89 int users;
89 atomic_t refcnt; 90 atomic_t refcnt;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ea1e3b863890..b99cd23f3474 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -50,12 +50,17 @@ struct ipv4_devconf {
50 DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); 50 DECLARE_BITMAP(state, IPV4_DEVCONF_MAX);
51}; 51};
52 52
53#define MC_HASH_SZ_LOG 9
54
53struct in_device { 55struct in_device {
54 struct net_device *dev; 56 struct net_device *dev;
55 atomic_t refcnt; 57 atomic_t refcnt;
56 int dead; 58 int dead;
57 struct in_ifaddr *ifa_list; /* IP ifaddr chain */ 59 struct in_ifaddr *ifa_list; /* IP ifaddr chain */
60
58 struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ 61 struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */
62 struct ip_mc_list __rcu * __rcu *mc_hash;
63
59 int mc_count; /* Number of installed mcasts */ 64 int mc_count; /* Number of installed mcasts */
60 spinlock_t mc_tomb_lock; 65 spinlock_t mc_tomb_lock;
61 struct ip_mc_list *mc_tomb; 66 struct ip_mc_list *mc_tomb;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b047e2d8a614..3469506c106d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev)
215 215
216 WARN_ON(idev->ifa_list); 216 WARN_ON(idev->ifa_list);
217 WARN_ON(idev->mc_list); 217 WARN_ON(idev->mc_list);
218 kfree(rcu_dereference_protected(idev->mc_hash, 1));
218#ifdef NET_REFCNT_DEBUG 219#ifdef NET_REFCNT_DEBUG
219 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); 220 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220#endif 221#endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 450f625361e4..f72011df9c59 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im)
1217 * Multicast list managers 1217 * Multicast list managers
1218 */ 1218 */
1219 1219
1220static u32 ip_mc_hash(const struct ip_mc_list *im)
1221{
1222 return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG);
1223}
1224
1225static void ip_mc_hash_add(struct in_device *in_dev,
1226 struct ip_mc_list *im)
1227{
1228 struct ip_mc_list __rcu **mc_hash;
1229 u32 hash;
1230
1231 mc_hash = rtnl_dereference(in_dev->mc_hash);
1232 if (mc_hash) {
1233 hash = ip_mc_hash(im);
1234 im->next_hash = rtnl_dereference(mc_hash[hash]);
1235 rcu_assign_pointer(mc_hash[hash], im);
1236 return;
1237 }
1238
1239 /* do not use a hash table for small number of items */
1240 if (in_dev->mc_count < 4)
1241 return;
1242
1243 mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG,
1244 GFP_KERNEL);
1245 if (!mc_hash)
1246 return;
1247
1248 for_each_pmc_rtnl(in_dev, im) {
1249 hash = ip_mc_hash(im);
1250 im->next_hash = rtnl_dereference(mc_hash[hash]);
1251 RCU_INIT_POINTER(mc_hash[hash], im);
1252 }
1253
1254 rcu_assign_pointer(in_dev->mc_hash, mc_hash);
1255}
1256
1257static void ip_mc_hash_remove(struct in_device *in_dev,
1258 struct ip_mc_list *im)
1259{
1260 struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash);
1261 struct ip_mc_list *aux;
1262
1263 if (!mc_hash)
1264 return;
1265 mc_hash += ip_mc_hash(im);
1266 while ((aux = rtnl_dereference(*mc_hash)) != im)
1267 mc_hash = &aux->next_hash;
1268 *mc_hash = im->next_hash;
1269}
1270
1220 1271
1221/* 1272/*
1222 * A socket has joined a multicast group on device dev. 1273 * A socket has joined a multicast group on device dev.
@@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1258 in_dev->mc_count++; 1309 in_dev->mc_count++;
1259 rcu_assign_pointer(in_dev->mc_list, im); 1310 rcu_assign_pointer(in_dev->mc_list, im);
1260 1311
1312 ip_mc_hash_add(in_dev, im);
1313
1261#ifdef CONFIG_IP_MULTICAST 1314#ifdef CONFIG_IP_MULTICAST
1262 igmpv3_del_delrec(in_dev, im->multiaddr); 1315 igmpv3_del_delrec(in_dev, im->multiaddr);
1263#endif 1316#endif
@@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1314 ip = &i->next_rcu) { 1367 ip = &i->next_rcu) {
1315 if (i->multiaddr == addr) { 1368 if (i->multiaddr == addr) {
1316 if (--i->users == 0) { 1369 if (--i->users == 0) {
1370 ip_mc_hash_remove(in_dev, i);
1317 *ip = i->next_rcu; 1371 *ip = i->next_rcu;
1318 in_dev->mc_count--; 1372 in_dev->mc_count--;
1319 igmp_group_dropped(i); 1373 igmp_group_dropped(i);
@@ -2321,12 +2375,25 @@ void ip_mc_drop_socket(struct sock *sk)
2321int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) 2375int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
2322{ 2376{
2323 struct ip_mc_list *im; 2377 struct ip_mc_list *im;
2378 struct ip_mc_list __rcu **mc_hash;
2324 struct ip_sf_list *psf; 2379 struct ip_sf_list *psf;
2325 int rv = 0; 2380 int rv = 0;
2326 2381
2327 for_each_pmc_rcu(in_dev, im) { 2382 mc_hash = rcu_dereference(in_dev->mc_hash);
2328 if (im->multiaddr == mc_addr) 2383 if (mc_hash) {
2329 break; 2384 u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG);
2385
2386 for (im = rcu_dereference(mc_hash[hash]);
2387 im != NULL;
2388 im = rcu_dereference(im->next_hash)) {
2389 if (im->multiaddr == mc_addr)
2390 break;
2391 }
2392 } else {
2393 for_each_pmc_rcu(in_dev, im) {
2394 if (im->multiaddr == mc_addr)
2395 break;
2396 }
2330 } 2397 }
2331 if (im && proto == IPPROTO_IGMP) { 2398 if (im && proto == IPPROTO_IGMP) {
2332 rv = 1; 2399 rv = 1;