author      Eric Dumazet <eric.dumazet@gmail.com>    2009-11-08 05:18:44 -0500
committer   David S. Miller <davem@davemloft.net>    2009-11-08 23:53:08 -0500
commit      1240d1373cd7f874dd0f3057c3e9643e71ef75c6 (patch)
tree        5bb5b7f72be25210a899e60f9137a4dad899bf05 /net/ipv4/udp.c
parent      fddc17defa22d8caba1cdfb2e22b50bb4b9f35c0 (diff)
ipv4: udp: Optimise multicast reception
The UDP multicast rx path is a bit complex and can hold a spinlock
for a long time.

Using a small (32 or 64 entry) stack of socket pointers lets us perform
the expensive operations (skb_clone(), udp_queue_rcv_skb()) outside of
the lock in most cases.

It is also a base for a future RCU conversion of multicast reception.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Lucian Adrian Grijincu <lgrijincu@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
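
The pattern the changelog describes (gather pointers into a small on-stack
array while the lock is held, then do the expensive per-entry work after
unlocking) is generic. Below is a minimal userspace sketch of that idea, not
the kernel code from this patch: the subscriber list, deliver_copy() and
deliver_to_all() are made-up names, a pthread mutex stands in for the
hash-slot spinlock, and the reference counting (sock_hold()/sock_put()) that
the real patch needs once the lock is dropped is omitted.

    /*
     * Minimal userspace sketch of the batching idea; all names are
     * illustrative, not from the kernel patch.  Build: cc -pthread sketch.c
     */
    #include <pthread.h>
    #include <stdio.h>

    #define STACK_SIZE 32                   /* small array on the caller's stack */

    struct subscriber {
            struct subscriber *next;
            int id;
    };

    static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct subscriber *chain_head;   /* list protected by chain_lock */

    /* stand-in for the expensive work (skb_clone() + queueing in the patch) */
    static void deliver_copy(struct subscriber *s, const char *msg)
    {
            printf("deliver to %d: %s\n", s->id, msg);
    }

    static void deliver_to_all(const char *msg)
    {
            struct subscriber *stack[STACK_SIZE];
            struct subscriber *s;
            unsigned int i, count = 0;

            pthread_mutex_lock(&chain_lock);
            for (s = chain_head; s; s = s->next) {
                    stack[count++] = s;
                    if (count == STACK_SIZE) {
                            /* rare overflow: flush while still holding the lock */
                            for (i = 0; i < count; i++)
                                    deliver_copy(stack[i], msg);
                            count = 0;
                    }
            }
            /* the kernel patch also takes a reference on each socket here,
             * so the entries stay valid after the lock is released */
            pthread_mutex_unlock(&chain_lock);

            /* common case: the expensive work runs with no lock held */
            for (i = 0; i < count; i++)
                    deliver_copy(stack[i], msg);
    }

    int main(void)
    {
            struct subscriber a = { .next = NULL, .id = 1 };
            struct subscriber b = { .next = &a,   .id = 2 };

            chain_head = &b;
            deliver_to_all("hello");
            return 0;
    }

In the kernel version the overflow path can also stop early when no sockets
remain, and the references taken before unlocking keep each socket alive until
flush_stack() has delivered to it.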
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--    net/ipv4/udp.c    76
1 file changed, 50 insertions(+), 26 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dd7f3d20989a..9d9072c6cce7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1329,49 +1329,73 @@ drop:
 	return -1;
 }
 
+
+static void flush_stack(struct sock **stack, unsigned int count,
+			struct sk_buff *skb, unsigned int final)
+{
+	unsigned int i;
+	struct sk_buff *skb1 = NULL;
+
+	for (i = 0; i < count; i++) {
+		if (likely(skb1 == NULL))
+			skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+
+		if (skb1 && udp_queue_rcv_skb(stack[i], skb1) <= 0)
+			skb1 = NULL;
+	}
+	if (unlikely(skb1))
+		kfree_skb(skb1);
+}
+
 /*
  *	Multicasts and broadcasts go to each listener.
  *
- *	Note: called only from the BH handler context,
- *	so we don't need to lock the hashes.
+ *	Note: called only from the BH handler context.
  */
 static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				    struct udphdr *uh,
 				    __be32 saddr, __be32 daddr,
 				    struct udp_table *udptable)
 {
-	struct sock *sk;
+	struct sock *sk, *stack[256 / sizeof(struct sock *)];
 	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
+	unsigned int i, count = 0;
 
 	spin_lock(&hslot->lock);
 	sk = sk_nulls_head(&hslot->head);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (sk) {
-		struct sock *sknext = NULL;
-
-		do {
-			struct sk_buff *skb1 = skb;
-
-			sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
-						   daddr, uh->source, saddr,
-						   dif);
-			if (sknext)
-				skb1 = skb_clone(skb, GFP_ATOMIC);
-
-			if (skb1) {
-				int ret = udp_queue_rcv_skb(sk, skb1);
-				if (ret > 0)
-					/* we should probably re-process instead
-					 * of dropping packets here. */
-					kfree_skb(skb1);
-			}
-			sk = sknext;
-		} while (sknext);
-	} else
-		consume_skb(skb);
+	while (sk) {
+		stack[count++] = sk;
+		sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
+				       daddr, uh->source, saddr, dif);
+		if (unlikely(count == ARRAY_SIZE(stack))) {
+			if (!sk)
+				break;
+			flush_stack(stack, count, skb, ~0);
+			count = 0;
+		}
+	}
+	/*
+	 * before releasing chain lock, we must take a reference on sockets
+	 */
+	for (i = 0; i < count; i++)
+		sock_hold(stack[i]);
+
 	spin_unlock(&hslot->lock);
+
+	/*
+	 * do the slow work with no lock held
+	 */
+	if (count) {
+		flush_stack(stack, count, skb, count - 1);
+
+		for (i = 0; i < count; i++)
+			sock_put(stack[i]);
+	} else {
+		kfree_skb(skb);
+	}
 	return 0;
 }
 
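
One subtle point in flush_stack() above is the final argument: the socket at
that index receives the original skb rather than a clone, saving one
skb_clone() per packet in the common case. When the on-stack array overflows
mid-walk, ~0 is passed instead, so no index matches and every delivery uses a
clone, keeping the original skb alive for the sockets still to be visited.
The following simplified userspace sketch mirrors that logic; struct buffer,
buf_dup(), queue_to() and buf_free() are hypothetical helpers (reusing struct
subscriber from the earlier sketch), not kernel APIs.

    struct buffer;
    struct buffer *buf_dup(const struct buffer *buf);      /* like skb_clone() */
    int queue_to(struct subscriber *s, struct buffer *b);  /* 0 = accepted     */
    void buf_free(struct buffer *b);

    static void flush_batch(struct subscriber **stack, unsigned int count,
                            struct buffer *buf, unsigned int final)
    {
            struct buffer *b = NULL;
            unsigned int i;

            for (i = 0; i < count; i++) {
                    if (b == NULL)          /* previous copy was consumed */
                            b = (i == final) ? buf : buf_dup(buf);
                    if (b && queue_to(stack[i], b) == 0)
                            b = NULL;       /* receiver now owns this copy */
            }
            if (b)
                    buf_free(b);            /* leftover copy nobody accepted */
    }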