about summary refs log tree commit diff stats
path: root/net/ipv6/udp.c
diff options
context:
space:
mode:
authorDavid Held <drheld@google.com>2014-07-15 23:28:32 -0400
committerDavid S. Miller <davem@davemloft.net>2014-07-17 02:29:52 -0400
commit2dc41cff7545d55c6294525c811594576f8e119c (patch)
treecab09b28d188606139b1b50b661f42da157c52ba /net/ipv6/udp.c
parent5cf3d46192fccf68b4a4759e4d7346e41c669a76 (diff)
udp: Use hash2 for long hash1 chains in __udp*_lib_mcast_deliver.
Many multicast sources can have the same port which can result in a very large list when hashing by port only. Hash by address and port instead if this is the case. This makes multicast more similar to unicast. On a 24-core machine receiving from 500 multicast sockets on the same port, before this patch 80% of system CPU was used up by spin locking and only ~25% of packets were successfully delivered. With this patch, all packets are delivered and kernel overhead is ~8% system CPU on spinlocks. Signed-off-by: David Held <drheld@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/udp.c')
-rw-r--r--net/ipv6/udp.c30
1 file changed, 20 insertions, 10 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7d3bd80085be..f9d8800bb72f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -745,6 +745,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
745 745
746 if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) 746 if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
747 skb1 = NULL; 747 skb1 = NULL;
748 sock_put(sk);
748 } 749 }
749 if (unlikely(skb1)) 750 if (unlikely(skb1))
750 kfree_skb(skb1); 751 kfree_skb(skb1);
@@ -774,10 +775,20 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
774 unsigned short hnum = ntohs(uh->dest); 775 unsigned short hnum = ntohs(uh->dest);
775 struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); 776 struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
776 int dif = inet6_iif(skb); 777 int dif = inet6_iif(skb);
777 unsigned int i, count = 0; 778 unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
779 unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
780
781 if (use_hash2) {
782 hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
783 udp_table.mask;
784 hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
785start_lookup:
786 hslot = &udp_table.hash2[hash2];
787 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
788 }
778 789
779 spin_lock(&hslot->lock); 790 spin_lock(&hslot->lock);
780 sk_nulls_for_each(sk, node, &hslot->head) { 791 sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
781 if (__udp_v6_is_mcast_sock(net, sk, 792 if (__udp_v6_is_mcast_sock(net, sk,
782 uh->dest, daddr, 793 uh->dest, daddr,
783 uh->source, saddr, 794 uh->source, saddr,
@@ -791,21 +802,20 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
791 count = 0; 802 count = 0;
792 } 803 }
793 stack[count++] = sk; 804 stack[count++] = sk;
805 sock_hold(sk);
794 } 806 }
795 } 807 }
796 /*
797 * before releasing the lock, we must take reference on sockets
798 */
799 for (i = 0; i < count; i++)
800 sock_hold(stack[i]);
801 808
802 spin_unlock(&hslot->lock); 809 spin_unlock(&hslot->lock);
803 810
811 /* Also lookup *:port if we are using hash2 and haven't done so yet. */
812 if (use_hash2 && hash2 != hash2_any) {
813 hash2 = hash2_any;
814 goto start_lookup;
815 }
816
804 if (count) { 817 if (count) {
805 flush_stack(stack, count, skb, count - 1); 818 flush_stack(stack, count, skb, count - 1);
806
807 for (i = 0; i < count; i++)
808 sock_put(stack[i]);
809 } else { 819 } else {
810 kfree_skb(skb); 820 kfree_skb(skb);
811 } 821 }