aboutsummaryrefslogtreecommitdiffstats
path: root/net/packet
diff options
context:
space:
mode:
author: Daniel Borkmann <dborkman@redhat.com> 2014-01-15 10:25:34 -0500
committer: David S. Miller <davem@davemloft.net> 2014-01-16 19:17:11 -0500
commit: 902fefb82ef72a50c78cb4a20cc954b037a98d1c (patch)
tree: d6cd83b38438a03d52a3fbbcba382f51cfd6511f /net/packet
parent: ec48a7879e4bd9b95a076c7e999d4c3bd7093554 (diff)
packet: improve socket create/bind latency in some cases
Most people acquire PF_PACKET sockets with a protocol argument in the socket call, e.g. libpcap does so with htons(ETH_P_ALL) for all its sockets. Most likely, at some point in time a subsequent bind() call will follow, e.g. in libpcap with ...

  memset(&sll, 0, sizeof(sll));
  sll.sll_family = AF_PACKET;
  sll.sll_ifindex = ifindex;
  sll.sll_protocol = htons(ETH_P_ALL);

... as arguments. What happens in the kernel is that already in the socket() syscall, we install a proto hook via register_prot_hook() if our protocol argument is != 0. Yet, in bind() we're almost doing the same work by doing an unregister_prot_hook() with an expensive synchronize_net() call in case during socket() the proto was != 0, plus a follow-up register_prot_hook() with a bound device to it this time, in order to limit the traffic we get.

In the case when the protocol and user supplied device index (== 0) do not change from socket() to bind(), we can spare ourselves doing the same work twice. Similarly for re-binding to the same device and protocol. For these scenarios, we can decrease create/bind latency from ~7447us (sock-bind-2 case) to ~89us (sock-bind-1 case) with this patch.

Alternatively, for the first case, if people care, they should simply create their sockets with a proto == 0 argument and define the protocol during bind() as this saves a call to synchronize_net() as well (sock-bind-3 case).

In all other cases, we're tied to user space behaviour we must not change, also since a bind() is not strictly required. Thus, we need the synchronize_net() to make sure no asynchronous packet processing paths still refer to the previous elements of po->prot_hook.

In case of mmap()ed sockets, the workflow that includes bind() is socket() -> setsockopt(<ring>) -> bind(). In that case, a pair of {__unregister, register}_prot_hook is being called from setsockopt() in order to install the new protocol receive handler. Thus, when we call bind() and can skip a re-hook, we have already previously installed the new handler.
For fanout, this is handled entirely differently, so we should be good.

Timings on an i7-3520M machine:

  * sock-bind-1: 89 us
  * sock-bind-2: 7447 us
  * sock-bind-3: 75 us

sock-bind-1:
  socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)) = 3
  bind(3, {sa_family=AF_PACKET, proto=htons(ETH_P_IP), if=all(0), pkttype=PACKET_HOST, addr(0)={0, }, 20) = 0

sock-bind-2:
  socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)) = 3
  bind(3, {sa_family=AF_PACKET, proto=htons(ETH_P_IP), if=lo(1), pkttype=PACKET_HOST, addr(0)={0, }, 20) = 0

sock-bind-3:
  socket(PF_PACKET, SOCK_RAW, 0) = 3
  bind(3, {sa_family=AF_PACKET, proto=htons(ETH_P_IP), if=lo(1), pkttype=PACKET_HOST, addr(0)={0, }, 20) = 0

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/packet')
-rw-r--r-- net/packet/af_packet.c 33
1 files changed, 22 insertions, 11 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 279467b74eb7..85bb38cb56fd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2567,9 +2567,12 @@ static int packet_release(struct socket *sock)
2567 * Attach a packet hook. 2567 * Attach a packet hook.
2568 */ 2568 */
2569 2569
2570static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) 2570static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
2571{ 2571{
2572 struct packet_sock *po = pkt_sk(sk); 2572 struct packet_sock *po = pkt_sk(sk);
2573 const struct net_device *dev_curr;
2574 __be16 proto_curr;
2575 bool need_rehook;
2573 2576
2574 if (po->fanout) { 2577 if (po->fanout) {
2575 if (dev) 2578 if (dev)
@@ -2579,21 +2582,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc
2579 } 2582 }
2580 2583
2581 lock_sock(sk); 2584 lock_sock(sk);
2582
2583 spin_lock(&po->bind_lock); 2585 spin_lock(&po->bind_lock);
2584 unregister_prot_hook(sk, true);
2585 2586
2586 po->num = protocol; 2587 proto_curr = po->prot_hook.type;
2587 po->prot_hook.type = protocol; 2588 dev_curr = po->prot_hook.dev;
2588 if (po->prot_hook.dev) 2589
2589 dev_put(po->prot_hook.dev); 2590 need_rehook = proto_curr != proto || dev_curr != dev;
2591
2592 if (need_rehook) {
2593 unregister_prot_hook(sk, true);
2590 2594
2591 po->prot_hook.dev = dev; 2595 po->num = proto;
2592 po->ifindex = dev ? dev->ifindex : 0; 2596 po->prot_hook.type = proto;
2597
2598 if (po->prot_hook.dev)
2599 dev_put(po->prot_hook.dev);
2593 2600
2594 packet_cached_dev_assign(po, dev); 2601 po->prot_hook.dev = dev;
2602
2603 po->ifindex = dev ? dev->ifindex : 0;
2604 packet_cached_dev_assign(po, dev);
2605 }
2595 2606
2596 if (protocol == 0) 2607 if (proto == 0 || !need_rehook)
2597 goto out_unlock; 2608 goto out_unlock;
2598 2609
2599 if (!dev || (dev->flags & IFF_UP)) { 2610 if (!dev || (dev->flags & IFF_UP)) {