diff options
author | David S. Miller <davem@davemloft.net> | 2012-06-20 00:22:05 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-06-20 00:22:05 -0400 |
commit | 41063e9dd11956f2d285e12e4342e1d232ba0ea2 (patch) | |
tree | d4df2f51044b4724a4767f0498c3df2f606b5ad7 /net/ipv4/tcp_ipv4.c | |
parent | f9242b6b28d61295f2bf7e8adfb1060b382e5381 (diff) |
ipv4: Early TCP socket demux.
Input packet processing for local sockets involves two major demuxes.
One for the route and one for the socket.
But we can optimize this down to one demux for certain kinds of local
sockets.
Currently we only do this for established TCP sockets, but it could
at least in theory be expanded to other kinds of connections.
If a TCP socket is established then its identity is fully specified.
This means that whatever input route was used during the three-way
handshake must work equally well for the rest of the connection since
the keys will not change.
Once we move to established state, we cache the receive packet's input
route to use later.
Like the existing cached route in sk->sk_dst_cache used for output
packets, we have to check for route invalidations using dst->obsolete
and dst->ops->check().
Early demux occurs outside of a socket locked section, so when a route
invalidation occurs we defer the fixup of sk->sk_rx_dst until we are
actually inside of established state packet processing and thus have
the socket locked.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fda2ca17135e..13857df1dae1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1671,6 +1671,52 @@ csum_err: | |||
1671 | } | 1671 | } |
1672 | EXPORT_SYMBOL(tcp_v4_do_rcv); | 1672 | EXPORT_SYMBOL(tcp_v4_do_rcv); |
1673 | 1673 | ||
1674 | int tcp_v4_early_demux(struct sk_buff *skb) | ||
1675 | { | ||
1676 | struct net *net = dev_net(skb->dev); | ||
1677 | const struct iphdr *iph; | ||
1678 | const struct tcphdr *th; | ||
1679 | struct sock *sk; | ||
1680 | int err; | ||
1681 | |||
1682 | err = -ENOENT; | ||
1683 | if (skb->pkt_type != PACKET_HOST) | ||
1684 | goto out_err; | ||
1685 | |||
1686 | if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) | ||
1687 | goto out_err; | ||
1688 | |||
1689 | iph = ip_hdr(skb); | ||
1690 | th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); | ||
1691 | |||
1692 | if (th->doff < sizeof(struct tcphdr) / 4) | ||
1693 | goto out_err; | ||
1694 | |||
1695 | if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) | ||
1696 | goto out_err; | ||
1697 | |||
1698 | sk = __inet_lookup_established(net, &tcp_hashinfo, | ||
1699 | iph->saddr, th->source, | ||
1700 | iph->daddr, th->dest, | ||
1701 | skb->dev->ifindex); | ||
1702 | if (sk) { | ||
1703 | skb->sk = sk; | ||
1704 | skb->destructor = sock_edemux; | ||
1705 | if (sk->sk_state != TCP_TIME_WAIT) { | ||
1706 | struct dst_entry *dst = sk->sk_rx_dst; | ||
1707 | if (dst) | ||
1708 | dst = dst_check(dst, 0); | ||
1709 | if (dst) { | ||
1710 | skb_dst_set_noref(skb, dst); | ||
1711 | err = 0; | ||
1712 | } | ||
1713 | } | ||
1714 | } | ||
1715 | |||
1716 | out_err: | ||
1717 | return err; | ||
1718 | } | ||
1719 | |||
1674 | /* | 1720 | /* |
1675 | * From tcp_input.c | 1721 | * From tcp_input.c |
1676 | */ | 1722 | */ |