diff options
-rw-r--r-- | include/net/inet_hashtables.h | 4 | ||||
-rw-r--r-- | include/net/protocol.h | 1 | ||||
-rw-r--r-- | include/net/sock.h | 2 | ||||
-rw-r--r-- | include/net/tcp.h | 1 | ||||
-rw-r--r-- | net/core/sock.c | 5 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 18 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 39 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 16 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 |
10 files changed, 110 insertions, 24 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 808fc5f76b03..54be0287eb98 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h | |||
@@ -379,10 +379,10 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, | |||
379 | const __be16 sport, | 379 | const __be16 sport, |
380 | const __be16 dport) | 380 | const __be16 dport) |
381 | { | 381 | { |
382 | struct sock *sk; | 382 | struct sock *sk = skb_steal_sock(skb); |
383 | const struct iphdr *iph = ip_hdr(skb); | 383 | const struct iphdr *iph = ip_hdr(skb); |
384 | 384 | ||
385 | if (unlikely(sk = skb_steal_sock(skb))) | 385 | if (sk) |
386 | return sk; | 386 | return sk; |
387 | else | 387 | else |
388 | return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, | 388 | return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, |
diff --git a/include/net/protocol.h b/include/net/protocol.h index a1b1b530c338..967b926cbfb1 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | /* This is used to register protocols. */ | 38 | /* This is used to register protocols. */ |
39 | struct net_protocol { | 39 | struct net_protocol { |
40 | int (*early_demux)(struct sk_buff *skb); | ||
40 | int (*handler)(struct sk_buff *skb); | 41 | int (*handler)(struct sk_buff *skb); |
41 | void (*err_handler)(struct sk_buff *skb, u32 info); | 42 | void (*err_handler)(struct sk_buff *skb, u32 info); |
42 | int (*gso_send_check)(struct sk_buff *skb); | 43 | int (*gso_send_check)(struct sk_buff *skb); |
diff --git a/include/net/sock.h b/include/net/sock.h index 4a4521699563..87b424ae750a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -319,6 +319,7 @@ struct sock { | |||
319 | unsigned long sk_flags; | 319 | unsigned long sk_flags; |
320 | struct dst_entry *sk_dst_cache; | 320 | struct dst_entry *sk_dst_cache; |
321 | spinlock_t sk_dst_lock; | 321 | spinlock_t sk_dst_lock; |
322 | struct dst_entry *sk_rx_dst; | ||
322 | atomic_t sk_wmem_alloc; | 323 | atomic_t sk_wmem_alloc; |
323 | atomic_t sk_omem_alloc; | 324 | atomic_t sk_omem_alloc; |
324 | int sk_sndbuf; | 325 | int sk_sndbuf; |
@@ -1426,6 +1427,7 @@ extern struct sk_buff *sock_rmalloc(struct sock *sk, | |||
1426 | gfp_t priority); | 1427 | gfp_t priority); |
1427 | extern void sock_wfree(struct sk_buff *skb); | 1428 | extern void sock_wfree(struct sk_buff *skb); |
1428 | extern void sock_rfree(struct sk_buff *skb); | 1429 | extern void sock_rfree(struct sk_buff *skb); |
1430 | extern void sock_edemux(struct sk_buff *skb); | ||
1429 | 1431 | ||
1430 | extern int sock_setsockopt(struct socket *sock, int level, | 1432 | extern int sock_setsockopt(struct socket *sock, int level, |
1431 | int op, char __user *optval, | 1433 | int op, char __user *optval, |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 9332f342259a..6660ffc4963d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -325,6 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); | |||
325 | 325 | ||
326 | extern void tcp_shutdown (struct sock *sk, int how); | 326 | extern void tcp_shutdown (struct sock *sk, int how); |
327 | 327 | ||
328 | extern int tcp_v4_early_demux(struct sk_buff *skb); | ||
328 | extern int tcp_v4_rcv(struct sk_buff *skb); | 329 | extern int tcp_v4_rcv(struct sk_buff *skb); |
329 | 330 | ||
330 | extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); | 331 | extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); |
diff --git a/net/core/sock.c b/net/core/sock.c index 9e5b71fda6ec..929bdcc2383b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1465,6 +1465,11 @@ void sock_rfree(struct sk_buff *skb) | |||
1465 | } | 1465 | } |
1466 | EXPORT_SYMBOL(sock_rfree); | 1466 | EXPORT_SYMBOL(sock_rfree); |
1467 | 1467 | ||
1468 | void sock_edemux(struct sk_buff *skb) | ||
1469 | { | ||
1470 | sock_put(skb->sk); | ||
1471 | } | ||
1472 | EXPORT_SYMBOL(sock_edemux); | ||
1468 | 1473 | ||
1469 | int sock_i_uid(struct sock *sk) | 1474 | int sock_i_uid(struct sock *sk) |
1470 | { | 1475 | { |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 85a3b1763136..07a02f6e9696 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -157,6 +157,7 @@ void inet_sock_destruct(struct sock *sk) | |||
157 | 157 | ||
158 | kfree(rcu_dereference_protected(inet->inet_opt, 1)); | 158 | kfree(rcu_dereference_protected(inet->inet_opt, 1)); |
159 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); | 159 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); |
160 | dst_release(sk->sk_rx_dst); | ||
160 | sk_refcnt_debug_dec(sk); | 161 | sk_refcnt_debug_dec(sk); |
161 | } | 162 | } |
162 | EXPORT_SYMBOL(inet_sock_destruct); | 163 | EXPORT_SYMBOL(inet_sock_destruct); |
@@ -1518,14 +1519,15 @@ static const struct net_protocol igmp_protocol = { | |||
1518 | #endif | 1519 | #endif |
1519 | 1520 | ||
1520 | static const struct net_protocol tcp_protocol = { | 1521 | static const struct net_protocol tcp_protocol = { |
1521 | .handler = tcp_v4_rcv, | 1522 | .early_demux = tcp_v4_early_demux, |
1522 | .err_handler = tcp_v4_err, | 1523 | .handler = tcp_v4_rcv, |
1523 | .gso_send_check = tcp_v4_gso_send_check, | 1524 | .err_handler = tcp_v4_err, |
1524 | .gso_segment = tcp_tso_segment, | 1525 | .gso_send_check = tcp_v4_gso_send_check, |
1525 | .gro_receive = tcp4_gro_receive, | 1526 | .gso_segment = tcp_tso_segment, |
1526 | .gro_complete = tcp4_gro_complete, | 1527 | .gro_receive = tcp4_gro_receive, |
1527 | .no_policy = 1, | 1528 | .gro_complete = tcp4_gro_complete, |
1528 | .netns_ok = 1, | 1529 | .no_policy = 1, |
1530 | .netns_ok = 1, | ||
1529 | }; | 1531 | }; |
1530 | 1532 | ||
1531 | static const struct net_protocol udp_protocol = { | 1533 | static const struct net_protocol udp_protocol = { |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c4fe1d271131..93b092c9a394 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -323,19 +323,32 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
323 | * how the packet travels inside Linux networking. | 323 | * how the packet travels inside Linux networking. |
324 | */ | 324 | */ |
325 | if (skb_dst(skb) == NULL) { | 325 | if (skb_dst(skb) == NULL) { |
326 | int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, | 326 | const struct net_protocol *ipprot; |
327 | iph->tos, skb->dev); | 327 | int protocol = iph->protocol; |
328 | if (unlikely(err)) { | 328 | int err; |
329 | if (err == -EHOSTUNREACH) | 329 | |
330 | IP_INC_STATS_BH(dev_net(skb->dev), | 330 | rcu_read_lock(); |
331 | IPSTATS_MIB_INADDRERRORS); | 331 | ipprot = rcu_dereference(inet_protos[protocol]); |
332 | else if (err == -ENETUNREACH) | 332 | err = -ENOENT; |
333 | IP_INC_STATS_BH(dev_net(skb->dev), | 333 | if (ipprot && ipprot->early_demux) |
334 | IPSTATS_MIB_INNOROUTES); | 334 | err = ipprot->early_demux(skb); |
335 | else if (err == -EXDEV) | 335 | rcu_read_unlock(); |
336 | NET_INC_STATS_BH(dev_net(skb->dev), | 336 | |
337 | LINUX_MIB_IPRPFILTER); | 337 | if (err) { |
338 | goto drop; | 338 | err = ip_route_input_noref(skb, iph->daddr, iph->saddr, |
339 | iph->tos, skb->dev); | ||
340 | if (unlikely(err)) { | ||
341 | if (err == -EHOSTUNREACH) | ||
342 | IP_INC_STATS_BH(dev_net(skb->dev), | ||
343 | IPSTATS_MIB_INADDRERRORS); | ||
344 | else if (err == -ENETUNREACH) | ||
345 | IP_INC_STATS_BH(dev_net(skb->dev), | ||
346 | IPSTATS_MIB_INNOROUTES); | ||
347 | else if (err == -EXDEV) | ||
348 | NET_INC_STATS_BH(dev_net(skb->dev), | ||
349 | LINUX_MIB_IPRPFILTER); | ||
350 | goto drop; | ||
351 | } | ||
339 | } | 352 | } |
340 | } | 353 | } |
341 | 354 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b224eb8bce8b..8416f8a68e65 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -5518,6 +5518,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5518 | struct tcp_sock *tp = tcp_sk(sk); | 5518 | struct tcp_sock *tp = tcp_sk(sk); |
5519 | int res; | 5519 | int res; |
5520 | 5520 | ||
5521 | if (sk->sk_rx_dst) { | ||
5522 | struct dst_entry *dst = sk->sk_rx_dst; | ||
5523 | if (unlikely(dst->obsolete)) { | ||
5524 | if (dst->ops->check(dst, 0) == NULL) { | ||
5525 | dst_release(dst); | ||
5526 | sk->sk_rx_dst = NULL; | ||
5527 | } | ||
5528 | } | ||
5529 | } | ||
5530 | if (unlikely(sk->sk_rx_dst == NULL)) | ||
5531 | sk->sk_rx_dst = dst_clone(skb_dst(skb)); | ||
5532 | |||
5521 | /* | 5533 | /* |
5522 | * Header prediction. | 5534 | * Header prediction. |
5523 | * The code loosely follows the one in the famous | 5535 | * The code loosely follows the one in the famous |
@@ -5729,8 +5741,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) | |||
5729 | 5741 | ||
5730 | tcp_set_state(sk, TCP_ESTABLISHED); | 5742 | tcp_set_state(sk, TCP_ESTABLISHED); |
5731 | 5743 | ||
5732 | if (skb != NULL) | 5744 | if (skb != NULL) { |
5745 | sk->sk_rx_dst = dst_clone(skb_dst(skb)); | ||
5733 | security_inet_conn_established(sk, skb); | 5746 | security_inet_conn_established(sk, skb); |
5747 | } | ||
5734 | 5748 | ||
5735 | /* Make sure socket is routed, for correct metrics. */ | 5749 | /* Make sure socket is routed, for correct metrics. */ |
5736 | icsk->icsk_af_ops->rebuild_header(sk); | 5750 | icsk->icsk_af_ops->rebuild_header(sk); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fda2ca17135e..13857df1dae1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1671,6 +1671,52 @@ csum_err: | |||
1671 | } | 1671 | } |
1672 | EXPORT_SYMBOL(tcp_v4_do_rcv); | 1672 | EXPORT_SYMBOL(tcp_v4_do_rcv); |
1673 | 1673 | ||
1674 | int tcp_v4_early_demux(struct sk_buff *skb) | ||
1675 | { | ||
1676 | struct net *net = dev_net(skb->dev); | ||
1677 | const struct iphdr *iph; | ||
1678 | const struct tcphdr *th; | ||
1679 | struct sock *sk; | ||
1680 | int err; | ||
1681 | |||
1682 | err = -ENOENT; | ||
1683 | if (skb->pkt_type != PACKET_HOST) | ||
1684 | goto out_err; | ||
1685 | |||
1686 | if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) | ||
1687 | goto out_err; | ||
1688 | |||
1689 | iph = ip_hdr(skb); | ||
1690 | th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); | ||
1691 | |||
1692 | if (th->doff < sizeof(struct tcphdr) / 4) | ||
1693 | goto out_err; | ||
1694 | |||
1695 | if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) | ||
1696 | goto out_err; | ||
1697 | |||
1698 | sk = __inet_lookup_established(net, &tcp_hashinfo, | ||
1699 | iph->saddr, th->source, | ||
1700 | iph->daddr, th->dest, | ||
1701 | skb->dev->ifindex); | ||
1702 | if (sk) { | ||
1703 | skb->sk = sk; | ||
1704 | skb->destructor = sock_edemux; | ||
1705 | if (sk->sk_state != TCP_TIME_WAIT) { | ||
1706 | struct dst_entry *dst = sk->sk_rx_dst; | ||
1707 | if (dst) | ||
1708 | dst = dst_check(dst, 0); | ||
1709 | if (dst) { | ||
1710 | skb_dst_set_noref(skb, dst); | ||
1711 | err = 0; | ||
1712 | } | ||
1713 | } | ||
1714 | } | ||
1715 | |||
1716 | out_err: | ||
1717 | return err; | ||
1718 | } | ||
1719 | |||
1674 | /* | 1720 | /* |
1675 | * From tcp_input.c | 1721 | * From tcp_input.c |
1676 | */ | 1722 | */ |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index cb015317c9f7..72b7c63b1a39 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -445,6 +445,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
445 | struct tcp_sock *oldtp = tcp_sk(sk); | 445 | struct tcp_sock *oldtp = tcp_sk(sk); |
446 | struct tcp_cookie_values *oldcvp = oldtp->cookie_values; | 446 | struct tcp_cookie_values *oldcvp = oldtp->cookie_values; |
447 | 447 | ||
448 | newsk->sk_rx_dst = dst_clone(skb_dst(skb)); | ||
449 | |||
448 | /* TCP Cookie Transactions require space for the cookie pair, | 450 | /* TCP Cookie Transactions require space for the cookie pair, |
449 | * as it differs for each connection. There is no need to | 451 | * as it differs for each connection. There is no need to |
450 | * copy any s_data_payload stored at the original socket. | 452 | * copy any s_data_payload stored at the original socket. |