diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-20 16:43:21 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-20 16:43:21 -0400 |
commit | 06f4e926d256d902dd9a53dcb400fd74974ce087 (patch) | |
tree | 0b438b67f5f0eff6fd617bc497a9dace6164a488 /net/ipv4 | |
parent | 8e7bfcbab3825d1b404d615cb1b54f44ff81f981 (diff) | |
parent | d93515611bbc70c2fe4db232e5feb448ed8e4cc9 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1446 commits)
macvlan: fix panic if lowerdev in a bond
tg3: Add braces around 5906 workaround.
tg3: Fix NETIF_F_LOOPBACK error
macvlan: remove one synchronize_rcu() call
networking: NET_CLS_ROUTE4 depends on INET
irda: Fix error propagation in ircomm_lmp_connect_response()
irda: Kill set but unused variable 'bytes' in irlan_check_command_param()
irda: Kill set but unused variable 'clen' in ircomm_connect_indication()
rxrpc: Fix set but unused variable 'usage' in rxrpc_get_transport()
be2net: Kill set but unused variable 'req' in lancer_fw_download()
irda: Kill set but unused vars 'saddr' and 'daddr' in irlan_provider_connect_indication()
atl1c: atl1c_resume() is only used when CONFIG_PM_SLEEP is defined.
rxrpc: Fix set but unused variable 'usage' in rxrpc_get_peer().
rxrpc: Kill set but unused variable 'local' in rxrpc_UDP_error_handler()
rxrpc: Kill set but unused variable 'sp' in rxrpc_process_connection()
rxrpc: Kill set but unused variable 'sp' in rxrpc_rotate_tx_window()
pkt_sched: Kill set but unused variable 'protocol' in tc_classify()
isdn: capi: Use pr_debug() instead of ifdefs.
tg3: Update version to 3.119
tg3: Apply rx_discards fix to 5719/5720
...
Fix up trivial conflicts in arch/x86/Kconfig and net/mac80211/agg-tx.c
as per Davem.
Diffstat (limited to 'net/ipv4')
39 files changed, 1999 insertions, 830 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 0dc772d0d125..f2dc69cffb57 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ | |||
11 | datagram.o raw.o udp.o udplite.o \ | 11 | datagram.o raw.o udp.o udplite.o \ |
12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ | 12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ |
13 | fib_frontend.o fib_semantics.o fib_trie.o \ | 13 | fib_frontend.o fib_semantics.o fib_trie.o \ |
14 | inet_fragment.o | 14 | inet_fragment.o ping.o |
15 | 15 | ||
16 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o | 16 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o |
17 | obj-$(CONFIG_PROC_FS) += proc.o | 17 | obj-$(CONFIG_PROC_FS) += proc.o |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 807d83c02ef6..cc1463156cd0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -105,6 +105,7 @@ | |||
105 | #include <net/tcp.h> | 105 | #include <net/tcp.h> |
106 | #include <net/udp.h> | 106 | #include <net/udp.h> |
107 | #include <net/udplite.h> | 107 | #include <net/udplite.h> |
108 | #include <net/ping.h> | ||
108 | #include <linux/skbuff.h> | 109 | #include <linux/skbuff.h> |
109 | #include <net/sock.h> | 110 | #include <net/sock.h> |
110 | #include <net/raw.h> | 111 | #include <net/raw.h> |
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk) | |||
153 | WARN_ON(sk->sk_wmem_queued); | 154 | WARN_ON(sk->sk_wmem_queued); |
154 | WARN_ON(sk->sk_forward_alloc); | 155 | WARN_ON(sk->sk_forward_alloc); |
155 | 156 | ||
156 | kfree(inet->opt); | 157 | kfree(rcu_dereference_protected(inet->inet_opt, 1)); |
157 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); | 158 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); |
158 | sk_refcnt_debug_dec(sk); | 159 | sk_refcnt_debug_dec(sk); |
159 | } | 160 | } |
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] = | |||
1008 | .flags = INET_PROTOSW_PERMANENT, | 1009 | .flags = INET_PROTOSW_PERMANENT, |
1009 | }, | 1010 | }, |
1010 | 1011 | ||
1012 | { | ||
1013 | .type = SOCK_DGRAM, | ||
1014 | .protocol = IPPROTO_ICMP, | ||
1015 | .prot = &ping_prot, | ||
1016 | .ops = &inet_dgram_ops, | ||
1017 | .no_check = UDP_CSUM_DEFAULT, | ||
1018 | .flags = INET_PROTOSW_REUSE, | ||
1019 | }, | ||
1011 | 1020 | ||
1012 | { | 1021 | { |
1013 | .type = SOCK_RAW, | 1022 | .type = SOCK_RAW, |
@@ -1103,14 +1112,19 @@ static int inet_sk_reselect_saddr(struct sock *sk) | |||
1103 | struct inet_sock *inet = inet_sk(sk); | 1112 | struct inet_sock *inet = inet_sk(sk); |
1104 | __be32 old_saddr = inet->inet_saddr; | 1113 | __be32 old_saddr = inet->inet_saddr; |
1105 | __be32 daddr = inet->inet_daddr; | 1114 | __be32 daddr = inet->inet_daddr; |
1115 | struct flowi4 *fl4; | ||
1106 | struct rtable *rt; | 1116 | struct rtable *rt; |
1107 | __be32 new_saddr; | 1117 | __be32 new_saddr; |
1118 | struct ip_options_rcu *inet_opt; | ||
1108 | 1119 | ||
1109 | if (inet->opt && inet->opt->srr) | 1120 | inet_opt = rcu_dereference_protected(inet->inet_opt, |
1110 | daddr = inet->opt->faddr; | 1121 | sock_owned_by_user(sk)); |
1122 | if (inet_opt && inet_opt->opt.srr) | ||
1123 | daddr = inet_opt->opt.faddr; | ||
1111 | 1124 | ||
1112 | /* Query new route. */ | 1125 | /* Query new route. */ |
1113 | rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), | 1126 | fl4 = &inet->cork.fl.u.ip4; |
1127 | rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk), | ||
1114 | sk->sk_bound_dev_if, sk->sk_protocol, | 1128 | sk->sk_bound_dev_if, sk->sk_protocol, |
1115 | inet->inet_sport, inet->inet_dport, sk, false); | 1129 | inet->inet_sport, inet->inet_dport, sk, false); |
1116 | if (IS_ERR(rt)) | 1130 | if (IS_ERR(rt)) |
@@ -1118,7 +1132,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) | |||
1118 | 1132 | ||
1119 | sk_setup_caps(sk, &rt->dst); | 1133 | sk_setup_caps(sk, &rt->dst); |
1120 | 1134 | ||
1121 | new_saddr = rt->rt_src; | 1135 | new_saddr = fl4->saddr; |
1122 | 1136 | ||
1123 | if (new_saddr == old_saddr) | 1137 | if (new_saddr == old_saddr) |
1124 | return 0; | 1138 | return 0; |
@@ -1147,6 +1161,8 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1147 | struct inet_sock *inet = inet_sk(sk); | 1161 | struct inet_sock *inet = inet_sk(sk); |
1148 | struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); | 1162 | struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); |
1149 | __be32 daddr; | 1163 | __be32 daddr; |
1164 | struct ip_options_rcu *inet_opt; | ||
1165 | struct flowi4 *fl4; | ||
1150 | int err; | 1166 | int err; |
1151 | 1167 | ||
1152 | /* Route is OK, nothing to do. */ | 1168 | /* Route is OK, nothing to do. */ |
@@ -1154,10 +1170,14 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1154 | return 0; | 1170 | return 0; |
1155 | 1171 | ||
1156 | /* Reroute. */ | 1172 | /* Reroute. */ |
1173 | rcu_read_lock(); | ||
1174 | inet_opt = rcu_dereference(inet->inet_opt); | ||
1157 | daddr = inet->inet_daddr; | 1175 | daddr = inet->inet_daddr; |
1158 | if (inet->opt && inet->opt->srr) | 1176 | if (inet_opt && inet_opt->opt.srr) |
1159 | daddr = inet->opt->faddr; | 1177 | daddr = inet_opt->opt.faddr; |
1160 | rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, | 1178 | rcu_read_unlock(); |
1179 | fl4 = &inet->cork.fl.u.ip4; | ||
1180 | rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, | ||
1161 | inet->inet_dport, inet->inet_sport, | 1181 | inet->inet_dport, inet->inet_sport, |
1162 | sk->sk_protocol, RT_CONN_FLAGS(sk), | 1182 | sk->sk_protocol, RT_CONN_FLAGS(sk), |
1163 | sk->sk_bound_dev_if); | 1183 | sk->sk_bound_dev_if); |
@@ -1186,7 +1206,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); | |||
1186 | 1206 | ||
1187 | static int inet_gso_send_check(struct sk_buff *skb) | 1207 | static int inet_gso_send_check(struct sk_buff *skb) |
1188 | { | 1208 | { |
1189 | struct iphdr *iph; | 1209 | const struct iphdr *iph; |
1190 | const struct net_protocol *ops; | 1210 | const struct net_protocol *ops; |
1191 | int proto; | 1211 | int proto; |
1192 | int ihl; | 1212 | int ihl; |
@@ -1293,7 +1313,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1293 | const struct net_protocol *ops; | 1313 | const struct net_protocol *ops; |
1294 | struct sk_buff **pp = NULL; | 1314 | struct sk_buff **pp = NULL; |
1295 | struct sk_buff *p; | 1315 | struct sk_buff *p; |
1296 | struct iphdr *iph; | 1316 | const struct iphdr *iph; |
1297 | unsigned int hlen; | 1317 | unsigned int hlen; |
1298 | unsigned int off; | 1318 | unsigned int off; |
1299 | unsigned int id; | 1319 | unsigned int id; |
@@ -1516,6 +1536,7 @@ static const struct net_protocol udp_protocol = { | |||
1516 | 1536 | ||
1517 | static const struct net_protocol icmp_protocol = { | 1537 | static const struct net_protocol icmp_protocol = { |
1518 | .handler = icmp_rcv, | 1538 | .handler = icmp_rcv, |
1539 | .err_handler = ping_err, | ||
1519 | .no_policy = 1, | 1540 | .no_policy = 1, |
1520 | .netns_ok = 1, | 1541 | .netns_ok = 1, |
1521 | }; | 1542 | }; |
@@ -1631,6 +1652,10 @@ static int __init inet_init(void) | |||
1631 | if (rc) | 1652 | if (rc) |
1632 | goto out_unregister_udp_proto; | 1653 | goto out_unregister_udp_proto; |
1633 | 1654 | ||
1655 | rc = proto_register(&ping_prot, 1); | ||
1656 | if (rc) | ||
1657 | goto out_unregister_raw_proto; | ||
1658 | |||
1634 | /* | 1659 | /* |
1635 | * Tell SOCKET that we are alive... | 1660 | * Tell SOCKET that we are alive... |
1636 | */ | 1661 | */ |
@@ -1686,6 +1711,8 @@ static int __init inet_init(void) | |||
1686 | /* Add UDP-Lite (RFC 3828) */ | 1711 | /* Add UDP-Lite (RFC 3828) */ |
1687 | udplite4_register(); | 1712 | udplite4_register(); |
1688 | 1713 | ||
1714 | ping_init(); | ||
1715 | |||
1689 | /* | 1716 | /* |
1690 | * Set the ICMP layer up | 1717 | * Set the ICMP layer up |
1691 | */ | 1718 | */ |
@@ -1716,6 +1743,8 @@ static int __init inet_init(void) | |||
1716 | rc = 0; | 1743 | rc = 0; |
1717 | out: | 1744 | out: |
1718 | return rc; | 1745 | return rc; |
1746 | out_unregister_raw_proto: | ||
1747 | proto_unregister(&raw_prot); | ||
1719 | out_unregister_udp_proto: | 1748 | out_unregister_udp_proto: |
1720 | proto_unregister(&udp_prot); | 1749 | proto_unregister(&udp_prot); |
1721 | out_unregister_tcp_proto: | 1750 | out_unregister_tcp_proto: |
@@ -1740,11 +1769,15 @@ static int __init ipv4_proc_init(void) | |||
1740 | goto out_tcp; | 1769 | goto out_tcp; |
1741 | if (udp4_proc_init()) | 1770 | if (udp4_proc_init()) |
1742 | goto out_udp; | 1771 | goto out_udp; |
1772 | if (ping_proc_init()) | ||
1773 | goto out_ping; | ||
1743 | if (ip_misc_proc_init()) | 1774 | if (ip_misc_proc_init()) |
1744 | goto out_misc; | 1775 | goto out_misc; |
1745 | out: | 1776 | out: |
1746 | return rc; | 1777 | return rc; |
1747 | out_misc: | 1778 | out_misc: |
1779 | ping_proc_exit(); | ||
1780 | out_ping: | ||
1748 | udp4_proc_exit(); | 1781 | udp4_proc_exit(); |
1749 | out_udp: | 1782 | out_udp: |
1750 | tcp4_proc_exit(); | 1783 | tcp4_proc_exit(); |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 4286fd3cc0e2..c1f4154552fc 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -73,7 +73,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, | |||
73 | * into IP header for icv calculation. Options are already checked | 73 | * into IP header for icv calculation. Options are already checked |
74 | * for validity, so paranoia is not required. */ | 74 | * for validity, so paranoia is not required. */ |
75 | 75 | ||
76 | static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) | 76 | static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) |
77 | { | 77 | { |
78 | unsigned char * optptr = (unsigned char*)(iph+1); | 78 | unsigned char * optptr = (unsigned char*)(iph+1); |
79 | int l = iph->ihl*4 - sizeof(struct iphdr); | 79 | int l = iph->ihl*4 - sizeof(struct iphdr); |
@@ -396,7 +396,7 @@ out: | |||
396 | static void ah4_err(struct sk_buff *skb, u32 info) | 396 | static void ah4_err(struct sk_buff *skb, u32 info) |
397 | { | 397 | { |
398 | struct net *net = dev_net(skb->dev); | 398 | struct net *net = dev_net(skb->dev); |
399 | struct iphdr *iph = (struct iphdr *)skb->data; | 399 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
400 | struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); | 400 | struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); |
401 | struct xfrm_state *x; | 401 | struct xfrm_state *x; |
402 | 402 | ||
@@ -404,7 +404,8 @@ static void ah4_err(struct sk_buff *skb, u32 info) | |||
404 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) | 404 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
405 | return; | 405 | return; |
406 | 406 | ||
407 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); | 407 | x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, |
408 | ah->spi, IPPROTO_AH, AF_INET); | ||
408 | if (!x) | 409 | if (!x) |
409 | return; | 410 | return; |
410 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", | 411 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a0af7ea87870..2b3c23c287cd 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, | |||
1857 | return CIPSO_V4_HDR_LEN + ret_val; | 1857 | return CIPSO_V4_HDR_LEN + ret_val; |
1858 | } | 1858 | } |
1859 | 1859 | ||
1860 | static void opt_kfree_rcu(struct rcu_head *head) | ||
1861 | { | ||
1862 | kfree(container_of(head, struct ip_options_rcu, rcu)); | ||
1863 | } | ||
1864 | |||
1860 | /** | 1865 | /** |
1861 | * cipso_v4_sock_setattr - Add a CIPSO option to a socket | 1866 | * cipso_v4_sock_setattr - Add a CIPSO option to a socket |
1862 | * @sk: the socket | 1867 | * @sk: the socket |
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk, | |||
1879 | unsigned char *buf = NULL; | 1884 | unsigned char *buf = NULL; |
1880 | u32 buf_len; | 1885 | u32 buf_len; |
1881 | u32 opt_len; | 1886 | u32 opt_len; |
1882 | struct ip_options *opt = NULL; | 1887 | struct ip_options_rcu *old, *opt = NULL; |
1883 | struct inet_sock *sk_inet; | 1888 | struct inet_sock *sk_inet; |
1884 | struct inet_connection_sock *sk_conn; | 1889 | struct inet_connection_sock *sk_conn; |
1885 | 1890 | ||
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk, | |||
1915 | ret_val = -ENOMEM; | 1920 | ret_val = -ENOMEM; |
1916 | goto socket_setattr_failure; | 1921 | goto socket_setattr_failure; |
1917 | } | 1922 | } |
1918 | memcpy(opt->__data, buf, buf_len); | 1923 | memcpy(opt->opt.__data, buf, buf_len); |
1919 | opt->optlen = opt_len; | 1924 | opt->opt.optlen = opt_len; |
1920 | opt->cipso = sizeof(struct iphdr); | 1925 | opt->opt.cipso = sizeof(struct iphdr); |
1921 | kfree(buf); | 1926 | kfree(buf); |
1922 | buf = NULL; | 1927 | buf = NULL; |
1923 | 1928 | ||
1924 | sk_inet = inet_sk(sk); | 1929 | sk_inet = inet_sk(sk); |
1930 | |||
1931 | old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk)); | ||
1925 | if (sk_inet->is_icsk) { | 1932 | if (sk_inet->is_icsk) { |
1926 | sk_conn = inet_csk(sk); | 1933 | sk_conn = inet_csk(sk); |
1927 | if (sk_inet->opt) | 1934 | if (old) |
1928 | sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; | 1935 | sk_conn->icsk_ext_hdr_len -= old->opt.optlen; |
1929 | sk_conn->icsk_ext_hdr_len += opt->optlen; | 1936 | sk_conn->icsk_ext_hdr_len += opt->opt.optlen; |
1930 | sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); | 1937 | sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); |
1931 | } | 1938 | } |
1932 | opt = xchg(&sk_inet->opt, opt); | 1939 | rcu_assign_pointer(sk_inet->inet_opt, opt); |
1933 | kfree(opt); | 1940 | if (old) |
1941 | call_rcu(&old->rcu, opt_kfree_rcu); | ||
1934 | 1942 | ||
1935 | return 0; | 1943 | return 0; |
1936 | 1944 | ||
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req, | |||
1960 | unsigned char *buf = NULL; | 1968 | unsigned char *buf = NULL; |
1961 | u32 buf_len; | 1969 | u32 buf_len; |
1962 | u32 opt_len; | 1970 | u32 opt_len; |
1963 | struct ip_options *opt = NULL; | 1971 | struct ip_options_rcu *opt = NULL; |
1964 | struct inet_request_sock *req_inet; | 1972 | struct inet_request_sock *req_inet; |
1965 | 1973 | ||
1966 | /* We allocate the maximum CIPSO option size here so we are probably | 1974 | /* We allocate the maximum CIPSO option size here so we are probably |
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req, | |||
1988 | ret_val = -ENOMEM; | 1996 | ret_val = -ENOMEM; |
1989 | goto req_setattr_failure; | 1997 | goto req_setattr_failure; |
1990 | } | 1998 | } |
1991 | memcpy(opt->__data, buf, buf_len); | 1999 | memcpy(opt->opt.__data, buf, buf_len); |
1992 | opt->optlen = opt_len; | 2000 | opt->opt.optlen = opt_len; |
1993 | opt->cipso = sizeof(struct iphdr); | 2001 | opt->opt.cipso = sizeof(struct iphdr); |
1994 | kfree(buf); | 2002 | kfree(buf); |
1995 | buf = NULL; | 2003 | buf = NULL; |
1996 | 2004 | ||
1997 | req_inet = inet_rsk(req); | 2005 | req_inet = inet_rsk(req); |
1998 | opt = xchg(&req_inet->opt, opt); | 2006 | opt = xchg(&req_inet->opt, opt); |
1999 | kfree(opt); | 2007 | if (opt) |
2008 | call_rcu(&opt->rcu, opt_kfree_rcu); | ||
2000 | 2009 | ||
2001 | return 0; | 2010 | return 0; |
2002 | 2011 | ||
@@ -2016,34 +2025,34 @@ req_setattr_failure: | |||
2016 | * values on failure. | 2025 | * values on failure. |
2017 | * | 2026 | * |
2018 | */ | 2027 | */ |
2019 | static int cipso_v4_delopt(struct ip_options **opt_ptr) | 2028 | static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) |
2020 | { | 2029 | { |
2021 | int hdr_delta = 0; | 2030 | int hdr_delta = 0; |
2022 | struct ip_options *opt = *opt_ptr; | 2031 | struct ip_options_rcu *opt = *opt_ptr; |
2023 | 2032 | ||
2024 | if (opt->srr || opt->rr || opt->ts || opt->router_alert) { | 2033 | if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { |
2025 | u8 cipso_len; | 2034 | u8 cipso_len; |
2026 | u8 cipso_off; | 2035 | u8 cipso_off; |
2027 | unsigned char *cipso_ptr; | 2036 | unsigned char *cipso_ptr; |
2028 | int iter; | 2037 | int iter; |
2029 | int optlen_new; | 2038 | int optlen_new; |
2030 | 2039 | ||
2031 | cipso_off = opt->cipso - sizeof(struct iphdr); | 2040 | cipso_off = opt->opt.cipso - sizeof(struct iphdr); |
2032 | cipso_ptr = &opt->__data[cipso_off]; | 2041 | cipso_ptr = &opt->opt.__data[cipso_off]; |
2033 | cipso_len = cipso_ptr[1]; | 2042 | cipso_len = cipso_ptr[1]; |
2034 | 2043 | ||
2035 | if (opt->srr > opt->cipso) | 2044 | if (opt->opt.srr > opt->opt.cipso) |
2036 | opt->srr -= cipso_len; | 2045 | opt->opt.srr -= cipso_len; |
2037 | if (opt->rr > opt->cipso) | 2046 | if (opt->opt.rr > opt->opt.cipso) |
2038 | opt->rr -= cipso_len; | 2047 | opt->opt.rr -= cipso_len; |
2039 | if (opt->ts > opt->cipso) | 2048 | if (opt->opt.ts > opt->opt.cipso) |
2040 | opt->ts -= cipso_len; | 2049 | opt->opt.ts -= cipso_len; |
2041 | if (opt->router_alert > opt->cipso) | 2050 | if (opt->opt.router_alert > opt->opt.cipso) |
2042 | opt->router_alert -= cipso_len; | 2051 | opt->opt.router_alert -= cipso_len; |
2043 | opt->cipso = 0; | 2052 | opt->opt.cipso = 0; |
2044 | 2053 | ||
2045 | memmove(cipso_ptr, cipso_ptr + cipso_len, | 2054 | memmove(cipso_ptr, cipso_ptr + cipso_len, |
2046 | opt->optlen - cipso_off - cipso_len); | 2055 | opt->opt.optlen - cipso_off - cipso_len); |
2047 | 2056 | ||
2048 | /* determining the new total option length is tricky because of | 2057 | /* determining the new total option length is tricky because of |
2049 | * the padding necessary, the only thing i can think to do at | 2058 | * the padding necessary, the only thing i can think to do at |
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) | |||
2052 | * from there we can determine the new total option length */ | 2061 | * from there we can determine the new total option length */ |
2053 | iter = 0; | 2062 | iter = 0; |
2054 | optlen_new = 0; | 2063 | optlen_new = 0; |
2055 | while (iter < opt->optlen) | 2064 | while (iter < opt->opt.optlen) |
2056 | if (opt->__data[iter] != IPOPT_NOP) { | 2065 | if (opt->opt.__data[iter] != IPOPT_NOP) { |
2057 | iter += opt->__data[iter + 1]; | 2066 | iter += opt->opt.__data[iter + 1]; |
2058 | optlen_new = iter; | 2067 | optlen_new = iter; |
2059 | } else | 2068 | } else |
2060 | iter++; | 2069 | iter++; |
2061 | hdr_delta = opt->optlen; | 2070 | hdr_delta = opt->opt.optlen; |
2062 | opt->optlen = (optlen_new + 3) & ~3; | 2071 | opt->opt.optlen = (optlen_new + 3) & ~3; |
2063 | hdr_delta -= opt->optlen; | 2072 | hdr_delta -= opt->opt.optlen; |
2064 | } else { | 2073 | } else { |
2065 | /* only the cipso option was present on the socket so we can | 2074 | /* only the cipso option was present on the socket so we can |
2066 | * remove the entire option struct */ | 2075 | * remove the entire option struct */ |
2067 | *opt_ptr = NULL; | 2076 | *opt_ptr = NULL; |
2068 | hdr_delta = opt->optlen; | 2077 | hdr_delta = opt->opt.optlen; |
2069 | kfree(opt); | 2078 | call_rcu(&opt->rcu, opt_kfree_rcu); |
2070 | } | 2079 | } |
2071 | 2080 | ||
2072 | return hdr_delta; | 2081 | return hdr_delta; |
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) | |||
2083 | void cipso_v4_sock_delattr(struct sock *sk) | 2092 | void cipso_v4_sock_delattr(struct sock *sk) |
2084 | { | 2093 | { |
2085 | int hdr_delta; | 2094 | int hdr_delta; |
2086 | struct ip_options *opt; | 2095 | struct ip_options_rcu *opt; |
2087 | struct inet_sock *sk_inet; | 2096 | struct inet_sock *sk_inet; |
2088 | 2097 | ||
2089 | sk_inet = inet_sk(sk); | 2098 | sk_inet = inet_sk(sk); |
2090 | opt = sk_inet->opt; | 2099 | opt = rcu_dereference_protected(sk_inet->inet_opt, 1); |
2091 | if (opt == NULL || opt->cipso == 0) | 2100 | if (opt == NULL || opt->opt.cipso == 0) |
2092 | return; | 2101 | return; |
2093 | 2102 | ||
2094 | hdr_delta = cipso_v4_delopt(&sk_inet->opt); | 2103 | hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); |
2095 | if (sk_inet->is_icsk && hdr_delta > 0) { | 2104 | if (sk_inet->is_icsk && hdr_delta > 0) { |
2096 | struct inet_connection_sock *sk_conn = inet_csk(sk); | 2105 | struct inet_connection_sock *sk_conn = inet_csk(sk); |
2097 | sk_conn->icsk_ext_hdr_len -= hdr_delta; | 2106 | sk_conn->icsk_ext_hdr_len -= hdr_delta; |
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk) | |||
2109 | */ | 2118 | */ |
2110 | void cipso_v4_req_delattr(struct request_sock *req) | 2119 | void cipso_v4_req_delattr(struct request_sock *req) |
2111 | { | 2120 | { |
2112 | struct ip_options *opt; | 2121 | struct ip_options_rcu *opt; |
2113 | struct inet_request_sock *req_inet; | 2122 | struct inet_request_sock *req_inet; |
2114 | 2123 | ||
2115 | req_inet = inet_rsk(req); | 2124 | req_inet = inet_rsk(req); |
2116 | opt = req_inet->opt; | 2125 | opt = req_inet->opt; |
2117 | if (opt == NULL || opt->cipso == 0) | 2126 | if (opt == NULL || opt->opt.cipso == 0) |
2118 | return; | 2127 | return; |
2119 | 2128 | ||
2120 | cipso_v4_delopt(&req_inet->opt); | 2129 | cipso_v4_delopt(&req_inet->opt); |
@@ -2184,14 +2193,18 @@ getattr_return: | |||
2184 | */ | 2193 | */ |
2185 | int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) | 2194 | int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) |
2186 | { | 2195 | { |
2187 | struct ip_options *opt; | 2196 | struct ip_options_rcu *opt; |
2197 | int res = -ENOMSG; | ||
2188 | 2198 | ||
2189 | opt = inet_sk(sk)->opt; | 2199 | rcu_read_lock(); |
2190 | if (opt == NULL || opt->cipso == 0) | 2200 | opt = rcu_dereference(inet_sk(sk)->inet_opt); |
2191 | return -ENOMSG; | 2201 | if (opt && opt->opt.cipso) |
2192 | 2202 | res = cipso_v4_getattr(opt->opt.__data + | |
2193 | return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), | 2203 | opt->opt.cipso - |
2194 | secattr); | 2204 | sizeof(struct iphdr), |
2205 | secattr); | ||
2206 | rcu_read_unlock(); | ||
2207 | return res; | ||
2195 | } | 2208 | } |
2196 | 2209 | ||
2197 | /** | 2210 | /** |
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 85bd24ca4f6d..424fafbc8cb0 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c | |||
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
24 | { | 24 | { |
25 | struct inet_sock *inet = inet_sk(sk); | 25 | struct inet_sock *inet = inet_sk(sk); |
26 | struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; | 26 | struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; |
27 | struct flowi4 *fl4; | ||
27 | struct rtable *rt; | 28 | struct rtable *rt; |
28 | __be32 saddr; | 29 | __be32 saddr; |
29 | int oif; | 30 | int oif; |
@@ -38,6 +39,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
38 | 39 | ||
39 | sk_dst_reset(sk); | 40 | sk_dst_reset(sk); |
40 | 41 | ||
42 | lock_sock(sk); | ||
43 | |||
41 | oif = sk->sk_bound_dev_if; | 44 | oif = sk->sk_bound_dev_if; |
42 | saddr = inet->inet_saddr; | 45 | saddr = inet->inet_saddr; |
43 | if (ipv4_is_multicast(usin->sin_addr.s_addr)) { | 46 | if (ipv4_is_multicast(usin->sin_addr.s_addr)) { |
@@ -46,7 +49,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
46 | if (!saddr) | 49 | if (!saddr) |
47 | saddr = inet->mc_addr; | 50 | saddr = inet->mc_addr; |
48 | } | 51 | } |
49 | rt = ip_route_connect(usin->sin_addr.s_addr, saddr, | 52 | fl4 = &inet->cork.fl.u.ip4; |
53 | rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, | ||
50 | RT_CONN_FLAGS(sk), oif, | 54 | RT_CONN_FLAGS(sk), oif, |
51 | sk->sk_protocol, | 55 | sk->sk_protocol, |
52 | inet->inet_sport, usin->sin_port, sk, true); | 56 | inet->inet_sport, usin->sin_port, sk, true); |
@@ -54,26 +58,30 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
54 | err = PTR_ERR(rt); | 58 | err = PTR_ERR(rt); |
55 | if (err == -ENETUNREACH) | 59 | if (err == -ENETUNREACH) |
56 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 60 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
57 | return err; | 61 | goto out; |
58 | } | 62 | } |
59 | 63 | ||
60 | if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { | 64 | if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { |
61 | ip_rt_put(rt); | 65 | ip_rt_put(rt); |
62 | return -EACCES; | 66 | err = -EACCES; |
67 | goto out; | ||
63 | } | 68 | } |
64 | if (!inet->inet_saddr) | 69 | if (!inet->inet_saddr) |
65 | inet->inet_saddr = rt->rt_src; /* Update source address */ | 70 | inet->inet_saddr = fl4->saddr; /* Update source address */ |
66 | if (!inet->inet_rcv_saddr) { | 71 | if (!inet->inet_rcv_saddr) { |
67 | inet->inet_rcv_saddr = rt->rt_src; | 72 | inet->inet_rcv_saddr = fl4->saddr; |
68 | if (sk->sk_prot->rehash) | 73 | if (sk->sk_prot->rehash) |
69 | sk->sk_prot->rehash(sk); | 74 | sk->sk_prot->rehash(sk); |
70 | } | 75 | } |
71 | inet->inet_daddr = rt->rt_dst; | 76 | inet->inet_daddr = fl4->daddr; |
72 | inet->inet_dport = usin->sin_port; | 77 | inet->inet_dport = usin->sin_port; |
73 | sk->sk_state = TCP_ESTABLISHED; | 78 | sk->sk_state = TCP_ESTABLISHED; |
74 | inet->inet_id = jiffies; | 79 | inet->inet_id = jiffies; |
75 | 80 | ||
76 | sk_dst_set(sk, &rt->dst); | 81 | sk_dst_set(sk, &rt->dst); |
77 | return 0; | 82 | err = 0; |
83 | out: | ||
84 | release_sock(sk); | ||
85 | return err; | ||
78 | } | 86 | } |
79 | EXPORT_SYMBOL(ip4_datagram_connect); | 87 | EXPORT_SYMBOL(ip4_datagram_connect); |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cd9ca0811cfa..0d4a184af16f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1369,7 +1369,7 @@ errout: | |||
1369 | 1369 | ||
1370 | static size_t inet_get_link_af_size(const struct net_device *dev) | 1370 | static size_t inet_get_link_af_size(const struct net_device *dev) |
1371 | { | 1371 | { |
1372 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 1372 | struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); |
1373 | 1373 | ||
1374 | if (!in_dev) | 1374 | if (!in_dev) |
1375 | return 0; | 1375 | return 0; |
@@ -1379,7 +1379,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev) | |||
1379 | 1379 | ||
1380 | static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) | 1380 | static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) |
1381 | { | 1381 | { |
1382 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 1382 | struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); |
1383 | struct nlattr *nla; | 1383 | struct nlattr *nla; |
1384 | int i; | 1384 | int i; |
1385 | 1385 | ||
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 03f994bcf7de..a5b413416da3 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -276,7 +276,7 @@ error: | |||
276 | 276 | ||
277 | static int esp_input_done2(struct sk_buff *skb, int err) | 277 | static int esp_input_done2(struct sk_buff *skb, int err) |
278 | { | 278 | { |
279 | struct iphdr *iph; | 279 | const struct iphdr *iph; |
280 | struct xfrm_state *x = xfrm_input_state(skb); | 280 | struct xfrm_state *x = xfrm_input_state(skb); |
281 | struct esp_data *esp = x->data; | 281 | struct esp_data *esp = x->data; |
282 | struct crypto_aead *aead = esp->aead; | 282 | struct crypto_aead *aead = esp->aead; |
@@ -484,7 +484,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) | |||
484 | static void esp4_err(struct sk_buff *skb, u32 info) | 484 | static void esp4_err(struct sk_buff *skb, u32 info) |
485 | { | 485 | { |
486 | struct net *net = dev_net(skb->dev); | 486 | struct net *net = dev_net(skb->dev); |
487 | struct iphdr *iph = (struct iphdr *)skb->data; | 487 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
488 | struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); | 488 | struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); |
489 | struct xfrm_state *x; | 489 | struct xfrm_state *x; |
490 | 490 | ||
@@ -492,7 +492,8 @@ static void esp4_err(struct sk_buff *skb, u32 info) | |||
492 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) | 492 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
493 | return; | 493 | return; |
494 | 494 | ||
495 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); | 495 | x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, |
496 | esph->spi, IPPROTO_ESP, AF_INET); | ||
496 | if (!x) | 497 | if (!x) |
497 | return; | 498 | return; |
498 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", | 499 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 451088330bbb..22524716fe70 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <net/arp.h> | 44 | #include <net/arp.h> |
45 | #include <net/ip_fib.h> | 45 | #include <net/ip_fib.h> |
46 | #include <net/rtnetlink.h> | 46 | #include <net/rtnetlink.h> |
47 | #include <net/xfrm.h> | ||
47 | 48 | ||
48 | #ifndef CONFIG_IP_MULTIPLE_TABLES | 49 | #ifndef CONFIG_IP_MULTIPLE_TABLES |
49 | 50 | ||
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type); | |||
188 | * - check, that packet arrived from expected physical interface. | 189 | * - check, that packet arrived from expected physical interface. |
189 | * called with rcu_read_lock() | 190 | * called with rcu_read_lock() |
190 | */ | 191 | */ |
191 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | 192 | int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, |
192 | struct net_device *dev, __be32 *spec_dst, | 193 | int oif, struct net_device *dev, __be32 *spec_dst, |
193 | u32 *itag, u32 mark) | 194 | u32 *itag) |
194 | { | 195 | { |
195 | struct in_device *in_dev; | 196 | struct in_device *in_dev; |
196 | struct flowi4 fl4; | 197 | struct flowi4 fl4; |
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
202 | 203 | ||
203 | fl4.flowi4_oif = 0; | 204 | fl4.flowi4_oif = 0; |
204 | fl4.flowi4_iif = oif; | 205 | fl4.flowi4_iif = oif; |
205 | fl4.flowi4_mark = mark; | ||
206 | fl4.daddr = src; | 206 | fl4.daddr = src; |
207 | fl4.saddr = dst; | 207 | fl4.saddr = dst; |
208 | fl4.flowi4_tos = tos; | 208 | fl4.flowi4_tos = tos; |
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
212 | in_dev = __in_dev_get_rcu(dev); | 212 | in_dev = __in_dev_get_rcu(dev); |
213 | if (in_dev) { | 213 | if (in_dev) { |
214 | no_addr = in_dev->ifa_list == NULL; | 214 | no_addr = in_dev->ifa_list == NULL; |
215 | rpf = IN_DEV_RPFILTER(in_dev); | 215 | |
216 | /* Ignore rp_filter for packets protected by IPsec. */ | ||
217 | rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); | ||
218 | |||
216 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); | 219 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); |
217 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | 220 | fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; |
218 | fl4.flowi4_mark = 0; | ||
219 | } | 221 | } |
220 | 222 | ||
221 | if (in_dev == NULL) | 223 | if (in_dev == NULL) |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 11d4d28190bd..c779ce96e5b5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -126,7 +126,7 @@ struct tnode { | |||
126 | struct work_struct work; | 126 | struct work_struct work; |
127 | struct tnode *tnode_free; | 127 | struct tnode *tnode_free; |
128 | }; | 128 | }; |
129 | struct rt_trie_node *child[0]; | 129 | struct rt_trie_node __rcu *child[0]; |
130 | }; | 130 | }; |
131 | 131 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -151,7 +151,7 @@ struct trie_stat { | |||
151 | }; | 151 | }; |
152 | 152 | ||
153 | struct trie { | 153 | struct trie { |
154 | struct rt_trie_node *trie; | 154 | struct rt_trie_node __rcu *trie; |
155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 155 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
156 | struct trie_use_stats stats; | 156 | struct trie_use_stats stats; |
157 | #endif | 157 | #endif |
@@ -177,16 +177,29 @@ static const int sync_pages = 128; | |||
177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 177 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
179 | 179 | ||
180 | static inline struct tnode *node_parent(struct rt_trie_node *node) | 180 | /* |
181 | * caller must hold RTNL | ||
182 | */ | ||
183 | static inline struct tnode *node_parent(const struct rt_trie_node *node) | ||
181 | { | 184 | { |
182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 185 | unsigned long parent; |
186 | |||
187 | parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); | ||
188 | |||
189 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | ||
183 | } | 190 | } |
184 | 191 | ||
185 | static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) | 192 | /* |
193 | * caller must hold RCU read lock or RTNL | ||
194 | */ | ||
195 | static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) | ||
186 | { | 196 | { |
187 | struct tnode *ret = node_parent(node); | 197 | unsigned long parent; |
198 | |||
199 | parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || | ||
200 | lockdep_rtnl_is_held()); | ||
188 | 201 | ||
189 | return rcu_dereference_rtnl(ret); | 202 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); |
190 | } | 203 | } |
191 | 204 | ||
192 | /* Same as rcu_assign_pointer | 205 | /* Same as rcu_assign_pointer |
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) | |||
198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 211 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
199 | } | 212 | } |
200 | 213 | ||
201 | static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) | 214 | /* |
215 | * caller must hold RTNL | ||
216 | */ | ||
217 | static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) | ||
202 | { | 218 | { |
203 | BUG_ON(i >= 1U << tn->bits); | 219 | BUG_ON(i >= 1U << tn->bits); |
204 | 220 | ||
205 | return tn->child[i]; | 221 | return rtnl_dereference(tn->child[i]); |
206 | } | 222 | } |
207 | 223 | ||
208 | static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 224 | /* |
225 | * caller must hold RCU read lock or RTNL | ||
226 | */ | ||
227 | static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) | ||
209 | { | 228 | { |
210 | struct rt_trie_node *ret = tnode_get_child(tn, i); | 229 | BUG_ON(i >= 1U << tn->bits); |
211 | 230 | ||
212 | return rcu_dereference_rtnl(ret); | 231 | return rcu_dereference_rtnl(tn->child[i]); |
213 | } | 232 | } |
214 | 233 | ||
215 | static inline int tnode_child_length(const struct tnode *tn) | 234 | static inline int tnode_child_length(const struct tnode *tn) |
@@ -482,7 +501,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
482 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, | 501 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
483 | int wasfull) | 502 | int wasfull) |
484 | { | 503 | { |
485 | struct rt_trie_node *chi = tn->child[i]; | 504 | struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); |
486 | int isfull; | 505 | int isfull; |
487 | 506 | ||
488 | BUG_ON(i >= 1<<tn->bits); | 507 | BUG_ON(i >= 1<<tn->bits); |
@@ -660,7 +679,7 @@ one_child: | |||
660 | for (i = 0; i < tnode_child_length(tn); i++) { | 679 | for (i = 0; i < tnode_child_length(tn); i++) { |
661 | struct rt_trie_node *n; | 680 | struct rt_trie_node *n; |
662 | 681 | ||
663 | n = tn->child[i]; | 682 | n = rtnl_dereference(tn->child[i]); |
664 | if (!n) | 683 | if (!n) |
665 | continue; | 684 | continue; |
666 | 685 | ||
@@ -674,6 +693,20 @@ one_child: | |||
674 | return (struct rt_trie_node *) tn; | 693 | return (struct rt_trie_node *) tn; |
675 | } | 694 | } |
676 | 695 | ||
696 | |||
697 | static void tnode_clean_free(struct tnode *tn) | ||
698 | { | ||
699 | int i; | ||
700 | struct tnode *tofree; | ||
701 | |||
702 | for (i = 0; i < tnode_child_length(tn); i++) { | ||
703 | tofree = (struct tnode *)rtnl_dereference(tn->child[i]); | ||
704 | if (tofree) | ||
705 | tnode_free(tofree); | ||
706 | } | ||
707 | tnode_free(tn); | ||
708 | } | ||
709 | |||
677 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 710 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
678 | { | 711 | { |
679 | struct tnode *oldtnode = tn; | 712 | struct tnode *oldtnode = tn; |
@@ -750,8 +783,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
750 | inode = (struct tnode *) node; | 783 | inode = (struct tnode *) node; |
751 | 784 | ||
752 | if (inode->bits == 1) { | 785 | if (inode->bits == 1) { |
753 | put_child(t, tn, 2*i, inode->child[0]); | 786 | put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); |
754 | put_child(t, tn, 2*i+1, inode->child[1]); | 787 | put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); |
755 | 788 | ||
756 | tnode_free_safe(inode); | 789 | tnode_free_safe(inode); |
757 | continue; | 790 | continue; |
@@ -792,8 +825,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
792 | 825 | ||
793 | size = tnode_child_length(left); | 826 | size = tnode_child_length(left); |
794 | for (j = 0; j < size; j++) { | 827 | for (j = 0; j < size; j++) { |
795 | put_child(t, left, j, inode->child[j]); | 828 | put_child(t, left, j, rtnl_dereference(inode->child[j])); |
796 | put_child(t, right, j, inode->child[j + size]); | 829 | put_child(t, right, j, rtnl_dereference(inode->child[j + size])); |
797 | } | 830 | } |
798 | put_child(t, tn, 2*i, resize(t, left)); | 831 | put_child(t, tn, 2*i, resize(t, left)); |
799 | put_child(t, tn, 2*i+1, resize(t, right)); | 832 | put_child(t, tn, 2*i+1, resize(t, right)); |
@@ -803,18 +836,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
803 | tnode_free_safe(oldtnode); | 836 | tnode_free_safe(oldtnode); |
804 | return tn; | 837 | return tn; |
805 | nomem: | 838 | nomem: |
806 | { | 839 | tnode_clean_free(tn); |
807 | int size = tnode_child_length(tn); | 840 | return ERR_PTR(-ENOMEM); |
808 | int j; | ||
809 | |||
810 | for (j = 0; j < size; j++) | ||
811 | if (tn->child[j]) | ||
812 | tnode_free((struct tnode *)tn->child[j]); | ||
813 | |||
814 | tnode_free(tn); | ||
815 | |||
816 | return ERR_PTR(-ENOMEM); | ||
817 | } | ||
818 | } | 841 | } |
819 | 842 | ||
820 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 843 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
@@ -885,18 +908,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
885 | tnode_free_safe(oldtnode); | 908 | tnode_free_safe(oldtnode); |
886 | return tn; | 909 | return tn; |
887 | nomem: | 910 | nomem: |
888 | { | 911 | tnode_clean_free(tn); |
889 | int size = tnode_child_length(tn); | 912 | return ERR_PTR(-ENOMEM); |
890 | int j; | ||
891 | |||
892 | for (j = 0; j < size; j++) | ||
893 | if (tn->child[j]) | ||
894 | tnode_free((struct tnode *)tn->child[j]); | ||
895 | |||
896 | tnode_free(tn); | ||
897 | |||
898 | return ERR_PTR(-ENOMEM); | ||
899 | } | ||
900 | } | 913 | } |
901 | 914 | ||
902 | /* readside must use rcu_read_lock currently dump routines | 915 | /* readside must use rcu_read_lock currently dump routines |
@@ -1028,7 +1041,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1028 | t_key cindex; | 1041 | t_key cindex; |
1029 | 1042 | ||
1030 | pos = 0; | 1043 | pos = 0; |
1031 | n = t->trie; | 1044 | n = rtnl_dereference(t->trie); |
1032 | 1045 | ||
1033 | /* If we point to NULL, stop. Either the tree is empty and we should | 1046 | /* If we point to NULL, stop. Either the tree is empty and we should |
1034 | * just put a new leaf in if, or we have reached an empty child slot, | 1047 | * just put a new leaf in if, or we have reached an empty child slot, |
@@ -1314,6 +1327,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
1314 | } | 1327 | } |
1315 | } | 1328 | } |
1316 | 1329 | ||
1330 | if (!plen) | ||
1331 | tb->tb_num_default++; | ||
1332 | |||
1317 | list_add_tail_rcu(&new_fa->fa_list, | 1333 | list_add_tail_rcu(&new_fa->fa_list, |
1318 | (fa ? &fa->fa_list : fa_head)); | 1334 | (fa ? &fa->fa_list : fa_head)); |
1319 | 1335 | ||
@@ -1679,6 +1695,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1679 | 1695 | ||
1680 | list_del_rcu(&fa->fa_list); | 1696 | list_del_rcu(&fa->fa_list); |
1681 | 1697 | ||
1698 | if (!plen) | ||
1699 | tb->tb_num_default--; | ||
1700 | |||
1682 | if (list_empty(fa_head)) { | 1701 | if (list_empty(fa_head)) { |
1683 | hlist_del_rcu(&li->hlist); | 1702 | hlist_del_rcu(&li->hlist); |
1684 | free_leaf_info(li); | 1703 | free_leaf_info(li); |
@@ -1751,7 +1770,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) | |||
1751 | continue; | 1770 | continue; |
1752 | 1771 | ||
1753 | if (IS_LEAF(c)) { | 1772 | if (IS_LEAF(c)) { |
1754 | prefetch(p->child[idx]); | 1773 | prefetch(rcu_dereference_rtnl(p->child[idx])); |
1755 | return (struct leaf *) c; | 1774 | return (struct leaf *) c; |
1756 | } | 1775 | } |
1757 | 1776 | ||
@@ -1969,6 +1988,7 @@ struct fib_table *fib_trie_table(u32 id) | |||
1969 | 1988 | ||
1970 | tb->tb_id = id; | 1989 | tb->tb_id = id; |
1971 | tb->tb_default = -1; | 1990 | tb->tb_default = -1; |
1991 | tb->tb_num_default = 0; | ||
1972 | 1992 | ||
1973 | t = (struct trie *) tb->tb_data; | 1993 | t = (struct trie *) tb->tb_data; |
1974 | memset(t, 0, sizeof(*t)); | 1994 | memset(t, 0, sizeof(*t)); |
@@ -2264,7 +2284,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2264 | 2284 | ||
2265 | /* walk rest of this hash chain */ | 2285 | /* walk rest of this hash chain */ |
2266 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); | 2286 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); |
2267 | while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { | 2287 | while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { |
2268 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); | 2288 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); |
2269 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); | 2289 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); |
2270 | if (n) | 2290 | if (n) |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e5f8a71d3a2a..5395e45dcce6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -83,6 +83,7 @@ | |||
83 | #include <net/tcp.h> | 83 | #include <net/tcp.h> |
84 | #include <net/udp.h> | 84 | #include <net/udp.h> |
85 | #include <net/raw.h> | 85 | #include <net/raw.h> |
86 | #include <net/ping.h> | ||
86 | #include <linux/skbuff.h> | 87 | #include <linux/skbuff.h> |
87 | #include <net/sock.h> | 88 | #include <net/sock.h> |
88 | #include <linux/errno.h> | 89 | #include <linux/errno.h> |
@@ -108,8 +109,7 @@ struct icmp_bxm { | |||
108 | __be32 times[3]; | 109 | __be32 times[3]; |
109 | } data; | 110 | } data; |
110 | int head_len; | 111 | int head_len; |
111 | struct ip_options replyopts; | 112 | struct ip_options_data replyopts; |
112 | unsigned char optbuf[40]; | ||
113 | }; | 113 | }; |
114 | 114 | ||
115 | /* An array of errno for error messages from dest unreach. */ | 115 | /* An array of errno for error messages from dest unreach. */ |
@@ -234,7 +234,7 @@ static inline void icmp_xmit_unlock(struct sock *sk) | |||
234 | */ | 234 | */ |
235 | 235 | ||
236 | static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | 236 | static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, |
237 | int type, int code) | 237 | struct flowi4 *fl4, int type, int code) |
238 | { | 238 | { |
239 | struct dst_entry *dst = &rt->dst; | 239 | struct dst_entry *dst = &rt->dst; |
240 | bool rc = true; | 240 | bool rc = true; |
@@ -253,7 +253,7 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | |||
253 | /* Limit if icmp type is enabled in ratemask. */ | 253 | /* Limit if icmp type is enabled in ratemask. */ |
254 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { | 254 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { |
255 | if (!rt->peer) | 255 | if (!rt->peer) |
256 | rt_bind_peer(rt, 1); | 256 | rt_bind_peer(rt, fl4->daddr, 1); |
257 | rc = inet_peer_xrlim_allow(rt->peer, | 257 | rc = inet_peer_xrlim_allow(rt->peer, |
258 | net->ipv4.sysctl_icmp_ratelimit); | 258 | net->ipv4.sysctl_icmp_ratelimit); |
259 | } | 259 | } |
@@ -291,13 +291,14 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, | |||
291 | } | 291 | } |
292 | 292 | ||
293 | static void icmp_push_reply(struct icmp_bxm *icmp_param, | 293 | static void icmp_push_reply(struct icmp_bxm *icmp_param, |
294 | struct flowi4 *fl4, | ||
294 | struct ipcm_cookie *ipc, struct rtable **rt) | 295 | struct ipcm_cookie *ipc, struct rtable **rt) |
295 | { | 296 | { |
296 | struct sock *sk; | 297 | struct sock *sk; |
297 | struct sk_buff *skb; | 298 | struct sk_buff *skb; |
298 | 299 | ||
299 | sk = icmp_sk(dev_net((*rt)->dst.dev)); | 300 | sk = icmp_sk(dev_net((*rt)->dst.dev)); |
300 | if (ip_append_data(sk, icmp_glue_bits, icmp_param, | 301 | if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param, |
301 | icmp_param->data_len+icmp_param->head_len, | 302 | icmp_param->data_len+icmp_param->head_len, |
302 | icmp_param->head_len, | 303 | icmp_param->head_len, |
303 | ipc, rt, MSG_DONTWAIT) < 0) { | 304 | ipc, rt, MSG_DONTWAIT) < 0) { |
@@ -316,7 +317,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |||
316 | icmp_param->head_len, csum); | 317 | icmp_param->head_len, csum); |
317 | icmph->checksum = csum_fold(csum); | 318 | icmph->checksum = csum_fold(csum); |
318 | skb->ip_summed = CHECKSUM_NONE; | 319 | skb->ip_summed = CHECKSUM_NONE; |
319 | ip_push_pending_frames(sk); | 320 | ip_push_pending_frames(sk, fl4); |
320 | } | 321 | } |
321 | } | 322 | } |
322 | 323 | ||
@@ -329,11 +330,12 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
329 | struct ipcm_cookie ipc; | 330 | struct ipcm_cookie ipc; |
330 | struct rtable *rt = skb_rtable(skb); | 331 | struct rtable *rt = skb_rtable(skb); |
331 | struct net *net = dev_net(rt->dst.dev); | 332 | struct net *net = dev_net(rt->dst.dev); |
333 | struct flowi4 fl4; | ||
332 | struct sock *sk; | 334 | struct sock *sk; |
333 | struct inet_sock *inet; | 335 | struct inet_sock *inet; |
334 | __be32 daddr; | 336 | __be32 daddr; |
335 | 337 | ||
336 | if (ip_options_echo(&icmp_param->replyopts, skb)) | 338 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) |
337 | return; | 339 | return; |
338 | 340 | ||
339 | sk = icmp_xmit_lock(net); | 341 | sk = icmp_xmit_lock(net); |
@@ -344,65 +346,60 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
344 | icmp_param->data.icmph.checksum = 0; | 346 | icmp_param->data.icmph.checksum = 0; |
345 | 347 | ||
346 | inet->tos = ip_hdr(skb)->tos; | 348 | inet->tos = ip_hdr(skb)->tos; |
347 | daddr = ipc.addr = rt->rt_src; | 349 | daddr = ipc.addr = ip_hdr(skb)->saddr; |
348 | ipc.opt = NULL; | 350 | ipc.opt = NULL; |
349 | ipc.tx_flags = 0; | 351 | ipc.tx_flags = 0; |
350 | if (icmp_param->replyopts.optlen) { | 352 | if (icmp_param->replyopts.opt.opt.optlen) { |
351 | ipc.opt = &icmp_param->replyopts; | 353 | ipc.opt = &icmp_param->replyopts.opt; |
352 | if (ipc.opt->srr) | 354 | if (ipc.opt->opt.srr) |
353 | daddr = icmp_param->replyopts.faddr; | 355 | daddr = icmp_param->replyopts.opt.opt.faddr; |
354 | } | 356 | } |
355 | { | 357 | memset(&fl4, 0, sizeof(fl4)); |
356 | struct flowi4 fl4 = { | 358 | fl4.daddr = daddr; |
357 | .daddr = daddr, | 359 | fl4.saddr = rt->rt_spec_dst; |
358 | .saddr = rt->rt_spec_dst, | 360 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); |
359 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), | 361 | fl4.flowi4_proto = IPPROTO_ICMP; |
360 | .flowi4_proto = IPPROTO_ICMP, | 362 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
361 | }; | 363 | rt = ip_route_output_key(net, &fl4); |
362 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | 364 | if (IS_ERR(rt)) |
363 | rt = ip_route_output_key(net, &fl4); | 365 | goto out_unlock; |
364 | if (IS_ERR(rt)) | 366 | if (icmpv4_xrlim_allow(net, rt, &fl4, icmp_param->data.icmph.type, |
365 | goto out_unlock; | ||
366 | } | ||
367 | if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, | ||
368 | icmp_param->data.icmph.code)) | 367 | icmp_param->data.icmph.code)) |
369 | icmp_push_reply(icmp_param, &ipc, &rt); | 368 | icmp_push_reply(icmp_param, &fl4, &ipc, &rt); |
370 | ip_rt_put(rt); | 369 | ip_rt_put(rt); |
371 | out_unlock: | 370 | out_unlock: |
372 | icmp_xmit_unlock(sk); | 371 | icmp_xmit_unlock(sk); |
373 | } | 372 | } |
374 | 373 | ||
375 | static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, | 374 | static struct rtable *icmp_route_lookup(struct net *net, |
376 | struct iphdr *iph, | 375 | struct flowi4 *fl4, |
376 | struct sk_buff *skb_in, | ||
377 | const struct iphdr *iph, | ||
377 | __be32 saddr, u8 tos, | 378 | __be32 saddr, u8 tos, |
378 | int type, int code, | 379 | int type, int code, |
379 | struct icmp_bxm *param) | 380 | struct icmp_bxm *param) |
380 | { | 381 | { |
381 | struct flowi4 fl4 = { | ||
382 | .daddr = (param->replyopts.srr ? | ||
383 | param->replyopts.faddr : iph->saddr), | ||
384 | .saddr = saddr, | ||
385 | .flowi4_tos = RT_TOS(tos), | ||
386 | .flowi4_proto = IPPROTO_ICMP, | ||
387 | .fl4_icmp_type = type, | ||
388 | .fl4_icmp_code = code, | ||
389 | }; | ||
390 | struct rtable *rt, *rt2; | 382 | struct rtable *rt, *rt2; |
391 | int err; | 383 | int err; |
392 | 384 | ||
393 | security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); | 385 | memset(fl4, 0, sizeof(*fl4)); |
394 | rt = __ip_route_output_key(net, &fl4); | 386 | fl4->daddr = (param->replyopts.opt.opt.srr ? |
387 | param->replyopts.opt.opt.faddr : iph->saddr); | ||
388 | fl4->saddr = saddr; | ||
389 | fl4->flowi4_tos = RT_TOS(tos); | ||
390 | fl4->flowi4_proto = IPPROTO_ICMP; | ||
391 | fl4->fl4_icmp_type = type; | ||
392 | fl4->fl4_icmp_code = code; | ||
393 | security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); | ||
394 | rt = __ip_route_output_key(net, fl4); | ||
395 | if (IS_ERR(rt)) | 395 | if (IS_ERR(rt)) |
396 | return rt; | 396 | return rt; |
397 | 397 | ||
398 | /* No need to clone since we're just using its address. */ | 398 | /* No need to clone since we're just using its address. */ |
399 | rt2 = rt; | 399 | rt2 = rt; |
400 | 400 | ||
401 | if (!fl4.saddr) | ||
402 | fl4.saddr = rt->rt_src; | ||
403 | |||
404 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, | 401 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, |
405 | flowi4_to_flowi(&fl4), NULL, 0); | 402 | flowi4_to_flowi(fl4), NULL, 0); |
406 | if (!IS_ERR(rt)) { | 403 | if (!IS_ERR(rt)) { |
407 | if (rt != rt2) | 404 | if (rt != rt2) |
408 | return rt; | 405 | return rt; |
@@ -411,19 +408,19 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, | |||
411 | } else | 408 | } else |
412 | return rt; | 409 | return rt; |
413 | 410 | ||
414 | err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); | 411 | err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET); |
415 | if (err) | 412 | if (err) |
416 | goto relookup_failed; | 413 | goto relookup_failed; |
417 | 414 | ||
418 | if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { | 415 | if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) { |
419 | rt2 = __ip_route_output_key(net, &fl4); | 416 | rt2 = __ip_route_output_key(net, fl4); |
420 | if (IS_ERR(rt2)) | 417 | if (IS_ERR(rt2)) |
421 | err = PTR_ERR(rt2); | 418 | err = PTR_ERR(rt2); |
422 | } else { | 419 | } else { |
423 | struct flowi4 fl4_2 = {}; | 420 | struct flowi4 fl4_2 = {}; |
424 | unsigned long orefdst; | 421 | unsigned long orefdst; |
425 | 422 | ||
426 | fl4_2.daddr = fl4.saddr; | 423 | fl4_2.daddr = fl4->saddr; |
427 | rt2 = ip_route_output_key(net, &fl4_2); | 424 | rt2 = ip_route_output_key(net, &fl4_2); |
428 | if (IS_ERR(rt2)) { | 425 | if (IS_ERR(rt2)) { |
429 | err = PTR_ERR(rt2); | 426 | err = PTR_ERR(rt2); |
@@ -431,7 +428,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, | |||
431 | } | 428 | } |
432 | /* Ugh! */ | 429 | /* Ugh! */ |
433 | orefdst = skb_in->_skb_refdst; /* save old refdst */ | 430 | orefdst = skb_in->_skb_refdst; /* save old refdst */ |
434 | err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, | 431 | err = ip_route_input(skb_in, fl4->daddr, fl4->saddr, |
435 | RT_TOS(tos), rt2->dst.dev); | 432 | RT_TOS(tos), rt2->dst.dev); |
436 | 433 | ||
437 | dst_release(&rt2->dst); | 434 | dst_release(&rt2->dst); |
@@ -443,7 +440,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, | |||
443 | goto relookup_failed; | 440 | goto relookup_failed; |
444 | 441 | ||
445 | rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, | 442 | rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, |
446 | flowi4_to_flowi(&fl4), NULL, | 443 | flowi4_to_flowi(fl4), NULL, |
447 | XFRM_LOOKUP_ICMP); | 444 | XFRM_LOOKUP_ICMP); |
448 | if (!IS_ERR(rt2)) { | 445 | if (!IS_ERR(rt2)) { |
449 | dst_release(&rt->dst); | 446 | dst_release(&rt->dst); |
@@ -482,6 +479,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
482 | struct icmp_bxm icmp_param; | 479 | struct icmp_bxm icmp_param; |
483 | struct rtable *rt = skb_rtable(skb_in); | 480 | struct rtable *rt = skb_rtable(skb_in); |
484 | struct ipcm_cookie ipc; | 481 | struct ipcm_cookie ipc; |
482 | struct flowi4 fl4; | ||
485 | __be32 saddr; | 483 | __be32 saddr; |
486 | u8 tos; | 484 | u8 tos; |
487 | struct net *net; | 485 | struct net *net; |
@@ -581,7 +579,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
581 | IPTOS_PREC_INTERNETCONTROL) : | 579 | IPTOS_PREC_INTERNETCONTROL) : |
582 | iph->tos; | 580 | iph->tos; |
583 | 581 | ||
584 | if (ip_options_echo(&icmp_param.replyopts, skb_in)) | 582 | if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) |
585 | goto out_unlock; | 583 | goto out_unlock; |
586 | 584 | ||
587 | 585 | ||
@@ -597,15 +595,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
597 | icmp_param.offset = skb_network_offset(skb_in); | 595 | icmp_param.offset = skb_network_offset(skb_in); |
598 | inet_sk(sk)->tos = tos; | 596 | inet_sk(sk)->tos = tos; |
599 | ipc.addr = iph->saddr; | 597 | ipc.addr = iph->saddr; |
600 | ipc.opt = &icmp_param.replyopts; | 598 | ipc.opt = &icmp_param.replyopts.opt; |
601 | ipc.tx_flags = 0; | 599 | ipc.tx_flags = 0; |
602 | 600 | ||
603 | rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, | 601 | rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, |
604 | type, code, &icmp_param); | 602 | type, code, &icmp_param); |
605 | if (IS_ERR(rt)) | 603 | if (IS_ERR(rt)) |
606 | goto out_unlock; | 604 | goto out_unlock; |
607 | 605 | ||
608 | if (!icmpv4_xrlim_allow(net, rt, type, code)) | 606 | if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code)) |
609 | goto ende; | 607 | goto ende; |
610 | 608 | ||
611 | /* RFC says return as much as we can without exceeding 576 bytes. */ | 609 | /* RFC says return as much as we can without exceeding 576 bytes. */ |
@@ -613,7 +611,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
613 | room = dst_mtu(&rt->dst); | 611 | room = dst_mtu(&rt->dst); |
614 | if (room > 576) | 612 | if (room > 576) |
615 | room = 576; | 613 | room = 576; |
616 | room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; | 614 | room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; |
617 | room -= sizeof(struct icmphdr); | 615 | room -= sizeof(struct icmphdr); |
618 | 616 | ||
619 | icmp_param.data_len = skb_in->len - icmp_param.offset; | 617 | icmp_param.data_len = skb_in->len - icmp_param.offset; |
@@ -621,7 +619,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
621 | icmp_param.data_len = room; | 619 | icmp_param.data_len = room; |
622 | icmp_param.head_len = sizeof(struct icmphdr); | 620 | icmp_param.head_len = sizeof(struct icmphdr); |
623 | 621 | ||
624 | icmp_push_reply(&icmp_param, &ipc, &rt); | 622 | icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); |
625 | ende: | 623 | ende: |
626 | ip_rt_put(rt); | 624 | ip_rt_put(rt); |
627 | out_unlock: | 625 | out_unlock: |
@@ -637,7 +635,7 @@ EXPORT_SYMBOL(icmp_send); | |||
637 | 635 | ||
638 | static void icmp_unreach(struct sk_buff *skb) | 636 | static void icmp_unreach(struct sk_buff *skb) |
639 | { | 637 | { |
640 | struct iphdr *iph; | 638 | const struct iphdr *iph; |
641 | struct icmphdr *icmph; | 639 | struct icmphdr *icmph; |
642 | int hash, protocol; | 640 | int hash, protocol; |
643 | const struct net_protocol *ipprot; | 641 | const struct net_protocol *ipprot; |
@@ -656,7 +654,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
656 | goto out_err; | 654 | goto out_err; |
657 | 655 | ||
658 | icmph = icmp_hdr(skb); | 656 | icmph = icmp_hdr(skb); |
659 | iph = (struct iphdr *)skb->data; | 657 | iph = (const struct iphdr *)skb->data; |
660 | 658 | ||
661 | if (iph->ihl < 5) /* Mangled header, drop. */ | 659 | if (iph->ihl < 5) /* Mangled header, drop. */ |
662 | goto out_err; | 660 | goto out_err; |
@@ -729,7 +727,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
729 | if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) | 727 | if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) |
730 | goto out; | 728 | goto out; |
731 | 729 | ||
732 | iph = (struct iphdr *)skb->data; | 730 | iph = (const struct iphdr *)skb->data; |
733 | protocol = iph->protocol; | 731 | protocol = iph->protocol; |
734 | 732 | ||
735 | /* | 733 | /* |
@@ -758,7 +756,7 @@ out_err: | |||
758 | 756 | ||
759 | static void icmp_redirect(struct sk_buff *skb) | 757 | static void icmp_redirect(struct sk_buff *skb) |
760 | { | 758 | { |
761 | struct iphdr *iph; | 759 | const struct iphdr *iph; |
762 | 760 | ||
763 | if (skb->len < sizeof(struct iphdr)) | 761 | if (skb->len < sizeof(struct iphdr)) |
764 | goto out_err; | 762 | goto out_err; |
@@ -769,7 +767,7 @@ static void icmp_redirect(struct sk_buff *skb) | |||
769 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) | 767 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) |
770 | goto out; | 768 | goto out; |
771 | 769 | ||
772 | iph = (struct iphdr *)skb->data; | 770 | iph = (const struct iphdr *)skb->data; |
773 | 771 | ||
774 | switch (icmp_hdr(skb)->code & 7) { | 772 | switch (icmp_hdr(skb)->code & 7) { |
775 | case ICMP_REDIR_NET: | 773 | case ICMP_REDIR_NET: |
@@ -784,6 +782,15 @@ static void icmp_redirect(struct sk_buff *skb) | |||
784 | iph->saddr, skb->dev); | 782 | iph->saddr, skb->dev); |
785 | break; | 783 | break; |
786 | } | 784 | } |
785 | |||
786 | /* Ping wants to see redirects. | ||
787 | * Let's pretend they are errors of sorts... */ | ||
788 | if (iph->protocol == IPPROTO_ICMP && | ||
789 | iph->ihl >= 5 && | ||
790 | pskb_may_pull(skb, (iph->ihl<<2)+8)) { | ||
791 | ping_err(skb, icmp_hdr(skb)->un.gateway); | ||
792 | } | ||
793 | |||
787 | out: | 794 | out: |
788 | return; | 795 | return; |
789 | out_err: | 796 | out_err: |
@@ -933,12 +940,12 @@ static void icmp_address_reply(struct sk_buff *skb) | |||
933 | BUG_ON(mp == NULL); | 940 | BUG_ON(mp == NULL); |
934 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { | 941 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { |
935 | if (*mp == ifa->ifa_mask && | 942 | if (*mp == ifa->ifa_mask && |
936 | inet_ifa_match(rt->rt_src, ifa)) | 943 | inet_ifa_match(ip_hdr(skb)->saddr, ifa)) |
937 | break; | 944 | break; |
938 | } | 945 | } |
939 | if (!ifa && net_ratelimit()) { | 946 | if (!ifa && net_ratelimit()) { |
940 | printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", | 947 | printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", |
941 | mp, dev->name, &rt->rt_src); | 948 | mp, dev->name, &ip_hdr(skb)->saddr); |
942 | } | 949 | } |
943 | } | 950 | } |
944 | } | 951 | } |
@@ -1044,7 +1051,7 @@ error: | |||
1044 | */ | 1051 | */ |
1045 | static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { | 1052 | static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { |
1046 | [ICMP_ECHOREPLY] = { | 1053 | [ICMP_ECHOREPLY] = { |
1047 | .handler = icmp_discard, | 1054 | .handler = ping_rcv, |
1048 | }, | 1055 | }, |
1049 | [1] = { | 1056 | [1] = { |
1050 | .handler = icmp_discard, | 1057 | .handler = icmp_discard, |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8f62d66d0857..672e476c8c8a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -303,6 +303,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
303 | struct iphdr *pip; | 303 | struct iphdr *pip; |
304 | struct igmpv3_report *pig; | 304 | struct igmpv3_report *pig; |
305 | struct net *net = dev_net(dev); | 305 | struct net *net = dev_net(dev); |
306 | struct flowi4 fl4; | ||
306 | 307 | ||
307 | while (1) { | 308 | while (1) { |
308 | skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), | 309 | skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), |
@@ -315,18 +316,13 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
315 | } | 316 | } |
316 | igmp_skb_size(skb) = size; | 317 | igmp_skb_size(skb) = size; |
317 | 318 | ||
318 | rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, | 319 | rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, |
319 | 0, 0, | 320 | 0, 0, |
320 | IPPROTO_IGMP, 0, dev->ifindex); | 321 | IPPROTO_IGMP, 0, dev->ifindex); |
321 | if (IS_ERR(rt)) { | 322 | if (IS_ERR(rt)) { |
322 | kfree_skb(skb); | 323 | kfree_skb(skb); |
323 | return NULL; | 324 | return NULL; |
324 | } | 325 | } |
325 | if (rt->rt_src == 0) { | ||
326 | kfree_skb(skb); | ||
327 | ip_rt_put(rt); | ||
328 | return NULL; | ||
329 | } | ||
330 | 326 | ||
331 | skb_dst_set(skb, &rt->dst); | 327 | skb_dst_set(skb, &rt->dst); |
332 | skb->dev = dev; | 328 | skb->dev = dev; |
@@ -342,8 +338,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
342 | pip->tos = 0xc0; | 338 | pip->tos = 0xc0; |
343 | pip->frag_off = htons(IP_DF); | 339 | pip->frag_off = htons(IP_DF); |
344 | pip->ttl = 1; | 340 | pip->ttl = 1; |
345 | pip->daddr = rt->rt_dst; | 341 | pip->daddr = fl4.daddr; |
346 | pip->saddr = rt->rt_src; | 342 | pip->saddr = fl4.saddr; |
347 | pip->protocol = IPPROTO_IGMP; | 343 | pip->protocol = IPPROTO_IGMP; |
348 | pip->tot_len = 0; /* filled in later */ | 344 | pip->tot_len = 0; /* filled in later */ |
349 | ip_select_ident(pip, &rt->dst, NULL); | 345 | ip_select_ident(pip, &rt->dst, NULL); |
@@ -649,6 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
649 | struct net_device *dev = in_dev->dev; | 645 | struct net_device *dev = in_dev->dev; |
650 | struct net *net = dev_net(dev); | 646 | struct net *net = dev_net(dev); |
651 | __be32 group = pmc ? pmc->multiaddr : 0; | 647 | __be32 group = pmc ? pmc->multiaddr : 0; |
648 | struct flowi4 fl4; | ||
652 | __be32 dst; | 649 | __be32 dst; |
653 | 650 | ||
654 | if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) | 651 | if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) |
@@ -658,17 +655,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
658 | else | 655 | else |
659 | dst = group; | 656 | dst = group; |
660 | 657 | ||
661 | rt = ip_route_output_ports(net, NULL, dst, 0, | 658 | rt = ip_route_output_ports(net, &fl4, NULL, dst, 0, |
662 | 0, 0, | 659 | 0, 0, |
663 | IPPROTO_IGMP, 0, dev->ifindex); | 660 | IPPROTO_IGMP, 0, dev->ifindex); |
664 | if (IS_ERR(rt)) | 661 | if (IS_ERR(rt)) |
665 | return -1; | 662 | return -1; |
666 | 663 | ||
667 | if (rt->rt_src == 0) { | ||
668 | ip_rt_put(rt); | ||
669 | return -1; | ||
670 | } | ||
671 | |||
672 | skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); | 664 | skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); |
673 | if (skb == NULL) { | 665 | if (skb == NULL) { |
674 | ip_rt_put(rt); | 666 | ip_rt_put(rt); |
@@ -689,7 +681,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
689 | iph->frag_off = htons(IP_DF); | 681 | iph->frag_off = htons(IP_DF); |
690 | iph->ttl = 1; | 682 | iph->ttl = 1; |
691 | iph->daddr = dst; | 683 | iph->daddr = dst; |
692 | iph->saddr = rt->rt_src; | 684 | iph->saddr = fl4.saddr; |
693 | iph->protocol = IPPROTO_IGMP; | 685 | iph->protocol = IPPROTO_IGMP; |
694 | ip_select_ident(iph, &rt->dst, NULL); | 686 | ip_select_ident(iph, &rt->dst, NULL); |
695 | ((u8*)&iph[1])[0] = IPOPT_RA; | 687 | ((u8*)&iph[1])[0] = IPOPT_RA; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 38f23e721b80..61fac4cabc78 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -350,30 +350,24 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) | |||
350 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | 350 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); |
351 | 351 | ||
352 | struct dst_entry *inet_csk_route_req(struct sock *sk, | 352 | struct dst_entry *inet_csk_route_req(struct sock *sk, |
353 | struct flowi4 *fl4, | ||
353 | const struct request_sock *req) | 354 | const struct request_sock *req) |
354 | { | 355 | { |
355 | struct rtable *rt; | 356 | struct rtable *rt; |
356 | const struct inet_request_sock *ireq = inet_rsk(req); | 357 | const struct inet_request_sock *ireq = inet_rsk(req); |
357 | struct ip_options *opt = inet_rsk(req)->opt; | 358 | struct ip_options_rcu *opt = inet_rsk(req)->opt; |
358 | struct flowi4 fl4 = { | ||
359 | .flowi4_oif = sk->sk_bound_dev_if, | ||
360 | .flowi4_mark = sk->sk_mark, | ||
361 | .daddr = ((opt && opt->srr) ? | ||
362 | opt->faddr : ireq->rmt_addr), | ||
363 | .saddr = ireq->loc_addr, | ||
364 | .flowi4_tos = RT_CONN_FLAGS(sk), | ||
365 | .flowi4_proto = sk->sk_protocol, | ||
366 | .flowi4_flags = inet_sk_flowi_flags(sk), | ||
367 | .fl4_sport = inet_sk(sk)->inet_sport, | ||
368 | .fl4_dport = ireq->rmt_port, | ||
369 | }; | ||
370 | struct net *net = sock_net(sk); | 359 | struct net *net = sock_net(sk); |
371 | 360 | ||
372 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | 361 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, |
373 | rt = ip_route_output_flow(net, &fl4, sk); | 362 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, |
363 | sk->sk_protocol, inet_sk_flowi_flags(sk), | ||
364 | (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, | ||
365 | ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); | ||
366 | security_req_classify_flow(req, flowi4_to_flowi(fl4)); | ||
367 | rt = ip_route_output_flow(net, fl4, sk); | ||
374 | if (IS_ERR(rt)) | 368 | if (IS_ERR(rt)) |
375 | goto no_route; | 369 | goto no_route; |
376 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 370 | if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) |
377 | goto route_err; | 371 | goto route_err; |
378 | return &rt->dst; | 372 | return &rt->dst; |
379 | 373 | ||
@@ -385,6 +379,39 @@ no_route: | |||
385 | } | 379 | } |
386 | EXPORT_SYMBOL_GPL(inet_csk_route_req); | 380 | EXPORT_SYMBOL_GPL(inet_csk_route_req); |
387 | 381 | ||
382 | struct dst_entry *inet_csk_route_child_sock(struct sock *sk, | ||
383 | struct sock *newsk, | ||
384 | const struct request_sock *req) | ||
385 | { | ||
386 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
387 | struct inet_sock *newinet = inet_sk(newsk); | ||
388 | struct ip_options_rcu *opt = ireq->opt; | ||
389 | struct net *net = sock_net(sk); | ||
390 | struct flowi4 *fl4; | ||
391 | struct rtable *rt; | ||
392 | |||
393 | fl4 = &newinet->cork.fl.u.ip4; | ||
394 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, | ||
395 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, | ||
396 | sk->sk_protocol, inet_sk_flowi_flags(sk), | ||
397 | (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, | ||
398 | ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); | ||
399 | security_req_classify_flow(req, flowi4_to_flowi(fl4)); | ||
400 | rt = ip_route_output_flow(net, fl4, sk); | ||
401 | if (IS_ERR(rt)) | ||
402 | goto no_route; | ||
403 | if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) | ||
404 | goto route_err; | ||
405 | return &rt->dst; | ||
406 | |||
407 | route_err: | ||
408 | ip_rt_put(rt); | ||
409 | no_route: | ||
410 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | ||
411 | return NULL; | ||
412 | } | ||
413 | EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); | ||
414 | |||
388 | static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, | 415 | static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, |
389 | const u32 rnd, const u32 synq_hsize) | 416 | const u32 rnd, const u32 synq_hsize) |
390 | { | 417 | { |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2ada17129fce..6ffe94ca5bc9 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -124,7 +124,7 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
124 | 124 | ||
125 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 125 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
126 | if (r->idiag_family == AF_INET6) { | 126 | if (r->idiag_family == AF_INET6) { |
127 | struct ipv6_pinfo *np = inet6_sk(sk); | 127 | const struct ipv6_pinfo *np = inet6_sk(sk); |
128 | 128 | ||
129 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, | 129 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, |
130 | &np->rcv_saddr); | 130 | &np->rcv_saddr); |
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 47038cb6c138..85a0f75dae64 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c | |||
@@ -51,8 +51,8 @@ MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); | |||
51 | * Basic tcp checks whether packet is suitable for LRO | 51 | * Basic tcp checks whether packet is suitable for LRO |
52 | */ | 52 | */ |
53 | 53 | ||
54 | static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, | 54 | static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph, |
55 | int len, struct net_lro_desc *lro_desc) | 55 | int len, const struct net_lro_desc *lro_desc) |
56 | { | 56 | { |
57 | /* check ip header: don't aggregate padded frames */ | 57 | /* check ip header: don't aggregate padded frames */ |
58 | if (ntohs(iph->tot_len) != len) | 58 | if (ntohs(iph->tot_len) != len) |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 99461f09320f..3b34d1c86270 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb) | |||
84 | 84 | ||
85 | rt = skb_rtable(skb); | 85 | rt = skb_rtable(skb); |
86 | 86 | ||
87 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 87 | if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway) |
88 | goto sr_failed; | 88 | goto sr_failed; |
89 | 89 | ||
90 | if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && | 90 | if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b1d282f11be7..0ad6035f6366 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -77,22 +77,40 @@ struct ipq { | |||
77 | struct inet_peer *peer; | 77 | struct inet_peer *peer; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | #define IPFRAG_ECN_CLEAR 0x01 /* one frag had INET_ECN_NOT_ECT */ | 80 | /* RFC 3168 support : |
81 | #define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */ | 81 | * We want to check ECN values of all fragments, do detect invalid combinations. |
82 | * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. | ||
83 | */ | ||
84 | #define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ | ||
85 | #define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ | ||
86 | #define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ | ||
87 | #define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ | ||
82 | 88 | ||
83 | static inline u8 ip4_frag_ecn(u8 tos) | 89 | static inline u8 ip4_frag_ecn(u8 tos) |
84 | { | 90 | { |
85 | tos = (tos & INET_ECN_MASK) + 1; | 91 | return 1 << (tos & INET_ECN_MASK); |
86 | /* | ||
87 | * After the last operation we have (in binary): | ||
88 | * INET_ECN_NOT_ECT => 001 | ||
89 | * INET_ECN_ECT_1 => 010 | ||
90 | * INET_ECN_ECT_0 => 011 | ||
91 | * INET_ECN_CE => 100 | ||
92 | */ | ||
93 | return (tos & 2) ? 0 : tos; | ||
94 | } | 92 | } |
95 | 93 | ||
94 | /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements | ||
95 | * Value : 0xff if frame should be dropped. | ||
96 | * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field | ||
97 | */ | ||
98 | static const u8 ip4_frag_ecn_table[16] = { | ||
99 | /* at least one fragment had CE, and others ECT_0 or ECT_1 */ | ||
100 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, | ||
101 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, | ||
102 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, | ||
103 | |||
104 | /* invalid combinations : drop frame */ | ||
105 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, | ||
106 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, | ||
107 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, | ||
108 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, | ||
109 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, | ||
110 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, | ||
111 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, | ||
112 | }; | ||
113 | |||
96 | static struct inet_frags ip4_frags; | 114 | static struct inet_frags ip4_frags; |
97 | 115 | ||
98 | int ip_frag_nqueues(struct net *net) | 116 | int ip_frag_nqueues(struct net *net) |
@@ -524,9 +542,15 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
524 | int len; | 542 | int len; |
525 | int ihlen; | 543 | int ihlen; |
526 | int err; | 544 | int err; |
545 | u8 ecn; | ||
527 | 546 | ||
528 | ipq_kill(qp); | 547 | ipq_kill(qp); |
529 | 548 | ||
549 | ecn = ip4_frag_ecn_table[qp->ecn]; | ||
550 | if (unlikely(ecn == 0xff)) { | ||
551 | err = -EINVAL; | ||
552 | goto out_fail; | ||
553 | } | ||
530 | /* Make the one we just received the head. */ | 554 | /* Make the one we just received the head. */ |
531 | if (prev) { | 555 | if (prev) { |
532 | head = prev->next; | 556 | head = prev->next; |
@@ -605,17 +629,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
605 | iph = ip_hdr(head); | 629 | iph = ip_hdr(head); |
606 | iph->frag_off = 0; | 630 | iph->frag_off = 0; |
607 | iph->tot_len = htons(len); | 631 | iph->tot_len = htons(len); |
608 | /* RFC3168 5.3 Fragmentation support | 632 | iph->tos |= ecn; |
609 | * If one fragment had INET_ECN_NOT_ECT, | ||
610 | * reassembled frame also has INET_ECN_NOT_ECT | ||
611 | * Elif one fragment had INET_ECN_CE | ||
612 | * reassembled frame also has INET_ECN_CE | ||
613 | */ | ||
614 | if (qp->ecn & IPFRAG_ECN_CLEAR) | ||
615 | iph->tos &= ~INET_ECN_MASK; | ||
616 | else if (qp->ecn & IPFRAG_ECN_SET_CE) | ||
617 | iph->tos |= INET_ECN_CE; | ||
618 | |||
619 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 633 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
620 | qp->q.fragments = NULL; | 634 | qp->q.fragments = NULL; |
621 | qp->q.fragments_tail = NULL; | 635 | qp->q.fragments_tail = NULL; |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index da5941f18c3c..8871067560db 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -413,11 +413,6 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, | |||
413 | 413 | ||
414 | dev_net_set(dev, net); | 414 | dev_net_set(dev, net); |
415 | 415 | ||
416 | if (strchr(name, '%')) { | ||
417 | if (dev_alloc_name(dev, name) < 0) | ||
418 | goto failed_free; | ||
419 | } | ||
420 | |||
421 | nt = netdev_priv(dev); | 416 | nt = netdev_priv(dev); |
422 | nt->parms = *parms; | 417 | nt->parms = *parms; |
423 | dev->rtnl_link_ops = &ipgre_link_ops; | 418 | dev->rtnl_link_ops = &ipgre_link_ops; |
@@ -462,7 +457,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
462 | by themself??? | 457 | by themself??? |
463 | */ | 458 | */ |
464 | 459 | ||
465 | struct iphdr *iph = (struct iphdr *)skb->data; | 460 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
466 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); | 461 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); |
467 | int grehlen = (iph->ihl<<2) + 4; | 462 | int grehlen = (iph->ihl<<2) + 4; |
468 | const int type = icmp_hdr(skb)->type; | 463 | const int type = icmp_hdr(skb)->type; |
@@ -534,7 +529,7 @@ out: | |||
534 | rcu_read_unlock(); | 529 | rcu_read_unlock(); |
535 | } | 530 | } |
536 | 531 | ||
537 | static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | 532 | static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) |
538 | { | 533 | { |
539 | if (INET_ECN_is_ce(iph->tos)) { | 534 | if (INET_ECN_is_ce(iph->tos)) { |
540 | if (skb->protocol == htons(ETH_P_IP)) { | 535 | if (skb->protocol == htons(ETH_P_IP)) { |
@@ -546,19 +541,19 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | |||
546 | } | 541 | } |
547 | 542 | ||
548 | static inline u8 | 543 | static inline u8 |
549 | ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) | 544 | ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) |
550 | { | 545 | { |
551 | u8 inner = 0; | 546 | u8 inner = 0; |
552 | if (skb->protocol == htons(ETH_P_IP)) | 547 | if (skb->protocol == htons(ETH_P_IP)) |
553 | inner = old_iph->tos; | 548 | inner = old_iph->tos; |
554 | else if (skb->protocol == htons(ETH_P_IPV6)) | 549 | else if (skb->protocol == htons(ETH_P_IPV6)) |
555 | inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | 550 | inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); |
556 | return INET_ECN_encapsulate(tos, inner); | 551 | return INET_ECN_encapsulate(tos, inner); |
557 | } | 552 | } |
558 | 553 | ||
559 | static int ipgre_rcv(struct sk_buff *skb) | 554 | static int ipgre_rcv(struct sk_buff *skb) |
560 | { | 555 | { |
561 | struct iphdr *iph; | 556 | const struct iphdr *iph; |
562 | u8 *h; | 557 | u8 *h; |
563 | __be16 flags; | 558 | __be16 flags; |
564 | __sum16 csum = 0; | 559 | __sum16 csum = 0; |
@@ -697,8 +692,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
697 | { | 692 | { |
698 | struct ip_tunnel *tunnel = netdev_priv(dev); | 693 | struct ip_tunnel *tunnel = netdev_priv(dev); |
699 | struct pcpu_tstats *tstats; | 694 | struct pcpu_tstats *tstats; |
700 | struct iphdr *old_iph = ip_hdr(skb); | 695 | const struct iphdr *old_iph = ip_hdr(skb); |
701 | struct iphdr *tiph; | 696 | const struct iphdr *tiph; |
697 | struct flowi4 fl4; | ||
702 | u8 tos; | 698 | u8 tos; |
703 | __be16 df; | 699 | __be16 df; |
704 | struct rtable *rt; /* Route to the other host */ | 700 | struct rtable *rt; /* Route to the other host */ |
@@ -714,7 +710,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
714 | 710 | ||
715 | if (dev->header_ops && dev->type == ARPHRD_IPGRE) { | 711 | if (dev->header_ops && dev->type == ARPHRD_IPGRE) { |
716 | gre_hlen = 0; | 712 | gre_hlen = 0; |
717 | tiph = (struct iphdr *)skb->data; | 713 | tiph = (const struct iphdr *)skb->data; |
718 | } else { | 714 | } else { |
719 | gre_hlen = tunnel->hlen; | 715 | gre_hlen = tunnel->hlen; |
720 | tiph = &tunnel->parms.iph; | 716 | tiph = &tunnel->parms.iph; |
@@ -735,14 +731,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
735 | } | 731 | } |
736 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 732 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
737 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 733 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
738 | struct in6_addr *addr6; | 734 | const struct in6_addr *addr6; |
739 | int addr_type; | 735 | int addr_type; |
740 | struct neighbour *neigh = skb_dst(skb)->neighbour; | 736 | struct neighbour *neigh = skb_dst(skb)->neighbour; |
741 | 737 | ||
742 | if (neigh == NULL) | 738 | if (neigh == NULL) |
743 | goto tx_error; | 739 | goto tx_error; |
744 | 740 | ||
745 | addr6 = (struct in6_addr *)&neigh->primary_key; | 741 | addr6 = (const struct in6_addr *)&neigh->primary_key; |
746 | addr_type = ipv6_addr_type(addr6); | 742 | addr_type = ipv6_addr_type(addr6); |
747 | 743 | ||
748 | if (addr_type == IPV6_ADDR_ANY) { | 744 | if (addr_type == IPV6_ADDR_ANY) { |
@@ -766,10 +762,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
766 | if (skb->protocol == htons(ETH_P_IP)) | 762 | if (skb->protocol == htons(ETH_P_IP)) |
767 | tos = old_iph->tos; | 763 | tos = old_iph->tos; |
768 | else if (skb->protocol == htons(ETH_P_IPV6)) | 764 | else if (skb->protocol == htons(ETH_P_IPV6)) |
769 | tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | 765 | tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); |
770 | } | 766 | } |
771 | 767 | ||
772 | rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, | 768 | rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, |
773 | tunnel->parms.o_key, RT_TOS(tos), | 769 | tunnel->parms.o_key, RT_TOS(tos), |
774 | tunnel->parms.link); | 770 | tunnel->parms.link); |
775 | if (IS_ERR(rt)) { | 771 | if (IS_ERR(rt)) { |
@@ -873,15 +869,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
873 | iph->frag_off = df; | 869 | iph->frag_off = df; |
874 | iph->protocol = IPPROTO_GRE; | 870 | iph->protocol = IPPROTO_GRE; |
875 | iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); | 871 | iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); |
876 | iph->daddr = rt->rt_dst; | 872 | iph->daddr = fl4.daddr; |
877 | iph->saddr = rt->rt_src; | 873 | iph->saddr = fl4.saddr; |
878 | 874 | ||
879 | if ((iph->ttl = tiph->ttl) == 0) { | 875 | if ((iph->ttl = tiph->ttl) == 0) { |
880 | if (skb->protocol == htons(ETH_P_IP)) | 876 | if (skb->protocol == htons(ETH_P_IP)) |
881 | iph->ttl = old_iph->ttl; | 877 | iph->ttl = old_iph->ttl; |
882 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 878 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
883 | else if (skb->protocol == htons(ETH_P_IPV6)) | 879 | else if (skb->protocol == htons(ETH_P_IPV6)) |
884 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; | 880 | iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; |
885 | #endif | 881 | #endif |
886 | else | 882 | else |
887 | iph->ttl = ip4_dst_hoplimit(&rt->dst); | 883 | iph->ttl = ip4_dst_hoplimit(&rt->dst); |
@@ -927,7 +923,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
927 | { | 923 | { |
928 | struct net_device *tdev = NULL; | 924 | struct net_device *tdev = NULL; |
929 | struct ip_tunnel *tunnel; | 925 | struct ip_tunnel *tunnel; |
930 | struct iphdr *iph; | 926 | const struct iphdr *iph; |
931 | int hlen = LL_MAX_HEADER; | 927 | int hlen = LL_MAX_HEADER; |
932 | int mtu = ETH_DATA_LEN; | 928 | int mtu = ETH_DATA_LEN; |
933 | int addend = sizeof(struct iphdr) + 4; | 929 | int addend = sizeof(struct iphdr) + 4; |
@@ -938,12 +934,14 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
938 | /* Guess output device to choose reasonable mtu and needed_headroom */ | 934 | /* Guess output device to choose reasonable mtu and needed_headroom */ |
939 | 935 | ||
940 | if (iph->daddr) { | 936 | if (iph->daddr) { |
941 | struct rtable *rt = ip_route_output_gre(dev_net(dev), | 937 | struct flowi4 fl4; |
942 | iph->daddr, iph->saddr, | 938 | struct rtable *rt; |
943 | tunnel->parms.o_key, | 939 | |
944 | RT_TOS(iph->tos), | 940 | rt = ip_route_output_gre(dev_net(dev), &fl4, |
945 | tunnel->parms.link); | 941 | iph->daddr, iph->saddr, |
946 | 942 | tunnel->parms.o_key, | |
943 | RT_TOS(iph->tos), | ||
944 | tunnel->parms.link); | ||
947 | if (!IS_ERR(rt)) { | 945 | if (!IS_ERR(rt)) { |
948 | tdev = rt->dst.dev; | 946 | tdev = rt->dst.dev; |
949 | ip_rt_put(rt); | 947 | ip_rt_put(rt); |
@@ -1180,7 +1178,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, | |||
1180 | 1178 | ||
1181 | static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) | 1179 | static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) |
1182 | { | 1180 | { |
1183 | struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); | 1181 | const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); |
1184 | memcpy(haddr, &iph->saddr, 4); | 1182 | memcpy(haddr, &iph->saddr, 4); |
1185 | return 4; | 1183 | return 4; |
1186 | } | 1184 | } |
@@ -1196,13 +1194,15 @@ static int ipgre_open(struct net_device *dev) | |||
1196 | struct ip_tunnel *t = netdev_priv(dev); | 1194 | struct ip_tunnel *t = netdev_priv(dev); |
1197 | 1195 | ||
1198 | if (ipv4_is_multicast(t->parms.iph.daddr)) { | 1196 | if (ipv4_is_multicast(t->parms.iph.daddr)) { |
1199 | struct rtable *rt = ip_route_output_gre(dev_net(dev), | 1197 | struct flowi4 fl4; |
1200 | t->parms.iph.daddr, | 1198 | struct rtable *rt; |
1201 | t->parms.iph.saddr, | 1199 | |
1202 | t->parms.o_key, | 1200 | rt = ip_route_output_gre(dev_net(dev), &fl4, |
1203 | RT_TOS(t->parms.iph.tos), | 1201 | t->parms.iph.daddr, |
1204 | t->parms.link); | 1202 | t->parms.iph.saddr, |
1205 | 1203 | t->parms.o_key, | |
1204 | RT_TOS(t->parms.iph.tos), | ||
1205 | t->parms.link); | ||
1206 | if (IS_ERR(rt)) | 1206 | if (IS_ERR(rt)) |
1207 | return -EADDRNOTAVAIL; | 1207 | return -EADDRNOTAVAIL; |
1208 | dev = rt->dst.dev; | 1208 | dev = rt->dst.dev; |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d7b2b0987a3b..c8f48efc5fd3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb) | |||
268 | static inline int ip_rcv_options(struct sk_buff *skb) | 268 | static inline int ip_rcv_options(struct sk_buff *skb) |
269 | { | 269 | { |
270 | struct ip_options *opt; | 270 | struct ip_options *opt; |
271 | struct iphdr *iph; | 271 | const struct iphdr *iph; |
272 | struct net_device *dev = skb->dev; | 272 | struct net_device *dev = skb->dev; |
273 | 273 | ||
274 | /* It looks as overkill, because not all | 274 | /* It looks as overkill, because not all |
@@ -374,7 +374,7 @@ drop: | |||
374 | */ | 374 | */ |
375 | int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) | 375 | int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
376 | { | 376 | { |
377 | struct iphdr *iph; | 377 | const struct iphdr *iph; |
378 | u32 len; | 378 | u32 len; |
379 | 379 | ||
380 | /* When the interface is in promisc. mode, drop all the crap | 380 | /* When the interface is in promisc. mode, drop all the crap |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 2391b24e8251..c3118e1cd3bb 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -36,8 +36,8 @@ | |||
36 | * saddr is address of outgoing interface. | 36 | * saddr is address of outgoing interface. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | 39 | void ip_options_build(struct sk_buff *skb, struct ip_options *opt, |
40 | __be32 daddr, struct rtable *rt, int is_frag) | 40 | __be32 daddr, struct rtable *rt, int is_frag) |
41 | { | 41 | { |
42 | unsigned char *iph = skb_network_header(skb); | 42 | unsigned char *iph = skb_network_header(skb); |
43 | 43 | ||
@@ -50,9 +50,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | |||
50 | 50 | ||
51 | if (!is_frag) { | 51 | if (!is_frag) { |
52 | if (opt->rr_needaddr) | 52 | if (opt->rr_needaddr) |
53 | ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); | 53 | ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt); |
54 | if (opt->ts_needaddr) | 54 | if (opt->ts_needaddr) |
55 | ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); | 55 | ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt); |
56 | if (opt->ts_needtime) { | 56 | if (opt->ts_needtime) { |
57 | struct timespec tv; | 57 | struct timespec tv; |
58 | __be32 midtime; | 58 | __be32 midtime; |
@@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | |||
83 | * NOTE: dopt cannot point to skb. | 83 | * NOTE: dopt cannot point to skb. |
84 | */ | 84 | */ |
85 | 85 | ||
86 | int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | 86 | int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) |
87 | { | 87 | { |
88 | struct ip_options *sopt; | 88 | const struct ip_options *sopt; |
89 | unsigned char *sptr, *dptr; | 89 | unsigned char *sptr, *dptr; |
90 | int soffset, doffset; | 90 | int soffset, doffset; |
91 | int optlen; | 91 | int optlen; |
@@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
95 | 95 | ||
96 | sopt = &(IPCB(skb)->opt); | 96 | sopt = &(IPCB(skb)->opt); |
97 | 97 | ||
98 | if (sopt->optlen == 0) { | 98 | if (sopt->optlen == 0) |
99 | dopt->optlen = 0; | ||
100 | return 0; | 99 | return 0; |
101 | } | ||
102 | 100 | ||
103 | sptr = skb_network_header(skb); | 101 | sptr = skb_network_header(skb); |
104 | dptr = dopt->__data; | 102 | dptr = dopt->__data; |
@@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
157 | dopt->optlen += optlen; | 155 | dopt->optlen += optlen; |
158 | } | 156 | } |
159 | if (sopt->srr) { | 157 | if (sopt->srr) { |
160 | unsigned char * start = sptr+sopt->srr; | 158 | unsigned char *start = sptr+sopt->srr; |
161 | __be32 faddr; | 159 | __be32 faddr; |
162 | 160 | ||
163 | optlen = start[1]; | 161 | optlen = start[1]; |
@@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt) | |||
499 | } | 497 | } |
500 | } | 498 | } |
501 | 499 | ||
502 | static struct ip_options *ip_options_get_alloc(const int optlen) | 500 | static struct ip_options_rcu *ip_options_get_alloc(const int optlen) |
503 | { | 501 | { |
504 | return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), | 502 | return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), |
505 | GFP_KERNEL); | 503 | GFP_KERNEL); |
506 | } | 504 | } |
507 | 505 | ||
508 | static int ip_options_get_finish(struct net *net, struct ip_options **optp, | 506 | static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, |
509 | struct ip_options *opt, int optlen) | 507 | struct ip_options_rcu *opt, int optlen) |
510 | { | 508 | { |
511 | while (optlen & 3) | 509 | while (optlen & 3) |
512 | opt->__data[optlen++] = IPOPT_END; | 510 | opt->opt.__data[optlen++] = IPOPT_END; |
513 | opt->optlen = optlen; | 511 | opt->opt.optlen = optlen; |
514 | if (optlen && ip_options_compile(net, opt, NULL)) { | 512 | if (optlen && ip_options_compile(net, &opt->opt, NULL)) { |
515 | kfree(opt); | 513 | kfree(opt); |
516 | return -EINVAL; | 514 | return -EINVAL; |
517 | } | 515 | } |
@@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp, | |||
520 | return 0; | 518 | return 0; |
521 | } | 519 | } |
522 | 520 | ||
523 | int ip_options_get_from_user(struct net *net, struct ip_options **optp, | 521 | int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, |
524 | unsigned char __user *data, int optlen) | 522 | unsigned char __user *data, int optlen) |
525 | { | 523 | { |
526 | struct ip_options *opt = ip_options_get_alloc(optlen); | 524 | struct ip_options_rcu *opt = ip_options_get_alloc(optlen); |
527 | 525 | ||
528 | if (!opt) | 526 | if (!opt) |
529 | return -ENOMEM; | 527 | return -ENOMEM; |
530 | if (optlen && copy_from_user(opt->__data, data, optlen)) { | 528 | if (optlen && copy_from_user(opt->opt.__data, data, optlen)) { |
531 | kfree(opt); | 529 | kfree(opt); |
532 | return -EFAULT; | 530 | return -EFAULT; |
533 | } | 531 | } |
534 | return ip_options_get_finish(net, optp, opt, optlen); | 532 | return ip_options_get_finish(net, optp, opt, optlen); |
535 | } | 533 | } |
536 | 534 | ||
537 | int ip_options_get(struct net *net, struct ip_options **optp, | 535 | int ip_options_get(struct net *net, struct ip_options_rcu **optp, |
538 | unsigned char *data, int optlen) | 536 | unsigned char *data, int optlen) |
539 | { | 537 | { |
540 | struct ip_options *opt = ip_options_get_alloc(optlen); | 538 | struct ip_options_rcu *opt = ip_options_get_alloc(optlen); |
541 | 539 | ||
542 | if (!opt) | 540 | if (!opt) |
543 | return -ENOMEM; | 541 | return -ENOMEM; |
544 | if (optlen) | 542 | if (optlen) |
545 | memcpy(opt->__data, data, optlen); | 543 | memcpy(opt->opt.__data, data, optlen); |
546 | return ip_options_get_finish(net, optp, opt, optlen); | 544 | return ip_options_get_finish(net, optp, opt, optlen); |
547 | } | 545 | } |
548 | 546 | ||
@@ -555,7 +553,7 @@ void ip_forward_options(struct sk_buff *skb) | |||
555 | 553 | ||
556 | if (opt->rr_needaddr) { | 554 | if (opt->rr_needaddr) { |
557 | optptr = (unsigned char *)raw + opt->rr; | 555 | optptr = (unsigned char *)raw + opt->rr; |
558 | ip_rt_get_source(&optptr[optptr[2]-5], rt); | 556 | ip_rt_get_source(&optptr[optptr[2]-5], skb, rt); |
559 | opt->is_changed = 1; | 557 | opt->is_changed = 1; |
560 | } | 558 | } |
561 | if (opt->srr_is_hit) { | 559 | if (opt->srr_is_hit) { |
@@ -569,19 +567,18 @@ void ip_forward_options(struct sk_buff *skb) | |||
569 | ) { | 567 | ) { |
570 | if (srrptr + 3 > srrspace) | 568 | if (srrptr + 3 > srrspace) |
571 | break; | 569 | break; |
572 | if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) | 570 | if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0) |
573 | break; | 571 | break; |
574 | } | 572 | } |
575 | if (srrptr + 3 <= srrspace) { | 573 | if (srrptr + 3 <= srrspace) { |
576 | opt->is_changed = 1; | 574 | opt->is_changed = 1; |
577 | ip_rt_get_source(&optptr[srrptr-1], rt); | 575 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); |
578 | ip_hdr(skb)->daddr = rt->rt_dst; | ||
579 | optptr[2] = srrptr+4; | 576 | optptr[2] = srrptr+4; |
580 | } else if (net_ratelimit()) | 577 | } else if (net_ratelimit()) |
581 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); | 578 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); |
582 | if (opt->ts_needaddr) { | 579 | if (opt->ts_needaddr) { |
583 | optptr = raw + opt->ts; | 580 | optptr = raw + opt->ts; |
584 | ip_rt_get_source(&optptr[optptr[2]-9], rt); | 581 | ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); |
585 | opt->is_changed = 1; | 582 | opt->is_changed = 1; |
586 | } | 583 | } |
587 | } | 584 | } |
@@ -603,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
603 | unsigned long orefdst; | 600 | unsigned long orefdst; |
604 | int err; | 601 | int err; |
605 | 602 | ||
606 | if (!opt->srr || !rt) | 603 | if (!rt) |
607 | return 0; | 604 | return 0; |
608 | 605 | ||
609 | if (skb->pkt_type != PACKET_HOST) | 606 | if (skb->pkt_type != PACKET_HOST) |
@@ -637,7 +634,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
637 | if (rt2->rt_type != RTN_LOCAL) | 634 | if (rt2->rt_type != RTN_LOCAL) |
638 | break; | 635 | break; |
639 | /* Superfast 8) loopback forward */ | 636 | /* Superfast 8) loopback forward */ |
640 | memcpy(&iph->daddr, &optptr[srrptr-1], 4); | 637 | iph->daddr = nexthop; |
641 | opt->is_changed = 1; | 638 | opt->is_changed = 1; |
642 | } | 639 | } |
643 | if (srrptr <= srrspace) { | 640 | if (srrptr <= srrspace) { |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 459c011b1d4a..98af3697c718 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | |||
140 | * | 140 | * |
141 | */ | 141 | */ |
142 | int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | 142 | int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, |
143 | __be32 saddr, __be32 daddr, struct ip_options *opt) | 143 | __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) |
144 | { | 144 | { |
145 | struct inet_sock *inet = inet_sk(sk); | 145 | struct inet_sock *inet = inet_sk(sk); |
146 | struct rtable *rt = skb_rtable(skb); | 146 | struct rtable *rt = skb_rtable(skb); |
147 | struct iphdr *iph; | 147 | struct iphdr *iph; |
148 | 148 | ||
149 | /* Build the IP header. */ | 149 | /* Build the IP header. */ |
150 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 150 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); |
151 | skb_reset_network_header(skb); | 151 | skb_reset_network_header(skb); |
152 | iph = ip_hdr(skb); | 152 | iph = ip_hdr(skb); |
153 | iph->version = 4; | 153 | iph->version = 4; |
@@ -158,14 +158,14 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
158 | else | 158 | else |
159 | iph->frag_off = 0; | 159 | iph->frag_off = 0; |
160 | iph->ttl = ip_select_ttl(inet, &rt->dst); | 160 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
161 | iph->daddr = rt->rt_dst; | 161 | iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); |
162 | iph->saddr = rt->rt_src; | 162 | iph->saddr = saddr; |
163 | iph->protocol = sk->sk_protocol; | 163 | iph->protocol = sk->sk_protocol; |
164 | ip_select_ident(iph, &rt->dst, sk); | 164 | ip_select_ident(iph, &rt->dst, sk); |
165 | 165 | ||
166 | if (opt && opt->optlen) { | 166 | if (opt && opt->opt.optlen) { |
167 | iph->ihl += opt->optlen>>2; | 167 | iph->ihl += opt->opt.optlen>>2; |
168 | ip_options_build(skb, opt, daddr, rt, 0); | 168 | ip_options_build(skb, &opt->opt, daddr, rt, 0); |
169 | } | 169 | } |
170 | 170 | ||
171 | skb->priority = sk->sk_priority; | 171 | skb->priority = sk->sk_priority; |
@@ -312,11 +312,12 @@ int ip_output(struct sk_buff *skb) | |||
312 | !(IPCB(skb)->flags & IPSKB_REROUTED)); | 312 | !(IPCB(skb)->flags & IPSKB_REROUTED)); |
313 | } | 313 | } |
314 | 314 | ||
315 | int ip_queue_xmit(struct sk_buff *skb) | 315 | int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) |
316 | { | 316 | { |
317 | struct sock *sk = skb->sk; | 317 | struct sock *sk = skb->sk; |
318 | struct inet_sock *inet = inet_sk(sk); | 318 | struct inet_sock *inet = inet_sk(sk); |
319 | struct ip_options *opt = inet->opt; | 319 | struct ip_options_rcu *inet_opt; |
320 | struct flowi4 *fl4; | ||
320 | struct rtable *rt; | 321 | struct rtable *rt; |
321 | struct iphdr *iph; | 322 | struct iphdr *iph; |
322 | int res; | 323 | int res; |
@@ -325,6 +326,8 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
325 | * f.e. by something like SCTP. | 326 | * f.e. by something like SCTP. |
326 | */ | 327 | */ |
327 | rcu_read_lock(); | 328 | rcu_read_lock(); |
329 | inet_opt = rcu_dereference(inet->inet_opt); | ||
330 | fl4 = &fl->u.ip4; | ||
328 | rt = skb_rtable(skb); | 331 | rt = skb_rtable(skb); |
329 | if (rt != NULL) | 332 | if (rt != NULL) |
330 | goto packet_routed; | 333 | goto packet_routed; |
@@ -336,14 +339,14 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
336 | 339 | ||
337 | /* Use correct destination address if we have options. */ | 340 | /* Use correct destination address if we have options. */ |
338 | daddr = inet->inet_daddr; | 341 | daddr = inet->inet_daddr; |
339 | if(opt && opt->srr) | 342 | if (inet_opt && inet_opt->opt.srr) |
340 | daddr = opt->faddr; | 343 | daddr = inet_opt->opt.faddr; |
341 | 344 | ||
342 | /* If this fails, retransmit mechanism of transport layer will | 345 | /* If this fails, retransmit mechanism of transport layer will |
343 | * keep trying until route appears or the connection times | 346 | * keep trying until route appears or the connection times |
344 | * itself out. | 347 | * itself out. |
345 | */ | 348 | */ |
346 | rt = ip_route_output_ports(sock_net(sk), sk, | 349 | rt = ip_route_output_ports(sock_net(sk), fl4, sk, |
347 | daddr, inet->inet_saddr, | 350 | daddr, inet->inet_saddr, |
348 | inet->inet_dport, | 351 | inet->inet_dport, |
349 | inet->inet_sport, | 352 | inet->inet_sport, |
@@ -357,11 +360,11 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
357 | skb_dst_set_noref(skb, &rt->dst); | 360 | skb_dst_set_noref(skb, &rt->dst); |
358 | 361 | ||
359 | packet_routed: | 362 | packet_routed: |
360 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 363 | if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) |
361 | goto no_route; | 364 | goto no_route; |
362 | 365 | ||
363 | /* OK, we know where to send it, allocate and build IP header. */ | 366 | /* OK, we know where to send it, allocate and build IP header. */ |
364 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 367 | skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0)); |
365 | skb_reset_network_header(skb); | 368 | skb_reset_network_header(skb); |
366 | iph = ip_hdr(skb); | 369 | iph = ip_hdr(skb); |
367 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 370 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
@@ -371,13 +374,13 @@ packet_routed: | |||
371 | iph->frag_off = 0; | 374 | iph->frag_off = 0; |
372 | iph->ttl = ip_select_ttl(inet, &rt->dst); | 375 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
373 | iph->protocol = sk->sk_protocol; | 376 | iph->protocol = sk->sk_protocol; |
374 | iph->saddr = rt->rt_src; | 377 | iph->saddr = fl4->saddr; |
375 | iph->daddr = rt->rt_dst; | 378 | iph->daddr = fl4->daddr; |
376 | /* Transport layer set skb->h.foo itself. */ | 379 | /* Transport layer set skb->h.foo itself. */ |
377 | 380 | ||
378 | if (opt && opt->optlen) { | 381 | if (inet_opt && inet_opt->opt.optlen) { |
379 | iph->ihl += opt->optlen >> 2; | 382 | iph->ihl += inet_opt->opt.optlen >> 2; |
380 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); | 383 | ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); |
381 | } | 384 | } |
382 | 385 | ||
383 | ip_select_ident_more(iph, &rt->dst, sk, | 386 | ip_select_ident_more(iph, &rt->dst, sk, |
@@ -773,7 +776,9 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
773 | (length - transhdrlen)); | 776 | (length - transhdrlen)); |
774 | } | 777 | } |
775 | 778 | ||
776 | static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, | 779 | static int __ip_append_data(struct sock *sk, |
780 | struct flowi4 *fl4, | ||
781 | struct sk_buff_head *queue, | ||
777 | struct inet_cork *cork, | 782 | struct inet_cork *cork, |
778 | int getfrag(void *from, char *to, int offset, | 783 | int getfrag(void *from, char *to, int offset, |
779 | int len, int odd, struct sk_buff *skb), | 784 | int len, int odd, struct sk_buff *skb), |
@@ -805,7 +810,7 @@ static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, | |||
805 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 810 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
806 | 811 | ||
807 | if (cork->length + length > 0xFFFF - fragheaderlen) { | 812 | if (cork->length + length > 0xFFFF - fragheaderlen) { |
808 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 813 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, |
809 | mtu-exthdrlen); | 814 | mtu-exthdrlen); |
810 | return -EMSGSIZE; | 815 | return -EMSGSIZE; |
811 | } | 816 | } |
@@ -1033,7 +1038,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | |||
1033 | struct ipcm_cookie *ipc, struct rtable **rtp) | 1038 | struct ipcm_cookie *ipc, struct rtable **rtp) |
1034 | { | 1039 | { |
1035 | struct inet_sock *inet = inet_sk(sk); | 1040 | struct inet_sock *inet = inet_sk(sk); |
1036 | struct ip_options *opt; | 1041 | struct ip_options_rcu *opt; |
1037 | struct rtable *rt; | 1042 | struct rtable *rt; |
1038 | 1043 | ||
1039 | /* | 1044 | /* |
@@ -1047,7 +1052,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | |||
1047 | if (unlikely(cork->opt == NULL)) | 1052 | if (unlikely(cork->opt == NULL)) |
1048 | return -ENOBUFS; | 1053 | return -ENOBUFS; |
1049 | } | 1054 | } |
1050 | memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); | 1055 | memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); |
1051 | cork->flags |= IPCORK_OPT; | 1056 | cork->flags |= IPCORK_OPT; |
1052 | cork->addr = ipc->addr; | 1057 | cork->addr = ipc->addr; |
1053 | } | 1058 | } |
@@ -1080,7 +1085,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | |||
1080 | * | 1085 | * |
1081 | * LATER: length must be adjusted by pad at tail, when it is required. | 1086 | * LATER: length must be adjusted by pad at tail, when it is required. |
1082 | */ | 1087 | */ |
1083 | int ip_append_data(struct sock *sk, | 1088 | int ip_append_data(struct sock *sk, struct flowi4 *fl4, |
1084 | int getfrag(void *from, char *to, int offset, int len, | 1089 | int getfrag(void *from, char *to, int offset, int len, |
1085 | int odd, struct sk_buff *skb), | 1090 | int odd, struct sk_buff *skb), |
1086 | void *from, int length, int transhdrlen, | 1091 | void *from, int length, int transhdrlen, |
@@ -1094,24 +1099,25 @@ int ip_append_data(struct sock *sk, | |||
1094 | return 0; | 1099 | return 0; |
1095 | 1100 | ||
1096 | if (skb_queue_empty(&sk->sk_write_queue)) { | 1101 | if (skb_queue_empty(&sk->sk_write_queue)) { |
1097 | err = ip_setup_cork(sk, &inet->cork, ipc, rtp); | 1102 | err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp); |
1098 | if (err) | 1103 | if (err) |
1099 | return err; | 1104 | return err; |
1100 | } else { | 1105 | } else { |
1101 | transhdrlen = 0; | 1106 | transhdrlen = 0; |
1102 | } | 1107 | } |
1103 | 1108 | ||
1104 | return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, | 1109 | return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, |
1105 | from, length, transhdrlen, flags); | 1110 | from, length, transhdrlen, flags); |
1106 | } | 1111 | } |
1107 | 1112 | ||
1108 | ssize_t ip_append_page(struct sock *sk, struct page *page, | 1113 | ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, |
1109 | int offset, size_t size, int flags) | 1114 | int offset, size_t size, int flags) |
1110 | { | 1115 | { |
1111 | struct inet_sock *inet = inet_sk(sk); | 1116 | struct inet_sock *inet = inet_sk(sk); |
1112 | struct sk_buff *skb; | 1117 | struct sk_buff *skb; |
1113 | struct rtable *rt; | 1118 | struct rtable *rt; |
1114 | struct ip_options *opt = NULL; | 1119 | struct ip_options *opt = NULL; |
1120 | struct inet_cork *cork; | ||
1115 | int hh_len; | 1121 | int hh_len; |
1116 | int mtu; | 1122 | int mtu; |
1117 | int len; | 1123 | int len; |
@@ -1127,28 +1133,29 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1127 | if (skb_queue_empty(&sk->sk_write_queue)) | 1133 | if (skb_queue_empty(&sk->sk_write_queue)) |
1128 | return -EINVAL; | 1134 | return -EINVAL; |
1129 | 1135 | ||
1130 | rt = (struct rtable *)inet->cork.dst; | 1136 | cork = &inet->cork.base; |
1131 | if (inet->cork.flags & IPCORK_OPT) | 1137 | rt = (struct rtable *)cork->dst; |
1132 | opt = inet->cork.opt; | 1138 | if (cork->flags & IPCORK_OPT) |
1139 | opt = cork->opt; | ||
1133 | 1140 | ||
1134 | if (!(rt->dst.dev->features&NETIF_F_SG)) | 1141 | if (!(rt->dst.dev->features&NETIF_F_SG)) |
1135 | return -EOPNOTSUPP; | 1142 | return -EOPNOTSUPP; |
1136 | 1143 | ||
1137 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); | 1144 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
1138 | mtu = inet->cork.fragsize; | 1145 | mtu = cork->fragsize; |
1139 | 1146 | ||
1140 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 1147 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
1141 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 1148 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
1142 | 1149 | ||
1143 | if (inet->cork.length + size > 0xFFFF - fragheaderlen) { | 1150 | if (cork->length + size > 0xFFFF - fragheaderlen) { |
1144 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); | 1151 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); |
1145 | return -EMSGSIZE; | 1152 | return -EMSGSIZE; |
1146 | } | 1153 | } |
1147 | 1154 | ||
1148 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) | 1155 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) |
1149 | return -EINVAL; | 1156 | return -EINVAL; |
1150 | 1157 | ||
1151 | inet->cork.length += size; | 1158 | cork->length += size; |
1152 | if ((size + skb->len > mtu) && | 1159 | if ((size + skb->len > mtu) && |
1153 | (sk->sk_protocol == IPPROTO_UDP) && | 1160 | (sk->sk_protocol == IPPROTO_UDP) && |
1154 | (rt->dst.dev->features & NETIF_F_UFO)) { | 1161 | (rt->dst.dev->features & NETIF_F_UFO)) { |
@@ -1243,7 +1250,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1243 | return 0; | 1250 | return 0; |
1244 | 1251 | ||
1245 | error: | 1252 | error: |
1246 | inet->cork.length -= size; | 1253 | cork->length -= size; |
1247 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1254 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
1248 | return err; | 1255 | return err; |
1249 | } | 1256 | } |
@@ -1262,6 +1269,7 @@ static void ip_cork_release(struct inet_cork *cork) | |||
1262 | * and push them out. | 1269 | * and push them out. |
1263 | */ | 1270 | */ |
1264 | struct sk_buff *__ip_make_skb(struct sock *sk, | 1271 | struct sk_buff *__ip_make_skb(struct sock *sk, |
1272 | struct flowi4 *fl4, | ||
1265 | struct sk_buff_head *queue, | 1273 | struct sk_buff_head *queue, |
1266 | struct inet_cork *cork) | 1274 | struct inet_cork *cork) |
1267 | { | 1275 | { |
@@ -1319,17 +1327,18 @@ struct sk_buff *__ip_make_skb(struct sock *sk, | |||
1319 | iph = (struct iphdr *)skb->data; | 1327 | iph = (struct iphdr *)skb->data; |
1320 | iph->version = 4; | 1328 | iph->version = 4; |
1321 | iph->ihl = 5; | 1329 | iph->ihl = 5; |
1322 | if (opt) { | ||
1323 | iph->ihl += opt->optlen>>2; | ||
1324 | ip_options_build(skb, opt, cork->addr, rt, 0); | ||
1325 | } | ||
1326 | iph->tos = inet->tos; | 1330 | iph->tos = inet->tos; |
1327 | iph->frag_off = df; | 1331 | iph->frag_off = df; |
1328 | ip_select_ident(iph, &rt->dst, sk); | 1332 | ip_select_ident(iph, &rt->dst, sk); |
1329 | iph->ttl = ttl; | 1333 | iph->ttl = ttl; |
1330 | iph->protocol = sk->sk_protocol; | 1334 | iph->protocol = sk->sk_protocol; |
1331 | iph->saddr = rt->rt_src; | 1335 | iph->saddr = fl4->saddr; |
1332 | iph->daddr = rt->rt_dst; | 1336 | iph->daddr = fl4->daddr; |
1337 | |||
1338 | if (opt) { | ||
1339 | iph->ihl += opt->optlen>>2; | ||
1340 | ip_options_build(skb, opt, cork->addr, rt, 0); | ||
1341 | } | ||
1333 | 1342 | ||
1334 | skb->priority = sk->sk_priority; | 1343 | skb->priority = sk->sk_priority; |
1335 | skb->mark = sk->sk_mark; | 1344 | skb->mark = sk->sk_mark; |
@@ -1365,11 +1374,11 @@ int ip_send_skb(struct sk_buff *skb) | |||
1365 | return err; | 1374 | return err; |
1366 | } | 1375 | } |
1367 | 1376 | ||
1368 | int ip_push_pending_frames(struct sock *sk) | 1377 | int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) |
1369 | { | 1378 | { |
1370 | struct sk_buff *skb; | 1379 | struct sk_buff *skb; |
1371 | 1380 | ||
1372 | skb = ip_finish_skb(sk); | 1381 | skb = ip_finish_skb(sk, fl4); |
1373 | if (!skb) | 1382 | if (!skb) |
1374 | return 0; | 1383 | return 0; |
1375 | 1384 | ||
@@ -1394,17 +1403,18 @@ static void __ip_flush_pending_frames(struct sock *sk, | |||
1394 | 1403 | ||
1395 | void ip_flush_pending_frames(struct sock *sk) | 1404 | void ip_flush_pending_frames(struct sock *sk) |
1396 | { | 1405 | { |
1397 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); | 1406 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base); |
1398 | } | 1407 | } |
1399 | 1408 | ||
1400 | struct sk_buff *ip_make_skb(struct sock *sk, | 1409 | struct sk_buff *ip_make_skb(struct sock *sk, |
1410 | struct flowi4 *fl4, | ||
1401 | int getfrag(void *from, char *to, int offset, | 1411 | int getfrag(void *from, char *to, int offset, |
1402 | int len, int odd, struct sk_buff *skb), | 1412 | int len, int odd, struct sk_buff *skb), |
1403 | void *from, int length, int transhdrlen, | 1413 | void *from, int length, int transhdrlen, |
1404 | struct ipcm_cookie *ipc, struct rtable **rtp, | 1414 | struct ipcm_cookie *ipc, struct rtable **rtp, |
1405 | unsigned int flags) | 1415 | unsigned int flags) |
1406 | { | 1416 | { |
1407 | struct inet_cork cork = {}; | 1417 | struct inet_cork cork; |
1408 | struct sk_buff_head queue; | 1418 | struct sk_buff_head queue; |
1409 | int err; | 1419 | int err; |
1410 | 1420 | ||
@@ -1413,18 +1423,21 @@ struct sk_buff *ip_make_skb(struct sock *sk, | |||
1413 | 1423 | ||
1414 | __skb_queue_head_init(&queue); | 1424 | __skb_queue_head_init(&queue); |
1415 | 1425 | ||
1426 | cork.flags = 0; | ||
1427 | cork.addr = 0; | ||
1428 | cork.opt = NULL; | ||
1416 | err = ip_setup_cork(sk, &cork, ipc, rtp); | 1429 | err = ip_setup_cork(sk, &cork, ipc, rtp); |
1417 | if (err) | 1430 | if (err) |
1418 | return ERR_PTR(err); | 1431 | return ERR_PTR(err); |
1419 | 1432 | ||
1420 | err = __ip_append_data(sk, &queue, &cork, getfrag, | 1433 | err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, |
1421 | from, length, transhdrlen, flags); | 1434 | from, length, transhdrlen, flags); |
1422 | if (err) { | 1435 | if (err) { |
1423 | __ip_flush_pending_frames(sk, &queue, &cork); | 1436 | __ip_flush_pending_frames(sk, &queue, &cork); |
1424 | return ERR_PTR(err); | 1437 | return ERR_PTR(err); |
1425 | } | 1438 | } |
1426 | 1439 | ||
1427 | return __ip_make_skb(sk, &queue, &cork); | 1440 | return __ip_make_skb(sk, fl4, &queue, &cork); |
1428 | } | 1441 | } |
1429 | 1442 | ||
1430 | /* | 1443 | /* |
@@ -1447,48 +1460,39 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, | |||
1447 | * Should run single threaded per socket because it uses the sock | 1460 | * Should run single threaded per socket because it uses the sock |
1448 | * structure to pass arguments. | 1461 | * structure to pass arguments. |
1449 | */ | 1462 | */ |
1450 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, | 1463 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, |
1451 | unsigned int len) | 1464 | struct ip_reply_arg *arg, unsigned int len) |
1452 | { | 1465 | { |
1453 | struct inet_sock *inet = inet_sk(sk); | 1466 | struct inet_sock *inet = inet_sk(sk); |
1454 | struct { | 1467 | struct ip_options_data replyopts; |
1455 | struct ip_options opt; | ||
1456 | char data[40]; | ||
1457 | } replyopts; | ||
1458 | struct ipcm_cookie ipc; | 1468 | struct ipcm_cookie ipc; |
1459 | __be32 daddr; | 1469 | struct flowi4 fl4; |
1460 | struct rtable *rt = skb_rtable(skb); | 1470 | struct rtable *rt = skb_rtable(skb); |
1461 | 1471 | ||
1462 | if (ip_options_echo(&replyopts.opt, skb)) | 1472 | if (ip_options_echo(&replyopts.opt.opt, skb)) |
1463 | return; | 1473 | return; |
1464 | 1474 | ||
1465 | daddr = ipc.addr = rt->rt_src; | 1475 | ipc.addr = daddr; |
1466 | ipc.opt = NULL; | 1476 | ipc.opt = NULL; |
1467 | ipc.tx_flags = 0; | 1477 | ipc.tx_flags = 0; |
1468 | 1478 | ||
1469 | if (replyopts.opt.optlen) { | 1479 | if (replyopts.opt.opt.optlen) { |
1470 | ipc.opt = &replyopts.opt; | 1480 | ipc.opt = &replyopts.opt; |
1471 | 1481 | ||
1472 | if (ipc.opt->srr) | 1482 | if (replyopts.opt.opt.srr) |
1473 | daddr = replyopts.opt.faddr; | 1483 | daddr = replyopts.opt.opt.faddr; |
1474 | } | 1484 | } |
1475 | 1485 | ||
1476 | { | 1486 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, |
1477 | struct flowi4 fl4 = { | 1487 | RT_TOS(ip_hdr(skb)->tos), |
1478 | .flowi4_oif = arg->bound_dev_if, | 1488 | RT_SCOPE_UNIVERSE, sk->sk_protocol, |
1479 | .daddr = daddr, | 1489 | ip_reply_arg_flowi_flags(arg), |
1480 | .saddr = rt->rt_spec_dst, | 1490 | daddr, rt->rt_spec_dst, |
1481 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), | 1491 | tcp_hdr(skb)->source, tcp_hdr(skb)->dest); |
1482 | .fl4_sport = tcp_hdr(skb)->dest, | 1492 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
1483 | .fl4_dport = tcp_hdr(skb)->source, | 1493 | rt = ip_route_output_key(sock_net(sk), &fl4); |
1484 | .flowi4_proto = sk->sk_protocol, | 1494 | if (IS_ERR(rt)) |
1485 | .flowi4_flags = ip_reply_arg_flowi_flags(arg), | 1495 | return; |
1486 | }; | ||
1487 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | ||
1488 | rt = ip_route_output_key(sock_net(sk), &fl4); | ||
1489 | if (IS_ERR(rt)) | ||
1490 | return; | ||
1491 | } | ||
1492 | 1496 | ||
1493 | /* And let IP do all the hard work. | 1497 | /* And let IP do all the hard work. |
1494 | 1498 | ||
@@ -1501,7 +1505,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1501 | sk->sk_priority = skb->priority; | 1505 | sk->sk_priority = skb->priority; |
1502 | sk->sk_protocol = ip_hdr(skb)->protocol; | 1506 | sk->sk_protocol = ip_hdr(skb)->protocol; |
1503 | sk->sk_bound_dev_if = arg->bound_dev_if; | 1507 | sk->sk_bound_dev_if = arg->bound_dev_if; |
1504 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, | 1508 | ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, |
1505 | &ipc, &rt, MSG_DONTWAIT); | 1509 | &ipc, &rt, MSG_DONTWAIT); |
1506 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { | 1510 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
1507 | if (arg->csumoffset >= 0) | 1511 | if (arg->csumoffset >= 0) |
@@ -1509,7 +1513,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1509 | arg->csumoffset) = csum_fold(csum_add(skb->csum, | 1513 | arg->csumoffset) = csum_fold(csum_add(skb->csum, |
1510 | arg->csum)); | 1514 | arg->csum)); |
1511 | skb->ip_summed = CHECKSUM_NONE; | 1515 | skb->ip_summed = CHECKSUM_NONE; |
1512 | ip_push_pending_frames(sk); | 1516 | ip_push_pending_frames(sk, &fl4); |
1513 | } | 1517 | } |
1514 | 1518 | ||
1515 | bh_unlock_sock(sk); | 1519 | bh_unlock_sock(sk); |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3948c86e59ca..ab0c9efd1efa 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -131,7 +131,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) | |||
131 | static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) | 131 | static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) |
132 | { | 132 | { |
133 | struct sockaddr_in sin; | 133 | struct sockaddr_in sin; |
134 | struct iphdr *iph = ip_hdr(skb); | 134 | const struct iphdr *iph = ip_hdr(skb); |
135 | __be16 *ports = (__be16 *)skb_transport_header(skb); | 135 | __be16 *ports = (__be16 *)skb_transport_header(skb); |
136 | 136 | ||
137 | if (skb_transport_offset(skb) + 4 > skb->len) | 137 | if (skb_transport_offset(skb) + 4 > skb->len) |
@@ -451,6 +451,11 @@ out: | |||
451 | } | 451 | } |
452 | 452 | ||
453 | 453 | ||
454 | static void opt_kfree_rcu(struct rcu_head *head) | ||
455 | { | ||
456 | kfree(container_of(head, struct ip_options_rcu, rcu)); | ||
457 | } | ||
458 | |||
454 | /* | 459 | /* |
455 | * Socket option code for IP. This is the end of the line after any | 460 | * Socket option code for IP. This is the end of the line after any |
456 | * TCP,UDP etc options on an IP socket. | 461 | * TCP,UDP etc options on an IP socket. |
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
497 | switch (optname) { | 502 | switch (optname) { |
498 | case IP_OPTIONS: | 503 | case IP_OPTIONS: |
499 | { | 504 | { |
500 | struct ip_options *opt = NULL; | 505 | struct ip_options_rcu *old, *opt = NULL; |
506 | |||
501 | if (optlen > 40) | 507 | if (optlen > 40) |
502 | goto e_inval; | 508 | goto e_inval; |
503 | err = ip_options_get_from_user(sock_net(sk), &opt, | 509 | err = ip_options_get_from_user(sock_net(sk), &opt, |
504 | optval, optlen); | 510 | optval, optlen); |
505 | if (err) | 511 | if (err) |
506 | break; | 512 | break; |
513 | old = rcu_dereference_protected(inet->inet_opt, | ||
514 | sock_owned_by_user(sk)); | ||
507 | if (inet->is_icsk) { | 515 | if (inet->is_icsk) { |
508 | struct inet_connection_sock *icsk = inet_csk(sk); | 516 | struct inet_connection_sock *icsk = inet_csk(sk); |
509 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 517 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
512 | (TCPF_LISTEN | TCPF_CLOSE)) && | 520 | (TCPF_LISTEN | TCPF_CLOSE)) && |
513 | inet->inet_daddr != LOOPBACK4_IPV6)) { | 521 | inet->inet_daddr != LOOPBACK4_IPV6)) { |
514 | #endif | 522 | #endif |
515 | if (inet->opt) | 523 | if (old) |
516 | icsk->icsk_ext_hdr_len -= inet->opt->optlen; | 524 | icsk->icsk_ext_hdr_len -= old->opt.optlen; |
517 | if (opt) | 525 | if (opt) |
518 | icsk->icsk_ext_hdr_len += opt->optlen; | 526 | icsk->icsk_ext_hdr_len += opt->opt.optlen; |
519 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); | 527 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); |
520 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 528 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
521 | } | 529 | } |
522 | #endif | 530 | #endif |
523 | } | 531 | } |
524 | opt = xchg(&inet->opt, opt); | 532 | rcu_assign_pointer(inet->inet_opt, opt); |
525 | kfree(opt); | 533 | if (old) |
534 | call_rcu(&old->rcu, opt_kfree_rcu); | ||
526 | break; | 535 | break; |
527 | } | 536 | } |
528 | case IP_PKTINFO: | 537 | case IP_PKTINFO: |
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1081 | case IP_OPTIONS: | 1090 | case IP_OPTIONS: |
1082 | { | 1091 | { |
1083 | unsigned char optbuf[sizeof(struct ip_options)+40]; | 1092 | unsigned char optbuf[sizeof(struct ip_options)+40]; |
1084 | struct ip_options * opt = (struct ip_options *)optbuf; | 1093 | struct ip_options *opt = (struct ip_options *)optbuf; |
1094 | struct ip_options_rcu *inet_opt; | ||
1095 | |||
1096 | inet_opt = rcu_dereference_protected(inet->inet_opt, | ||
1097 | sock_owned_by_user(sk)); | ||
1085 | opt->optlen = 0; | 1098 | opt->optlen = 0; |
1086 | if (inet->opt) | 1099 | if (inet_opt) |
1087 | memcpy(optbuf, inet->opt, | 1100 | memcpy(optbuf, &inet_opt->opt, |
1088 | sizeof(struct ip_options)+ | 1101 | sizeof(struct ip_options) + |
1089 | inet->opt->optlen); | 1102 | inet_opt->opt.optlen); |
1090 | release_sock(sk); | 1103 | release_sock(sk); |
1091 | 1104 | ||
1092 | if (opt->optlen == 0) | 1105 | if (opt->optlen == 0) |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 629067571f02..c857f6f49b03 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -27,7 +27,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
27 | { | 27 | { |
28 | struct net *net = dev_net(skb->dev); | 28 | struct net *net = dev_net(skb->dev); |
29 | __be32 spi; | 29 | __be32 spi; |
30 | struct iphdr *iph = (struct iphdr *)skb->data; | 30 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
31 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); | 31 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); |
32 | struct xfrm_state *x; | 32 | struct xfrm_state *x; |
33 | 33 | ||
@@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
36 | return; | 36 | return; |
37 | 37 | ||
38 | spi = htonl(ntohs(ipch->cpi)); | 38 | spi = htonl(ntohs(ipch->cpi)); |
39 | x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, | 39 | x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, |
40 | spi, IPPROTO_COMP, AF_INET); | 40 | spi, IPPROTO_COMP, AF_INET); |
41 | if (!x) | 41 | if (!x) |
42 | return; | 42 | return; |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index cbff2ecccf3d..ab7e5542c1cf 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -87,8 +87,8 @@ | |||
87 | #endif | 87 | #endif |
88 | 88 | ||
89 | /* Define the friendly delay before and after opening net devices */ | 89 | /* Define the friendly delay before and after opening net devices */ |
90 | #define CONF_PRE_OPEN 500 /* Before opening: 1/2 second */ | 90 | #define CONF_POST_OPEN 10 /* After opening: 10 msecs */ |
91 | #define CONF_POST_OPEN 1 /* After opening: 1 second */ | 91 | #define CONF_CARRIER_TIMEOUT 120000 /* Wait for carrier timeout */ |
92 | 92 | ||
93 | /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ | 93 | /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ |
94 | #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ | 94 | #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ |
@@ -188,14 +188,14 @@ struct ic_device { | |||
188 | static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ | 188 | static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ |
189 | static struct net_device *ic_dev __initdata = NULL; /* Selected device */ | 189 | static struct net_device *ic_dev __initdata = NULL; /* Selected device */ |
190 | 190 | ||
191 | static bool __init ic_device_match(struct net_device *dev) | 191 | static bool __init ic_is_init_dev(struct net_device *dev) |
192 | { | 192 | { |
193 | if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : | 193 | if (dev->flags & IFF_LOOPBACK) |
194 | return false; | ||
195 | return user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : | ||
194 | (!(dev->flags & IFF_LOOPBACK) && | 196 | (!(dev->flags & IFF_LOOPBACK) && |
195 | (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && | 197 | (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && |
196 | strncmp(dev->name, "dummy", 5))) | 198 | strncmp(dev->name, "dummy", 5)); |
197 | return true; | ||
198 | return false; | ||
199 | } | 199 | } |
200 | 200 | ||
201 | static int __init ic_open_devs(void) | 201 | static int __init ic_open_devs(void) |
@@ -203,6 +203,7 @@ static int __init ic_open_devs(void) | |||
203 | struct ic_device *d, **last; | 203 | struct ic_device *d, **last; |
204 | struct net_device *dev; | 204 | struct net_device *dev; |
205 | unsigned short oflags; | 205 | unsigned short oflags; |
206 | unsigned long start; | ||
206 | 207 | ||
207 | last = &ic_first_dev; | 208 | last = &ic_first_dev; |
208 | rtnl_lock(); | 209 | rtnl_lock(); |
@@ -216,9 +217,7 @@ static int __init ic_open_devs(void) | |||
216 | } | 217 | } |
217 | 218 | ||
218 | for_each_netdev(&init_net, dev) { | 219 | for_each_netdev(&init_net, dev) { |
219 | if (dev->flags & IFF_LOOPBACK) | 220 | if (ic_is_init_dev(dev)) { |
220 | continue; | ||
221 | if (ic_device_match(dev)) { | ||
222 | int able = 0; | 221 | int able = 0; |
223 | if (dev->mtu >= 364) | 222 | if (dev->mtu >= 364) |
224 | able |= IC_BOOTP; | 223 | able |= IC_BOOTP; |
@@ -252,6 +251,17 @@ static int __init ic_open_devs(void) | |||
252 | dev->name, able, d->xid)); | 251 | dev->name, able, d->xid)); |
253 | } | 252 | } |
254 | } | 253 | } |
254 | |||
255 | /* wait for a carrier on at least one device */ | ||
256 | start = jiffies; | ||
257 | while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { | ||
258 | for_each_netdev(&init_net, dev) | ||
259 | if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) | ||
260 | goto have_carrier; | ||
261 | |||
262 | msleep(1); | ||
263 | } | ||
264 | have_carrier: | ||
255 | rtnl_unlock(); | 265 | rtnl_unlock(); |
256 | 266 | ||
257 | *last = NULL; | 267 | *last = NULL; |
@@ -1324,14 +1334,13 @@ static int __init wait_for_devices(void) | |||
1324 | { | 1334 | { |
1325 | int i; | 1335 | int i; |
1326 | 1336 | ||
1327 | msleep(CONF_PRE_OPEN); | ||
1328 | for (i = 0; i < DEVICE_WAIT_MAX; i++) { | 1337 | for (i = 0; i < DEVICE_WAIT_MAX; i++) { |
1329 | struct net_device *dev; | 1338 | struct net_device *dev; |
1330 | int found = 0; | 1339 | int found = 0; |
1331 | 1340 | ||
1332 | rtnl_lock(); | 1341 | rtnl_lock(); |
1333 | for_each_netdev(&init_net, dev) { | 1342 | for_each_netdev(&init_net, dev) { |
1334 | if (ic_device_match(dev)) { | 1343 | if (ic_is_init_dev(dev)) { |
1335 | found = 1; | 1344 | found = 1; |
1336 | break; | 1345 | break; |
1337 | } | 1346 | } |
@@ -1378,7 +1387,7 @@ static int __init ip_auto_config(void) | |||
1378 | return err; | 1387 | return err; |
1379 | 1388 | ||
1380 | /* Give drivers a chance to settle */ | 1389 | /* Give drivers a chance to settle */ |
1381 | ssleep(CONF_POST_OPEN); | 1390 | msleep(CONF_POST_OPEN); |
1382 | 1391 | ||
1383 | /* | 1392 | /* |
1384 | * If the config information is insufficient (e.g., our IP address or | 1393 | * If the config information is insufficient (e.g., our IP address or |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index bfc17c5914e7..378b20b7ca6e 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -276,11 +276,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | |||
276 | 276 | ||
277 | dev_net_set(dev, net); | 277 | dev_net_set(dev, net); |
278 | 278 | ||
279 | if (strchr(name, '%')) { | ||
280 | if (dev_alloc_name(dev, name) < 0) | ||
281 | goto failed_free; | ||
282 | } | ||
283 | |||
284 | nt = netdev_priv(dev); | 279 | nt = netdev_priv(dev); |
285 | nt->parms = *parms; | 280 | nt->parms = *parms; |
286 | 281 | ||
@@ -319,7 +314,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
319 | 8 bytes of packet payload. It means, that precise relaying of | 314 | 8 bytes of packet payload. It means, that precise relaying of |
320 | ICMP in the real Internet is absolutely infeasible. | 315 | ICMP in the real Internet is absolutely infeasible. |
321 | */ | 316 | */ |
322 | struct iphdr *iph = (struct iphdr *)skb->data; | 317 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
323 | const int type = icmp_hdr(skb)->type; | 318 | const int type = icmp_hdr(skb)->type; |
324 | const int code = icmp_hdr(skb)->code; | 319 | const int code = icmp_hdr(skb)->code; |
325 | struct ip_tunnel *t; | 320 | struct ip_tunnel *t; |
@@ -433,15 +428,16 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
433 | { | 428 | { |
434 | struct ip_tunnel *tunnel = netdev_priv(dev); | 429 | struct ip_tunnel *tunnel = netdev_priv(dev); |
435 | struct pcpu_tstats *tstats; | 430 | struct pcpu_tstats *tstats; |
436 | struct iphdr *tiph = &tunnel->parms.iph; | 431 | const struct iphdr *tiph = &tunnel->parms.iph; |
437 | u8 tos = tunnel->parms.iph.tos; | 432 | u8 tos = tunnel->parms.iph.tos; |
438 | __be16 df = tiph->frag_off; | 433 | __be16 df = tiph->frag_off; |
439 | struct rtable *rt; /* Route to the other host */ | 434 | struct rtable *rt; /* Route to the other host */ |
440 | struct net_device *tdev; /* Device to other host */ | 435 | struct net_device *tdev; /* Device to other host */ |
441 | struct iphdr *old_iph = ip_hdr(skb); | 436 | const struct iphdr *old_iph = ip_hdr(skb); |
442 | struct iphdr *iph; /* Our new IP header */ | 437 | struct iphdr *iph; /* Our new IP header */ |
443 | unsigned int max_headroom; /* The extra header space needed */ | 438 | unsigned int max_headroom; /* The extra header space needed */ |
444 | __be32 dst = tiph->daddr; | 439 | __be32 dst = tiph->daddr; |
440 | struct flowi4 fl4; | ||
445 | int mtu; | 441 | int mtu; |
446 | 442 | ||
447 | if (skb->protocol != htons(ETH_P_IP)) | 443 | if (skb->protocol != htons(ETH_P_IP)) |
@@ -460,7 +456,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
460 | goto tx_error_icmp; | 456 | goto tx_error_icmp; |
461 | } | 457 | } |
462 | 458 | ||
463 | rt = ip_route_output_ports(dev_net(dev), NULL, | 459 | rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, |
464 | dst, tiph->saddr, | 460 | dst, tiph->saddr, |
465 | 0, 0, | 461 | 0, 0, |
466 | IPPROTO_IPIP, RT_TOS(tos), | 462 | IPPROTO_IPIP, RT_TOS(tos), |
@@ -549,8 +545,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
549 | iph->frag_off = df; | 545 | iph->frag_off = df; |
550 | iph->protocol = IPPROTO_IPIP; | 546 | iph->protocol = IPPROTO_IPIP; |
551 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); | 547 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); |
552 | iph->daddr = rt->rt_dst; | 548 | iph->daddr = fl4.daddr; |
553 | iph->saddr = rt->rt_src; | 549 | iph->saddr = fl4.saddr; |
554 | 550 | ||
555 | if ((iph->ttl = tiph->ttl) == 0) | 551 | if ((iph->ttl = tiph->ttl) == 0) |
556 | iph->ttl = old_iph->ttl; | 552 | iph->ttl = old_iph->ttl; |
@@ -572,19 +568,21 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
572 | { | 568 | { |
573 | struct net_device *tdev = NULL; | 569 | struct net_device *tdev = NULL; |
574 | struct ip_tunnel *tunnel; | 570 | struct ip_tunnel *tunnel; |
575 | struct iphdr *iph; | 571 | const struct iphdr *iph; |
576 | 572 | ||
577 | tunnel = netdev_priv(dev); | 573 | tunnel = netdev_priv(dev); |
578 | iph = &tunnel->parms.iph; | 574 | iph = &tunnel->parms.iph; |
579 | 575 | ||
580 | if (iph->daddr) { | 576 | if (iph->daddr) { |
581 | struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, | 577 | struct rtable *rt; |
582 | iph->daddr, iph->saddr, | 578 | struct flowi4 fl4; |
583 | 0, 0, | 579 | |
584 | IPPROTO_IPIP, | 580 | rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, |
585 | RT_TOS(iph->tos), | 581 | iph->daddr, iph->saddr, |
586 | tunnel->parms.link); | 582 | 0, 0, |
587 | 583 | IPPROTO_IPIP, | |
584 | RT_TOS(iph->tos), | ||
585 | tunnel->parms.link); | ||
588 | if (!IS_ERR(rt)) { | 586 | if (!IS_ERR(rt)) { |
589 | tdev = rt->dst.dev; | 587 | tdev = rt->dst.dev; |
590 | ip_rt_put(rt); | 588 | ip_rt_put(rt); |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1f62eaeb6de4..30a7763c400e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -1549,7 +1549,7 @@ static struct notifier_block ip_mr_notifier = { | |||
1549 | static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) | 1549 | static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) |
1550 | { | 1550 | { |
1551 | struct iphdr *iph; | 1551 | struct iphdr *iph; |
1552 | struct iphdr *old_iph = ip_hdr(skb); | 1552 | const struct iphdr *old_iph = ip_hdr(skb); |
1553 | 1553 | ||
1554 | skb_push(skb, sizeof(struct iphdr)); | 1554 | skb_push(skb, sizeof(struct iphdr)); |
1555 | skb->transport_header = skb->network_header; | 1555 | skb->transport_header = skb->network_header; |
@@ -1595,6 +1595,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1595 | struct vif_device *vif = &mrt->vif_table[vifi]; | 1595 | struct vif_device *vif = &mrt->vif_table[vifi]; |
1596 | struct net_device *dev; | 1596 | struct net_device *dev; |
1597 | struct rtable *rt; | 1597 | struct rtable *rt; |
1598 | struct flowi4 fl4; | ||
1598 | int encap = 0; | 1599 | int encap = 0; |
1599 | 1600 | ||
1600 | if (vif->dev == NULL) | 1601 | if (vif->dev == NULL) |
@@ -1612,7 +1613,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1612 | #endif | 1613 | #endif |
1613 | 1614 | ||
1614 | if (vif->flags & VIFF_TUNNEL) { | 1615 | if (vif->flags & VIFF_TUNNEL) { |
1615 | rt = ip_route_output_ports(net, NULL, | 1616 | rt = ip_route_output_ports(net, &fl4, NULL, |
1616 | vif->remote, vif->local, | 1617 | vif->remote, vif->local, |
1617 | 0, 0, | 1618 | 0, 0, |
1618 | IPPROTO_IPIP, | 1619 | IPPROTO_IPIP, |
@@ -1621,7 +1622,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1621 | goto out_free; | 1622 | goto out_free; |
1622 | encap = sizeof(struct iphdr); | 1623 | encap = sizeof(struct iphdr); |
1623 | } else { | 1624 | } else { |
1624 | rt = ip_route_output_ports(net, NULL, iph->daddr, 0, | 1625 | rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, |
1625 | 0, 0, | 1626 | 0, 0, |
1626 | IPPROTO_IPIP, | 1627 | IPPROTO_IPIP, |
1627 | RT_TOS(iph->tos), vif->link); | 1628 | RT_TOS(iph->tos), vif->link); |
@@ -1788,12 +1789,14 @@ dont_forward: | |||
1788 | return 0; | 1789 | return 0; |
1789 | } | 1790 | } |
1790 | 1791 | ||
1791 | static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) | 1792 | static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) |
1792 | { | 1793 | { |
1794 | struct rtable *rt = skb_rtable(skb); | ||
1795 | struct iphdr *iph = ip_hdr(skb); | ||
1793 | struct flowi4 fl4 = { | 1796 | struct flowi4 fl4 = { |
1794 | .daddr = rt->rt_key_dst, | 1797 | .daddr = iph->daddr, |
1795 | .saddr = rt->rt_key_src, | 1798 | .saddr = iph->saddr, |
1796 | .flowi4_tos = rt->rt_tos, | 1799 | .flowi4_tos = iph->tos, |
1797 | .flowi4_oif = rt->rt_oif, | 1800 | .flowi4_oif = rt->rt_oif, |
1798 | .flowi4_iif = rt->rt_iif, | 1801 | .flowi4_iif = rt->rt_iif, |
1799 | .flowi4_mark = rt->rt_mark, | 1802 | .flowi4_mark = rt->rt_mark, |
@@ -1825,7 +1828,7 @@ int ip_mr_input(struct sk_buff *skb) | |||
1825 | if (IPCB(skb)->flags & IPSKB_FORWARDED) | 1828 | if (IPCB(skb)->flags & IPSKB_FORWARDED) |
1826 | goto dont_forward; | 1829 | goto dont_forward; |
1827 | 1830 | ||
1828 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); | 1831 | mrt = ipmr_rt_fib_lookup(net, skb); |
1829 | if (IS_ERR(mrt)) { | 1832 | if (IS_ERR(mrt)) { |
1830 | kfree_skb(skb); | 1833 | kfree_skb(skb); |
1831 | return PTR_ERR(mrt); | 1834 | return PTR_ERR(mrt); |
@@ -1957,7 +1960,7 @@ int pim_rcv_v1(struct sk_buff *skb) | |||
1957 | 1960 | ||
1958 | pim = igmp_hdr(skb); | 1961 | pim = igmp_hdr(skb); |
1959 | 1962 | ||
1960 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); | 1963 | mrt = ipmr_rt_fib_lookup(net, skb); |
1961 | if (IS_ERR(mrt)) | 1964 | if (IS_ERR(mrt)) |
1962 | goto drop; | 1965 | goto drop; |
1963 | if (!mrt->mroute_do_pim || | 1966 | if (!mrt->mroute_do_pim || |
@@ -1989,7 +1992,7 @@ static int pim_rcv(struct sk_buff *skb) | |||
1989 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 1992 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
1990 | goto drop; | 1993 | goto drop; |
1991 | 1994 | ||
1992 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); | 1995 | mrt = ipmr_rt_fib_lookup(net, skb); |
1993 | if (IS_ERR(mrt)) | 1996 | if (IS_ERR(mrt)) |
1994 | goto drop; | 1997 | goto drop; |
1995 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { | 1998 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { |
@@ -2038,20 +2041,20 @@ rtattr_failure: | |||
2038 | return -EMSGSIZE; | 2041 | return -EMSGSIZE; |
2039 | } | 2042 | } |
2040 | 2043 | ||
2041 | int ipmr_get_route(struct net *net, | 2044 | int ipmr_get_route(struct net *net, struct sk_buff *skb, |
2042 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) | 2045 | __be32 saddr, __be32 daddr, |
2046 | struct rtmsg *rtm, int nowait) | ||
2043 | { | 2047 | { |
2044 | int err; | ||
2045 | struct mr_table *mrt; | ||
2046 | struct mfc_cache *cache; | 2048 | struct mfc_cache *cache; |
2047 | struct rtable *rt = skb_rtable(skb); | 2049 | struct mr_table *mrt; |
2050 | int err; | ||
2048 | 2051 | ||
2049 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | 2052 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); |
2050 | if (mrt == NULL) | 2053 | if (mrt == NULL) |
2051 | return -ENOENT; | 2054 | return -ENOENT; |
2052 | 2055 | ||
2053 | rcu_read_lock(); | 2056 | rcu_read_lock(); |
2054 | cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); | 2057 | cache = ipmr_cache_find(mrt, saddr, daddr); |
2055 | 2058 | ||
2056 | if (cache == NULL) { | 2059 | if (cache == NULL) { |
2057 | struct sk_buff *skb2; | 2060 | struct sk_buff *skb2; |
@@ -2084,8 +2087,8 @@ int ipmr_get_route(struct net *net, | |||
2084 | skb_reset_network_header(skb2); | 2087 | skb_reset_network_header(skb2); |
2085 | iph = ip_hdr(skb2); | 2088 | iph = ip_hdr(skb2); |
2086 | iph->ihl = sizeof(struct iphdr) >> 2; | 2089 | iph->ihl = sizeof(struct iphdr) >> 2; |
2087 | iph->saddr = rt->rt_src; | 2090 | iph->saddr = saddr; |
2088 | iph->daddr = rt->rt_dst; | 2091 | iph->daddr = daddr; |
2089 | iph->version = 0; | 2092 | iph->version = 0; |
2090 | err = ipmr_cache_unresolved(mrt, vif, skb2); | 2093 | err = ipmr_cache_unresolved(mrt, vif, skb2); |
2091 | read_unlock(&mrt_lock); | 2094 | read_unlock(&mrt_lock); |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 89bc7e66d598..fd7a3f68917f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
260 | void *table_base; | 260 | void *table_base; |
261 | const struct xt_table_info *private; | 261 | const struct xt_table_info *private; |
262 | struct xt_action_param acpar; | 262 | struct xt_action_param acpar; |
263 | unsigned int addend; | ||
263 | 264 | ||
264 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) | 265 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) |
265 | return NF_DROP; | 266 | return NF_DROP; |
@@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
267 | indev = in ? in->name : nulldevname; | 268 | indev = in ? in->name : nulldevname; |
268 | outdev = out ? out->name : nulldevname; | 269 | outdev = out ? out->name : nulldevname; |
269 | 270 | ||
270 | xt_info_rdlock_bh(); | 271 | local_bh_disable(); |
272 | addend = xt_write_recseq_begin(); | ||
271 | private = table->private; | 273 | private = table->private; |
272 | table_base = private->entries[smp_processor_id()]; | 274 | table_base = private->entries[smp_processor_id()]; |
273 | 275 | ||
@@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
338 | /* Verdict */ | 340 | /* Verdict */ |
339 | break; | 341 | break; |
340 | } while (!acpar.hotdrop); | 342 | } while (!acpar.hotdrop); |
341 | xt_info_rdunlock_bh(); | 343 | xt_write_recseq_end(addend); |
344 | local_bh_enable(); | ||
342 | 345 | ||
343 | if (acpar.hotdrop) | 346 | if (acpar.hotdrop) |
344 | return NF_DROP; | 347 | return NF_DROP; |
@@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t, | |||
712 | unsigned int i; | 715 | unsigned int i; |
713 | 716 | ||
714 | for_each_possible_cpu(cpu) { | 717 | for_each_possible_cpu(cpu) { |
715 | seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; | 718 | seqcount_t *s = &per_cpu(xt_recseq, cpu); |
716 | 719 | ||
717 | i = 0; | 720 | i = 0; |
718 | xt_entry_foreach(iter, t->entries[cpu], t->size) { | 721 | xt_entry_foreach(iter, t->entries[cpu], t->size) { |
@@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t, | |||
720 | unsigned int start; | 723 | unsigned int start; |
721 | 724 | ||
722 | do { | 725 | do { |
723 | start = read_seqbegin(lock); | 726 | start = read_seqcount_begin(s); |
724 | bcnt = iter->counters.bcnt; | 727 | bcnt = iter->counters.bcnt; |
725 | pcnt = iter->counters.pcnt; | 728 | pcnt = iter->counters.pcnt; |
726 | } while (read_seqretry(lock, start)); | 729 | } while (read_seqcount_retry(s, start)); |
727 | 730 | ||
728 | ADD_COUNTER(counters[i], bcnt, pcnt); | 731 | ADD_COUNTER(counters[i], bcnt, pcnt); |
729 | ++i; | 732 | ++i; |
@@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user, | |||
1115 | int ret = 0; | 1118 | int ret = 0; |
1116 | void *loc_cpu_entry; | 1119 | void *loc_cpu_entry; |
1117 | struct arpt_entry *iter; | 1120 | struct arpt_entry *iter; |
1121 | unsigned int addend; | ||
1118 | #ifdef CONFIG_COMPAT | 1122 | #ifdef CONFIG_COMPAT |
1119 | struct compat_xt_counters_info compat_tmp; | 1123 | struct compat_xt_counters_info compat_tmp; |
1120 | 1124 | ||
@@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user, | |||
1171 | /* Choose the copy that is on our node */ | 1175 | /* Choose the copy that is on our node */ |
1172 | curcpu = smp_processor_id(); | 1176 | curcpu = smp_processor_id(); |
1173 | loc_cpu_entry = private->entries[curcpu]; | 1177 | loc_cpu_entry = private->entries[curcpu]; |
1174 | xt_info_wrlock(curcpu); | 1178 | addend = xt_write_recseq_begin(); |
1175 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { | 1179 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { |
1176 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); | 1180 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); |
1177 | ++i; | 1181 | ++i; |
1178 | } | 1182 | } |
1179 | xt_info_wrunlock(curcpu); | 1183 | xt_write_recseq_end(addend); |
1180 | unlock_up_free: | 1184 | unlock_up_free: |
1181 | local_bh_enable(); | 1185 | local_bh_enable(); |
1182 | xt_table_unlock(t); | 1186 | xt_table_unlock(t); |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 704915028009..764743843503 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info) | |||
68 | } | 68 | } |
69 | EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); | 69 | EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); |
70 | 70 | ||
71 | /* | ||
72 | We keep a set of rules for each CPU, so we can avoid write-locking | ||
73 | them in the softirq when updating the counters and therefore | ||
74 | only need to read-lock in the softirq; doing a write_lock_bh() in user | ||
75 | context stops packets coming through and allows user context to read | ||
76 | the counters or update the rules. | ||
77 | |||
78 | Hence the start of any table is given by get_table() below. */ | ||
79 | |||
80 | /* Returns whether matches rule or not. */ | 71 | /* Returns whether matches rule or not. */ |
81 | /* Performance critical - called for every packet */ | 72 | /* Performance critical - called for every packet */ |
82 | static inline bool | 73 | static inline bool |
@@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb, | |||
311 | unsigned int *stackptr, origptr, cpu; | 302 | unsigned int *stackptr, origptr, cpu; |
312 | const struct xt_table_info *private; | 303 | const struct xt_table_info *private; |
313 | struct xt_action_param acpar; | 304 | struct xt_action_param acpar; |
305 | unsigned int addend; | ||
314 | 306 | ||
315 | /* Initialization */ | 307 | /* Initialization */ |
316 | ip = ip_hdr(skb); | 308 | ip = ip_hdr(skb); |
@@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb, | |||
331 | acpar.hooknum = hook; | 323 | acpar.hooknum = hook; |
332 | 324 | ||
333 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 325 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
334 | xt_info_rdlock_bh(); | 326 | local_bh_disable(); |
327 | addend = xt_write_recseq_begin(); | ||
335 | private = table->private; | 328 | private = table->private; |
336 | cpu = smp_processor_id(); | 329 | cpu = smp_processor_id(); |
337 | table_base = private->entries[cpu]; | 330 | table_base = private->entries[cpu]; |
@@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb, | |||
430 | pr_debug("Exiting %s; resetting sp from %u to %u\n", | 423 | pr_debug("Exiting %s; resetting sp from %u to %u\n", |
431 | __func__, *stackptr, origptr); | 424 | __func__, *stackptr, origptr); |
432 | *stackptr = origptr; | 425 | *stackptr = origptr; |
433 | xt_info_rdunlock_bh(); | 426 | xt_write_recseq_end(addend); |
427 | local_bh_enable(); | ||
428 | |||
434 | #ifdef DEBUG_ALLOW_ALL | 429 | #ifdef DEBUG_ALLOW_ALL |
435 | return NF_ACCEPT; | 430 | return NF_ACCEPT; |
436 | #else | 431 | #else |
@@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t, | |||
886 | unsigned int i; | 881 | unsigned int i; |
887 | 882 | ||
888 | for_each_possible_cpu(cpu) { | 883 | for_each_possible_cpu(cpu) { |
889 | seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; | 884 | seqcount_t *s = &per_cpu(xt_recseq, cpu); |
890 | 885 | ||
891 | i = 0; | 886 | i = 0; |
892 | xt_entry_foreach(iter, t->entries[cpu], t->size) { | 887 | xt_entry_foreach(iter, t->entries[cpu], t->size) { |
@@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t, | |||
894 | unsigned int start; | 889 | unsigned int start; |
895 | 890 | ||
896 | do { | 891 | do { |
897 | start = read_seqbegin(lock); | 892 | start = read_seqcount_begin(s); |
898 | bcnt = iter->counters.bcnt; | 893 | bcnt = iter->counters.bcnt; |
899 | pcnt = iter->counters.pcnt; | 894 | pcnt = iter->counters.pcnt; |
900 | } while (read_seqretry(lock, start)); | 895 | } while (read_seqcount_retry(s, start)); |
901 | 896 | ||
902 | ADD_COUNTER(counters[i], bcnt, pcnt); | 897 | ADD_COUNTER(counters[i], bcnt, pcnt); |
903 | ++i; /* macro does multi eval of i */ | 898 | ++i; /* macro does multi eval of i */ |
@@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user, | |||
1312 | int ret = 0; | 1307 | int ret = 0; |
1313 | void *loc_cpu_entry; | 1308 | void *loc_cpu_entry; |
1314 | struct ipt_entry *iter; | 1309 | struct ipt_entry *iter; |
1310 | unsigned int addend; | ||
1315 | #ifdef CONFIG_COMPAT | 1311 | #ifdef CONFIG_COMPAT |
1316 | struct compat_xt_counters_info compat_tmp; | 1312 | struct compat_xt_counters_info compat_tmp; |
1317 | 1313 | ||
@@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user, | |||
1368 | /* Choose the copy that is on our node */ | 1364 | /* Choose the copy that is on our node */ |
1369 | curcpu = smp_processor_id(); | 1365 | curcpu = smp_processor_id(); |
1370 | loc_cpu_entry = private->entries[curcpu]; | 1366 | loc_cpu_entry = private->entries[curcpu]; |
1371 | xt_info_wrlock(curcpu); | 1367 | addend = xt_write_recseq_begin(); |
1372 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { | 1368 | xt_entry_foreach(iter, loc_cpu_entry, private->size) { |
1373 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); | 1369 | ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); |
1374 | ++i; | 1370 | ++i; |
1375 | } | 1371 | } |
1376 | xt_info_wrunlock(curcpu); | 1372 | xt_write_recseq_end(addend); |
1377 | unlock_up_free: | 1373 | unlock_up_free: |
1378 | local_bh_enable(); | 1374 | local_bh_enable(); |
1379 | xt_table_unlock(t); | 1375 | xt_table_unlock(t); |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 31427fb57aa8..99cfa28b6d38 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -153,7 +153,7 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, | |||
153 | } | 153 | } |
154 | EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); | 154 | EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); |
155 | 155 | ||
156 | static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data, | 156 | static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, |
157 | int datalen, __sum16 *check, int oldlen) | 157 | int datalen, __sum16 *check, int oldlen) |
158 | { | 158 | { |
159 | struct rtable *rt = skb_rtable(skb); | 159 | struct rtable *rt = skb_rtable(skb); |
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c new file mode 100644 index 000000000000..1f3bb11490c9 --- /dev/null +++ b/net/ipv4/ping.c | |||
@@ -0,0 +1,935 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * "Ping" sockets | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * Based on ipv4/udp.c code. | ||
14 | * | ||
15 | * Authors: Vasiliy Kulikov / Openwall (for Linux 2.6), | ||
16 | * Pavel Kankovsky (for Linux 2.4.32) | ||
17 | * | ||
18 | * Pavel gave all rights to bugs to Vasiliy, | ||
19 | * none of the bugs are Pavel's now. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <asm/system.h> | ||
24 | #include <linux/uaccess.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/fcntl.h> | ||
27 | #include <linux/socket.h> | ||
28 | #include <linux/sockios.h> | ||
29 | #include <linux/in.h> | ||
30 | #include <linux/errno.h> | ||
31 | #include <linux/timer.h> | ||
32 | #include <linux/mm.h> | ||
33 | #include <linux/inet.h> | ||
34 | #include <linux/netdevice.h> | ||
35 | #include <net/snmp.h> | ||
36 | #include <net/ip.h> | ||
37 | #include <net/ipv6.h> | ||
38 | #include <net/icmp.h> | ||
39 | #include <net/protocol.h> | ||
40 | #include <linux/skbuff.h> | ||
41 | #include <linux/proc_fs.h> | ||
42 | #include <net/sock.h> | ||
43 | #include <net/ping.h> | ||
44 | #include <net/icmp.h> | ||
45 | #include <net/udp.h> | ||
46 | #include <net/route.h> | ||
47 | #include <net/inet_common.h> | ||
48 | #include <net/checksum.h> | ||
49 | |||
50 | |||
51 | static struct ping_table ping_table; | ||
52 | |||
53 | static u16 ping_port_rover; | ||
54 | |||
55 | static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) | ||
56 | { | ||
57 | int res = (num + net_hash_mix(net)) & mask; | ||
58 | pr_debug("hash(%d) = %d\n", num, res); | ||
59 | return res; | ||
60 | } | ||
61 | |||
62 | static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, | ||
63 | struct net *net, unsigned num) | ||
64 | { | ||
65 | return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; | ||
66 | } | ||
67 | |||
68 | static int ping_v4_get_port(struct sock *sk, unsigned short ident) | ||
69 | { | ||
70 | struct hlist_nulls_node *node; | ||
71 | struct hlist_nulls_head *hlist; | ||
72 | struct inet_sock *isk, *isk2; | ||
73 | struct sock *sk2 = NULL; | ||
74 | |||
75 | isk = inet_sk(sk); | ||
76 | write_lock_bh(&ping_table.lock); | ||
77 | if (ident == 0) { | ||
78 | u32 i; | ||
79 | u16 result = ping_port_rover + 1; | ||
80 | |||
81 | for (i = 0; i < (1L << 16); i++, result++) { | ||
82 | if (!result) | ||
83 | result++; /* avoid zero */ | ||
84 | hlist = ping_hashslot(&ping_table, sock_net(sk), | ||
85 | result); | ||
86 | ping_portaddr_for_each_entry(sk2, node, hlist) { | ||
87 | isk2 = inet_sk(sk2); | ||
88 | |||
89 | if (isk2->inet_num == result) | ||
90 | goto next_port; | ||
91 | } | ||
92 | |||
93 | /* found */ | ||
94 | ping_port_rover = ident = result; | ||
95 | break; | ||
96 | next_port: | ||
97 | ; | ||
98 | } | ||
99 | if (i >= (1L << 16)) | ||
100 | goto fail; | ||
101 | } else { | ||
102 | hlist = ping_hashslot(&ping_table, sock_net(sk), ident); | ||
103 | ping_portaddr_for_each_entry(sk2, node, hlist) { | ||
104 | isk2 = inet_sk(sk2); | ||
105 | |||
106 | if ((isk2->inet_num == ident) && | ||
107 | (sk2 != sk) && | ||
108 | (!sk2->sk_reuse || !sk->sk_reuse)) | ||
109 | goto fail; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | pr_debug("found port/ident = %d\n", ident); | ||
114 | isk->inet_num = ident; | ||
115 | if (sk_unhashed(sk)) { | ||
116 | pr_debug("was not hashed\n"); | ||
117 | sock_hold(sk); | ||
118 | hlist_nulls_add_head(&sk->sk_nulls_node, hlist); | ||
119 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | ||
120 | } | ||
121 | write_unlock_bh(&ping_table.lock); | ||
122 | return 0; | ||
123 | |||
124 | fail: | ||
125 | write_unlock_bh(&ping_table.lock); | ||
126 | return 1; | ||
127 | } | ||
128 | |||
129 | static void ping_v4_hash(struct sock *sk) | ||
130 | { | ||
131 | pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); | ||
132 | BUG(); /* "Please do not press this button again." */ | ||
133 | } | ||
134 | |||
135 | static void ping_v4_unhash(struct sock *sk) | ||
136 | { | ||
137 | struct inet_sock *isk = inet_sk(sk); | ||
138 | pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); | ||
139 | if (sk_hashed(sk)) { | ||
140 | struct hlist_nulls_head *hslot; | ||
141 | |||
142 | hslot = ping_hashslot(&ping_table, sock_net(sk), isk->inet_num); | ||
143 | write_lock_bh(&ping_table.lock); | ||
144 | hlist_nulls_del(&sk->sk_nulls_node); | ||
145 | sock_put(sk); | ||
146 | isk->inet_num = isk->inet_sport = 0; | ||
147 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | ||
148 | write_unlock_bh(&ping_table.lock); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr, | ||
153 | u16 ident, int dif) | ||
154 | { | ||
155 | struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); | ||
156 | struct sock *sk = NULL; | ||
157 | struct inet_sock *isk; | ||
158 | struct hlist_nulls_node *hnode; | ||
159 | |||
160 | pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n", | ||
161 | (int)ident, (unsigned long)daddr, dif); | ||
162 | read_lock_bh(&ping_table.lock); | ||
163 | |||
164 | ping_portaddr_for_each_entry(sk, hnode, hslot) { | ||
165 | isk = inet_sk(sk); | ||
166 | |||
167 | pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk, | ||
168 | (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr, | ||
169 | sk->sk_bound_dev_if); | ||
170 | |||
171 | pr_debug("iterate\n"); | ||
172 | if (isk->inet_num != ident) | ||
173 | continue; | ||
174 | if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) | ||
175 | continue; | ||
176 | if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) | ||
177 | continue; | ||
178 | |||
179 | sock_hold(sk); | ||
180 | goto exit; | ||
181 | } | ||
182 | |||
183 | sk = NULL; | ||
184 | exit: | ||
185 | read_unlock_bh(&ping_table.lock); | ||
186 | |||
187 | return sk; | ||
188 | } | ||
189 | |||
/*
 * Read the "ping_group_range" sysctl pair for @net into *low / *high.
 *
 * Uses the seqlock retry loop so the two gids are read as a consistent
 * pair.  NOTE(review): the reader piggybacks on sysctl_local_ports.lock;
 * presumably the sysctl write handler updates the range under that same
 * seqlock - the writer is outside this file view, verify.
 */
static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
					  gid_t *high)
{
	gid_t *data = net->ipv4.sysctl_ping_group_range;
	unsigned seq;
	do {
		seq = read_seqbegin(&sysctl_local_ports.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&sysctl_local_ports.lock, seq));
}
202 | |||
203 | |||
204 | static int ping_init_sock(struct sock *sk) | ||
205 | { | ||
206 | struct net *net = sock_net(sk); | ||
207 | gid_t group = current_egid(); | ||
208 | gid_t range[2]; | ||
209 | struct group_info *group_info = get_current_groups(); | ||
210 | int i, j, count = group_info->ngroups; | ||
211 | |||
212 | inet_get_ping_group_range_net(net, range, range+1); | ||
213 | if (range[0] <= group && group <= range[1]) | ||
214 | return 0; | ||
215 | |||
216 | for (i = 0; i < group_info->nblocks; i++) { | ||
217 | int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); | ||
218 | |||
219 | for (j = 0; j < cp_count; j++) { | ||
220 | group = group_info->blocks[i][j]; | ||
221 | if (range[0] <= group && group <= range[1]) | ||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | count -= cp_count; | ||
226 | } | ||
227 | |||
228 | return -EACCES; | ||
229 | } | ||
230 | |||
/*
 * Release a ping socket.  All real teardown (unhashing via the proto
 * ->unhash hook, queue purging, final refcount drop) is done by
 * sk_common_release(); @timeout is unused for this datagram-style
 * socket.
 */
static void ping_close(struct sock *sk, long timeout)
{
	pr_debug("ping_close(sk=%p,sk->num=%u)\n",
		 inet_sk(sk), inet_sk(sk)->inet_num);
	pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);

	sk_common_release(sk);
}
239 | |||
240 | /* | ||
241 | * We need our own bind because there are no privileged id's == local ports. | ||
242 | * Moreover, we don't allow binding to multi- and broadcast addresses. | ||
243 | */ | ||
244 | |||
static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct inet_sock *isk = inet_sk(sk);
	unsigned short snum;
	int chk_addr_ret;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
		 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));

	/* Classify the requested local address; INADDR_ANY is always OK. */
	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
	if (addr->sin_addr.s_addr == INADDR_ANY)
		chk_addr_ret = RTN_LOCAL;

	/*
	 * Reject non-local addresses (unless nonlocal binds are enabled via
	 * sysctl or freebind/transparent socket options), and always reject
	 * multicast and broadcast addresses.
	 */
	if ((sysctl_ip_nonlocal_bind == 0 &&
	     isk->freebind == 0 && isk->transparent == 0 &&
	     chk_addr_ret != RTN_LOCAL) ||
	    chk_addr_ret == RTN_MULTICAST ||
	    chk_addr_ret == RTN_BROADCAST)
		return -EADDRNOTAVAIL;

	lock_sock(sk);

	/* A second bind on an already-bound socket is an error. */
	err = -EINVAL;
	if (isk->inet_num != 0)
		goto out;

	/*
	 * sin_port carries the ICMP echo ident, not a real port.
	 * ping_v4_get_port() claims the requested ident (or picks a free
	 * one when snum == 0); on failure the provisionally stored
	 * addresses are rolled back.
	 */
	err = -EADDRINUSE;
	isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
	snum = ntohs(addr->sin_port);
	if (ping_v4_get_port(sk, snum) != 0) {
		isk->inet_saddr = isk->inet_rcv_saddr = 0;
		goto out;
	}

	pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n",
		(int)isk->inet_num,
		(unsigned long) isk->inet_rcv_saddr,
		(int)sk->sk_bound_dev_if);

	err = 0;
	/* Lock the explicitly bound address/ident against later rebinding. */
	if (isk->inet_rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	isk->inet_sport = htons(isk->inet_num);
	isk->inet_daddr = 0;
	isk->inet_dport = 0;
	sk_dst_reset(sk);
out:
	release_sock(sk);
	pr_debug("ping_v4_bind -> %d\n", err);
	return err;
}
303 | |||
304 | /* | ||
305 | * Is this a supported type of ICMP message? | ||
306 | */ | ||
307 | |||
/* Only ICMP_ECHO requests with code 0 are handled by ping sockets. */
static inline int ping_supported(int type, int code)
{
	return (type == ICMP_ECHO && code == 0) ? 1 : 0;
}
314 | |||
315 | /* | ||
316 | * This routine is called by the ICMP module when it gets some | ||
317 | * sort of error condition. | ||
318 | */ | ||
319 | |||
320 | static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); | ||
321 | |||
void ping_err(struct sk_buff *skb, u32 info)
{
	/* skb->data points at the quoted original packet: its IP header
	 * followed by the echo request's ICMP header. */
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
	struct inet_sock *inet_sock;
	int type = icmph->type;
	int code = icmph->code;
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	int harderr;
	int err;

	/* We assume the packet has already been checked by icmp_unreach */

	if (!ping_supported(icmph->type, icmph->code))
		return;

	pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
		 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));

	/* Addresses are from the quoted outgoing packet, so our socket's
	 * source is iph->saddr and its destination iph->daddr. */
	sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
			    ntohs(icmph->un.echo.id), skb->dev->ifindex);
	if (sk == NULL) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		pr_debug("no socket, dropping\n");
		return;	/* No socket for error */
	}
	pr_debug("err on socket %p\n", sk);

	err = 0;
	harderr = 0;
	inet_sock = inet_sk(sk);

	/* Map the ICMP error onto an errno; harderr marks errors that are
	 * reported even without IP_RECVERR on connected sockets. */
	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		/* This is not a real error but ping wants to see it.
		 * Report it with some fake errno. */
		err = EREMOTEIO;
		break;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
			if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
				err = EMSGSIZE;
				harderr = 1;
				break;
			}
			goto out;
		}
		err = EHOSTUNREACH;
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		}
		break;
	case ICMP_REDIRECT:
		/* See ICMP_SOURCE_QUENCH */
		err = EREMOTEIO;
		break;
	}

	/*
	 * RFC1122: OK.  Passes ICMP errors back to application, as per
	 * 4.1.3.3.
	 */
	if (!inet_sock->recverr) {
		/* Without IP_RECVERR, only hard errors on connected
		 * sockets are surfaced to the application. */
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else {
		ip_icmp_error(sk, skb, err, 0 /* no remote port */,
			 info, (u8 *)icmph);
	}
	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	sock_put(sk);	/* drop the reference taken by ping_v4_lookup() */
}
406 | |||
407 | /* | ||
408 | * Copy and checksum an ICMP Echo packet from user space into a buffer. | ||
409 | */ | ||
410 | |||
/*
 * "from" cookie handed to ip_append_data()/ping_getfrag(): the
 * kernel-built ICMP header, the user iovec holding the echo payload,
 * and a running partial checksum accumulated across fragments.
 */
struct pingfakehdr {
	struct icmphdr icmph;
	struct iovec *iov;
	u32 wcheck;
};
416 | |||
/*
 * Fragment-fill callback for ip_append_data().  Copies payload out of
 * the user iovec into the packet while folding it into pfh->wcheck.
 * For the first chunk (offset == 0) the space for the ICMP header is
 * skipped; the header itself is written later by
 * ping_push_pending_frames() once the full checksum is known.
 */
static int ping_getfrag(void *from, char * to,
			int offset, int fraglen, int odd, struct sk_buff *skb)
{
	struct pingfakehdr *pfh = (struct pingfakehdr *)from;

	if (offset == 0) {
		/* The first chunk must have room for the ICMP header. */
		if (fraglen < sizeof(struct icmphdr))
			BUG();
		if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr),
			    pfh->iov, 0, fraglen - sizeof(struct icmphdr),
			    &pfh->wcheck))
			return -EFAULT;

		return 0;
	}
	/* Later chunks: offsets are relative to the ICMP message start,
	 * so they can never land inside the header. */
	if (offset < sizeof(struct icmphdr))
		BUG();
	if (csum_partial_copy_fromiovecend
			(to, pfh->iov, offset - sizeof(struct icmphdr),
			 fraglen, &pfh->wcheck))
		return -EFAULT;
	return 0;
}
440 | |||
/*
 * Finalize and transmit the queued echo request: fold the ICMP header
 * into the payload checksum accumulated by ping_getfrag(), write the
 * completed header into the head skb, then hand the pending queue to
 * ip_push_pending_frames().
 */
static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
				    struct flowi4 *fl4)
{
	struct sk_buff *skb = skb_peek(&sk->sk_write_queue);

	pfh->wcheck = csum_partial((char *)&pfh->icmph,
		sizeof(struct icmphdr), pfh->wcheck);
	pfh->icmph.checksum = csum_fold(pfh->wcheck);
	/* Checksum already computed in software. */
	memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
	skb->ip_summed = CHECKSUM_NONE;
	return ip_push_pending_frames(sk, fl4);
}
453 | |||
454 | static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
455 | size_t len) | ||
456 | { | ||
457 | struct net *net = sock_net(sk); | ||
458 | struct flowi4 fl4; | ||
459 | struct inet_sock *inet = inet_sk(sk); | ||
460 | struct ipcm_cookie ipc; | ||
461 | struct icmphdr user_icmph; | ||
462 | struct pingfakehdr pfh; | ||
463 | struct rtable *rt = NULL; | ||
464 | struct ip_options_data opt_copy; | ||
465 | int free = 0; | ||
466 | u32 saddr, daddr, faddr; | ||
467 | u8 tos; | ||
468 | int err; | ||
469 | |||
470 | pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); | ||
471 | |||
472 | |||
473 | if (len > 0xFFFF) | ||
474 | return -EMSGSIZE; | ||
475 | |||
476 | /* | ||
477 | * Check the flags. | ||
478 | */ | ||
479 | |||
480 | /* Mirror BSD error message compatibility */ | ||
481 | if (msg->msg_flags & MSG_OOB) | ||
482 | return -EOPNOTSUPP; | ||
483 | |||
484 | /* | ||
485 | * Fetch the ICMP header provided by the userland. | ||
486 | * iovec is modified! | ||
487 | */ | ||
488 | |||
489 | if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, | ||
490 | sizeof(struct icmphdr))) | ||
491 | return -EFAULT; | ||
492 | if (!ping_supported(user_icmph.type, user_icmph.code)) | ||
493 | return -EINVAL; | ||
494 | |||
495 | /* | ||
496 | * Get and verify the address. | ||
497 | */ | ||
498 | |||
499 | if (msg->msg_name) { | ||
500 | struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; | ||
501 | if (msg->msg_namelen < sizeof(*usin)) | ||
502 | return -EINVAL; | ||
503 | if (usin->sin_family != AF_INET) | ||
504 | return -EINVAL; | ||
505 | daddr = usin->sin_addr.s_addr; | ||
506 | /* no remote port */ | ||
507 | } else { | ||
508 | if (sk->sk_state != TCP_ESTABLISHED) | ||
509 | return -EDESTADDRREQ; | ||
510 | daddr = inet->inet_daddr; | ||
511 | /* no remote port */ | ||
512 | } | ||
513 | |||
514 | ipc.addr = inet->inet_saddr; | ||
515 | ipc.opt = NULL; | ||
516 | ipc.oif = sk->sk_bound_dev_if; | ||
517 | ipc.tx_flags = 0; | ||
518 | err = sock_tx_timestamp(sk, &ipc.tx_flags); | ||
519 | if (err) | ||
520 | return err; | ||
521 | |||
522 | if (msg->msg_controllen) { | ||
523 | err = ip_cmsg_send(sock_net(sk), msg, &ipc); | ||
524 | if (err) | ||
525 | return err; | ||
526 | if (ipc.opt) | ||
527 | free = 1; | ||
528 | } | ||
529 | if (!ipc.opt) { | ||
530 | struct ip_options_rcu *inet_opt; | ||
531 | |||
532 | rcu_read_lock(); | ||
533 | inet_opt = rcu_dereference(inet->inet_opt); | ||
534 | if (inet_opt) { | ||
535 | memcpy(&opt_copy, inet_opt, | ||
536 | sizeof(*inet_opt) + inet_opt->opt.optlen); | ||
537 | ipc.opt = &opt_copy.opt; | ||
538 | } | ||
539 | rcu_read_unlock(); | ||
540 | } | ||
541 | |||
542 | saddr = ipc.addr; | ||
543 | ipc.addr = faddr = daddr; | ||
544 | |||
545 | if (ipc.opt && ipc.opt->opt.srr) { | ||
546 | if (!daddr) | ||
547 | return -EINVAL; | ||
548 | faddr = ipc.opt->opt.faddr; | ||
549 | } | ||
550 | tos = RT_TOS(inet->tos); | ||
551 | if (sock_flag(sk, SOCK_LOCALROUTE) || | ||
552 | (msg->msg_flags & MSG_DONTROUTE) || | ||
553 | (ipc.opt && ipc.opt->opt.is_strictroute)) { | ||
554 | tos |= RTO_ONLINK; | ||
555 | } | ||
556 | |||
557 | if (ipv4_is_multicast(daddr)) { | ||
558 | if (!ipc.oif) | ||
559 | ipc.oif = inet->mc_index; | ||
560 | if (!saddr) | ||
561 | saddr = inet->mc_addr; | ||
562 | } | ||
563 | |||
564 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, | ||
565 | RT_SCOPE_UNIVERSE, sk->sk_protocol, | ||
566 | inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); | ||
567 | |||
568 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); | ||
569 | rt = ip_route_output_flow(net, &fl4, sk); | ||
570 | if (IS_ERR(rt)) { | ||
571 | err = PTR_ERR(rt); | ||
572 | rt = NULL; | ||
573 | if (err == -ENETUNREACH) | ||
574 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | ||
575 | goto out; | ||
576 | } | ||
577 | |||
578 | err = -EACCES; | ||
579 | if ((rt->rt_flags & RTCF_BROADCAST) && | ||
580 | !sock_flag(sk, SOCK_BROADCAST)) | ||
581 | goto out; | ||
582 | |||
583 | if (msg->msg_flags & MSG_CONFIRM) | ||
584 | goto do_confirm; | ||
585 | back_from_confirm: | ||
586 | |||
587 | if (!ipc.addr) | ||
588 | ipc.addr = fl4.daddr; | ||
589 | |||
590 | lock_sock(sk); | ||
591 | |||
592 | pfh.icmph.type = user_icmph.type; /* already checked */ | ||
593 | pfh.icmph.code = user_icmph.code; /* ditto */ | ||
594 | pfh.icmph.checksum = 0; | ||
595 | pfh.icmph.un.echo.id = inet->inet_sport; | ||
596 | pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; | ||
597 | pfh.iov = msg->msg_iov; | ||
598 | pfh.wcheck = 0; | ||
599 | |||
600 | err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, | ||
601 | 0, &ipc, &rt, msg->msg_flags); | ||
602 | if (err) | ||
603 | ip_flush_pending_frames(sk); | ||
604 | else | ||
605 | err = ping_push_pending_frames(sk, &pfh, &fl4); | ||
606 | release_sock(sk); | ||
607 | |||
608 | out: | ||
609 | ip_rt_put(rt); | ||
610 | if (free) | ||
611 | kfree(ipc.opt); | ||
612 | if (!err) { | ||
613 | icmp_out_count(sock_net(sk), user_icmph.type); | ||
614 | return len; | ||
615 | } | ||
616 | return err; | ||
617 | |||
618 | do_confirm: | ||
619 | dst_confirm(&rt->dst); | ||
620 | if (!(msg->msg_flags & MSG_PROBE) || len) | ||
621 | goto back_from_confirm; | ||
622 | err = 0; | ||
623 | goto out; | ||
624 | } | ||
625 | |||
626 | static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
627 | size_t len, int noblock, int flags, int *addr_len) | ||
628 | { | ||
629 | struct inet_sock *isk = inet_sk(sk); | ||
630 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | ||
631 | struct sk_buff *skb; | ||
632 | int copied, err; | ||
633 | |||
634 | pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num); | ||
635 | |||
636 | if (flags & MSG_OOB) | ||
637 | goto out; | ||
638 | |||
639 | if (addr_len) | ||
640 | *addr_len = sizeof(*sin); | ||
641 | |||
642 | if (flags & MSG_ERRQUEUE) | ||
643 | return ip_recv_error(sk, msg, len); | ||
644 | |||
645 | skb = skb_recv_datagram(sk, flags, noblock, &err); | ||
646 | if (!skb) | ||
647 | goto out; | ||
648 | |||
649 | copied = skb->len; | ||
650 | if (copied > len) { | ||
651 | msg->msg_flags |= MSG_TRUNC; | ||
652 | copied = len; | ||
653 | } | ||
654 | |||
655 | /* Don't bother checking the checksum */ | ||
656 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); | ||
657 | if (err) | ||
658 | goto done; | ||
659 | |||
660 | sock_recv_timestamp(msg, sk, skb); | ||
661 | |||
662 | /* Copy the address. */ | ||
663 | if (sin) { | ||
664 | sin->sin_family = AF_INET; | ||
665 | sin->sin_port = 0 /* skb->h.uh->source */; | ||
666 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | ||
667 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
668 | } | ||
669 | if (isk->cmsg_flags) | ||
670 | ip_cmsg_recv(msg, skb); | ||
671 | err = copied; | ||
672 | |||
673 | done: | ||
674 | skb_free_datagram(sk, skb); | ||
675 | out: | ||
676 | pr_debug("ping_recvmsg -> %d\n", err); | ||
677 | return err; | ||
678 | } | ||
679 | |||
680 | static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | ||
681 | { | ||
682 | pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", | ||
683 | inet_sk(sk), inet_sk(sk)->inet_num, skb); | ||
684 | if (sock_queue_rcv_skb(sk, skb) < 0) { | ||
685 | ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS); | ||
686 | kfree_skb(skb); | ||
687 | pr_debug("ping_queue_rcv_skb -> failed\n"); | ||
688 | return -1; | ||
689 | } | ||
690 | return 0; | ||
691 | } | ||
692 | |||
693 | |||
694 | /* | ||
695 | * All we need to do is get the socket. | ||
696 | */ | ||
697 | |||
698 | void ping_rcv(struct sk_buff *skb) | ||
699 | { | ||
700 | struct sock *sk; | ||
701 | struct net *net = dev_net(skb->dev); | ||
702 | struct iphdr *iph = ip_hdr(skb); | ||
703 | struct icmphdr *icmph = icmp_hdr(skb); | ||
704 | u32 saddr = iph->saddr; | ||
705 | u32 daddr = iph->daddr; | ||
706 | |||
707 | /* We assume the packet has already been checked by icmp_rcv */ | ||
708 | |||
709 | pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n", | ||
710 | skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); | ||
711 | |||
712 | /* Push ICMP header back */ | ||
713 | skb_push(skb, skb->data - (u8 *)icmph); | ||
714 | |||
715 | sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), | ||
716 | skb->dev->ifindex); | ||
717 | if (sk != NULL) { | ||
718 | pr_debug("rcv on socket %p\n", sk); | ||
719 | ping_queue_rcv_skb(sk, skb_get(skb)); | ||
720 | sock_put(sk); | ||
721 | return; | ||
722 | } | ||
723 | pr_debug("no socket, dropping\n"); | ||
724 | |||
725 | /* We're called from icmp_rcv(). kfree_skb() is done there. */ | ||
726 | } | ||
727 | |||
/*
 * Transport protocol descriptor for IPv4 ping sockets.  Most of the
 * datagram plumbing is shared with UDP/inet helpers; the hash/unhash/
 * get_port hooks implement the ident-based ping hash table above.
 */
struct proto ping_prot = {
	.name =		"PING",
	.owner =	THIS_MODULE,
	.init =		ping_init_sock,
	.close =	ping_close,
	.connect =	ip4_datagram_connect,
	.disconnect =	udp_disconnect,
	.setsockopt =	ip_setsockopt,
	.getsockopt =	ip_getsockopt,
	.sendmsg =	ping_sendmsg,
	.recvmsg =	ping_recvmsg,
	.bind =		ping_bind,
	.backlog_rcv =	ping_queue_rcv_skb,
	.hash =		ping_v4_hash,
	.unhash =	ping_v4_unhash,
	.get_port =	ping_v4_get_port,
	.obj_size =	sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
747 | |||
748 | #ifdef CONFIG_PROC_FS | ||
749 | |||
750 | static struct sock *ping_get_first(struct seq_file *seq, int start) | ||
751 | { | ||
752 | struct sock *sk; | ||
753 | struct ping_iter_state *state = seq->private; | ||
754 | struct net *net = seq_file_net(seq); | ||
755 | |||
756 | for (state->bucket = start; state->bucket < PING_HTABLE_SIZE; | ||
757 | ++state->bucket) { | ||
758 | struct hlist_nulls_node *node; | ||
759 | struct hlist_nulls_head *hslot; | ||
760 | |||
761 | hslot = &ping_table.hash[state->bucket]; | ||
762 | |||
763 | if (hlist_nulls_empty(hslot)) | ||
764 | continue; | ||
765 | |||
766 | sk_nulls_for_each(sk, node, hslot) { | ||
767 | if (net_eq(sock_net(sk), net)) | ||
768 | goto found; | ||
769 | } | ||
770 | } | ||
771 | sk = NULL; | ||
772 | found: | ||
773 | return sk; | ||
774 | } | ||
775 | |||
776 | static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk) | ||
777 | { | ||
778 | struct ping_iter_state *state = seq->private; | ||
779 | struct net *net = seq_file_net(seq); | ||
780 | |||
781 | do { | ||
782 | sk = sk_nulls_next(sk); | ||
783 | } while (sk && (!net_eq(sock_net(sk), net))); | ||
784 | |||
785 | if (!sk) | ||
786 | return ping_get_first(seq, state->bucket + 1); | ||
787 | return sk; | ||
788 | } | ||
789 | |||
790 | static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) | ||
791 | { | ||
792 | struct sock *sk = ping_get_first(seq, 0); | ||
793 | |||
794 | if (sk) | ||
795 | while (pos && (sk = ping_get_next(seq, sk)) != NULL) | ||
796 | --pos; | ||
797 | return pos ? NULL : sk; | ||
798 | } | ||
799 | |||
/*
 * seq_file start: hold the table read lock for the whole dump (paired
 * with the unlock in ping_seq_stop()) and position the iterator;
 * *pos == 0 yields the header token.
 */
static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ping_iter_state *state = seq->private;
	state->bucket = 0;

	read_lock_bh(&ping_table.lock);

	return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
809 | |||
810 | static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
811 | { | ||
812 | struct sock *sk; | ||
813 | |||
814 | if (v == SEQ_START_TOKEN) | ||
815 | sk = ping_get_idx(seq, 0); | ||
816 | else | ||
817 | sk = ping_get_next(seq, v); | ||
818 | |||
819 | ++*pos; | ||
820 | return sk; | ||
821 | } | ||
822 | |||
/* seq_file stop: release the lock taken in ping_seq_start(). */
static void ping_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&ping_table.lock);
}
827 | |||
/*
 * Emit one /proc/net/icmp line for @sp in the udp/tcp proc format;
 * *len receives the character count written (via %n) so the caller
 * can pad the line to a fixed width.
 */
static void ping_format_sock(struct sock *sp, struct seq_file *f,
		int bucket, int *len)
{
	struct inet_sock *inet = inet_sk(sp);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);

	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
		bucket, src, srcp, dest, destp, sp->sk_state,
		sk_wmem_alloc_get(sp),
		sk_rmem_alloc_get(sp),
		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		atomic_read(&sp->sk_drops), len);
}
846 | |||
/*
 * seq_file show: the start token prints the column header, otherwise
 * one formatted socket line padded to 127 characters.
 */
static int ping_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode ref pointer drops");
	else {
		struct ping_iter_state *state = seq->private;
		int len;

		ping_format_sock(v, seq, state->bucket, &len);
		seq_printf(seq, "%*s\n", 127 - len, "");
	}
	return 0;
}
863 | |||
/* seq_file iterator for /proc/net/icmp. */
static const struct seq_operations ping_seq_ops = {
	.show		= ping_seq_show,
	.start		= ping_seq_start,
	.next		= ping_seq_next,
	.stop		= ping_seq_stop,
};
870 | |||
/* Open /proc/net/icmp: per-net seq file carrying the iterator state. */
static int ping_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ping_seq_ops,
			   sizeof(struct ping_iter_state));
}
876 | |||
/* File operations for /proc/net/icmp. */
static const struct file_operations ping_seq_fops = {
	.open		= ping_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
883 | |||
884 | static int ping_proc_register(struct net *net) | ||
885 | { | ||
886 | struct proc_dir_entry *p; | ||
887 | int rc = 0; | ||
888 | |||
889 | p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops); | ||
890 | if (!p) | ||
891 | rc = -ENOMEM; | ||
892 | return rc; | ||
893 | } | ||
894 | |||
/* Remove the per-namespace /proc/net/icmp entry. */
static void ping_proc_unregister(struct net *net)
{
	proc_net_remove(net, "icmp");
}
899 | |||
900 | |||
/* pernet init hook: register the proc entry for a new namespace. */
static int __net_init ping_proc_init_net(struct net *net)
{
	return ping_proc_register(net);
}
905 | |||
/* pernet exit hook: drop the proc entry when a namespace goes away. */
static void __net_exit ping_proc_exit_net(struct net *net)
{
	ping_proc_unregister(net);
}
910 | |||
/* Per-network-namespace lifecycle for the /proc/net/icmp entry. */
static struct pernet_operations ping_net_ops = {
	.init = ping_proc_init_net,
	.exit = ping_proc_exit_net,
};
915 | |||
/* Register the pernet ops so every namespace gets /proc/net/icmp. */
int __init ping_proc_init(void)
{
	return register_pernet_subsys(&ping_net_ops);
}
920 | |||
/* Unregister the pernet ops (module unload / cleanup path). */
void ping_proc_exit(void)
{
	unregister_pernet_subsys(&ping_net_ops);
}
925 | |||
926 | #endif | ||
927 | |||
/*
 * Boot-time initialisation of the ping socket hash table: set up the
 * nulls chains (each seeded with its bucket index as the nulls marker)
 * and the table's rwlock.
 */
void __init ping_init(void)
{
	int i;

	for (i = 0; i < PING_HTABLE_SIZE; i++)
		INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
	rwlock_init(&ping_table.lock);
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bceaec42c37d..11e1780455f2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -154,7 +154,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) | |||
154 | * RFC 1122: SHOULD pass TOS value up to the transport layer. | 154 | * RFC 1122: SHOULD pass TOS value up to the transport layer. |
155 | * -> It does. And not only TOS, but all IP header. | 155 | * -> It does. And not only TOS, but all IP header. |
156 | */ | 156 | */ |
157 | static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) | 157 | static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) |
158 | { | 158 | { |
159 | struct sock *sk; | 159 | struct sock *sk; |
160 | struct hlist_head *head; | 160 | struct hlist_head *head; |
@@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) | |||
247 | } | 247 | } |
248 | 248 | ||
249 | if (inet->recverr) { | 249 | if (inet->recverr) { |
250 | struct iphdr *iph = (struct iphdr *)skb->data; | 250 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
251 | u8 *payload = skb->data + (iph->ihl << 2); | 251 | u8 *payload = skb->data + (iph->ihl << 2); |
252 | 252 | ||
253 | if (inet->hdrincl) | 253 | if (inet->hdrincl) |
@@ -265,7 +265,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) | |||
265 | { | 265 | { |
266 | int hash; | 266 | int hash; |
267 | struct sock *raw_sk; | 267 | struct sock *raw_sk; |
268 | struct iphdr *iph; | 268 | const struct iphdr *iph; |
269 | struct net *net; | 269 | struct net *net; |
270 | 270 | ||
271 | hash = protocol & (RAW_HTABLE_SIZE - 1); | 271 | hash = protocol & (RAW_HTABLE_SIZE - 1); |
@@ -273,7 +273,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) | |||
273 | read_lock(&raw_v4_hashinfo.lock); | 273 | read_lock(&raw_v4_hashinfo.lock); |
274 | raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); | 274 | raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); |
275 | if (raw_sk != NULL) { | 275 | if (raw_sk != NULL) { |
276 | iph = (struct iphdr *)skb->data; | 276 | iph = (const struct iphdr *)skb->data; |
277 | net = dev_net(skb->dev); | 277 | net = dev_net(skb->dev); |
278 | 278 | ||
279 | while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, | 279 | while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, |
@@ -281,7 +281,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) | |||
281 | skb->dev->ifindex)) != NULL) { | 281 | skb->dev->ifindex)) != NULL) { |
282 | raw_err(raw_sk, skb, info); | 282 | raw_err(raw_sk, skb, info); |
283 | raw_sk = sk_next(raw_sk); | 283 | raw_sk = sk_next(raw_sk); |
284 | iph = (struct iphdr *)skb->data; | 284 | iph = (const struct iphdr *)skb->data; |
285 | } | 285 | } |
286 | } | 286 | } |
287 | read_unlock(&raw_v4_hashinfo.lock); | 287 | read_unlock(&raw_v4_hashinfo.lock); |
@@ -314,9 +314,10 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) | |||
314 | return 0; | 314 | return 0; |
315 | } | 315 | } |
316 | 316 | ||
317 | static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | 317 | static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, |
318 | struct rtable **rtp, | 318 | void *from, size_t length, |
319 | unsigned int flags) | 319 | struct rtable **rtp, |
320 | unsigned int flags) | ||
320 | { | 321 | { |
321 | struct inet_sock *inet = inet_sk(sk); | 322 | struct inet_sock *inet = inet_sk(sk); |
322 | struct net *net = sock_net(sk); | 323 | struct net *net = sock_net(sk); |
@@ -327,7 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
327 | struct rtable *rt = *rtp; | 328 | struct rtable *rt = *rtp; |
328 | 329 | ||
329 | if (length > rt->dst.dev->mtu) { | 330 | if (length > rt->dst.dev->mtu) { |
330 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 331 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, |
331 | rt->dst.dev->mtu); | 332 | rt->dst.dev->mtu); |
332 | return -EMSGSIZE; | 333 | return -EMSGSIZE; |
333 | } | 334 | } |
@@ -372,7 +373,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
372 | 373 | ||
373 | if (iphlen >= sizeof(*iph)) { | 374 | if (iphlen >= sizeof(*iph)) { |
374 | if (!iph->saddr) | 375 | if (!iph->saddr) |
375 | iph->saddr = rt->rt_src; | 376 | iph->saddr = fl4->saddr; |
376 | iph->check = 0; | 377 | iph->check = 0; |
377 | iph->tot_len = htons(length); | 378 | iph->tot_len = htons(length); |
378 | if (!iph->id) | 379 | if (!iph->id) |
@@ -455,11 +456,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
455 | struct inet_sock *inet = inet_sk(sk); | 456 | struct inet_sock *inet = inet_sk(sk); |
456 | struct ipcm_cookie ipc; | 457 | struct ipcm_cookie ipc; |
457 | struct rtable *rt = NULL; | 458 | struct rtable *rt = NULL; |
459 | struct flowi4 fl4; | ||
458 | int free = 0; | 460 | int free = 0; |
459 | __be32 daddr; | 461 | __be32 daddr; |
460 | __be32 saddr; | 462 | __be32 saddr; |
461 | u8 tos; | 463 | u8 tos; |
462 | int err; | 464 | int err; |
465 | struct ip_options_data opt_copy; | ||
463 | 466 | ||
464 | err = -EMSGSIZE; | 467 | err = -EMSGSIZE; |
465 | if (len > 0xFFFF) | 468 | if (len > 0xFFFF) |
@@ -520,8 +523,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
520 | saddr = ipc.addr; | 523 | saddr = ipc.addr; |
521 | ipc.addr = daddr; | 524 | ipc.addr = daddr; |
522 | 525 | ||
523 | if (!ipc.opt) | 526 | if (!ipc.opt) { |
524 | ipc.opt = inet->opt; | 527 | struct ip_options_rcu *inet_opt; |
528 | |||
529 | rcu_read_lock(); | ||
530 | inet_opt = rcu_dereference(inet->inet_opt); | ||
531 | if (inet_opt) { | ||
532 | memcpy(&opt_copy, inet_opt, | ||
533 | sizeof(*inet_opt) + inet_opt->opt.optlen); | ||
534 | ipc.opt = &opt_copy.opt; | ||
535 | } | ||
536 | rcu_read_unlock(); | ||
537 | } | ||
525 | 538 | ||
526 | if (ipc.opt) { | 539 | if (ipc.opt) { |
527 | err = -EINVAL; | 540 | err = -EINVAL; |
@@ -530,10 +543,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
530 | */ | 543 | */ |
531 | if (inet->hdrincl) | 544 | if (inet->hdrincl) |
532 | goto done; | 545 | goto done; |
533 | if (ipc.opt->srr) { | 546 | if (ipc.opt->opt.srr) { |
534 | if (!daddr) | 547 | if (!daddr) |
535 | goto done; | 548 | goto done; |
536 | daddr = ipc.opt->faddr; | 549 | daddr = ipc.opt->opt.faddr; |
537 | } | 550 | } |
538 | } | 551 | } |
539 | tos = RT_CONN_FLAGS(sk); | 552 | tos = RT_CONN_FLAGS(sk); |
@@ -547,31 +560,23 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
547 | saddr = inet->mc_addr; | 560 | saddr = inet->mc_addr; |
548 | } | 561 | } |
549 | 562 | ||
550 | { | 563 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, |
551 | struct flowi4 fl4 = { | 564 | RT_SCOPE_UNIVERSE, |
552 | .flowi4_oif = ipc.oif, | 565 | inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, |
553 | .flowi4_mark = sk->sk_mark, | 566 | FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); |
554 | .daddr = daddr, | ||
555 | .saddr = saddr, | ||
556 | .flowi4_tos = tos, | ||
557 | .flowi4_proto = (inet->hdrincl ? | ||
558 | IPPROTO_RAW : | ||
559 | sk->sk_protocol), | ||
560 | .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, | ||
561 | }; | ||
562 | if (!inet->hdrincl) { | ||
563 | err = raw_probe_proto_opt(&fl4, msg); | ||
564 | if (err) | ||
565 | goto done; | ||
566 | } | ||
567 | 567 | ||
568 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); | 568 | if (!inet->hdrincl) { |
569 | rt = ip_route_output_flow(sock_net(sk), &fl4, sk); | 569 | err = raw_probe_proto_opt(&fl4, msg); |
570 | if (IS_ERR(rt)) { | 570 | if (err) |
571 | err = PTR_ERR(rt); | ||
572 | rt = NULL; | ||
573 | goto done; | 571 | goto done; |
574 | } | 572 | } |
573 | |||
574 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); | ||
575 | rt = ip_route_output_flow(sock_net(sk), &fl4, sk); | ||
576 | if (IS_ERR(rt)) { | ||
577 | err = PTR_ERR(rt); | ||
578 | rt = NULL; | ||
579 | goto done; | ||
575 | } | 580 | } |
576 | 581 | ||
577 | err = -EACCES; | 582 | err = -EACCES; |
@@ -583,19 +588,20 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
583 | back_from_confirm: | 588 | back_from_confirm: |
584 | 589 | ||
585 | if (inet->hdrincl) | 590 | if (inet->hdrincl) |
586 | err = raw_send_hdrinc(sk, msg->msg_iov, len, | 591 | err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len, |
587 | &rt, msg->msg_flags); | 592 | &rt, msg->msg_flags); |
588 | 593 | ||
589 | else { | 594 | else { |
590 | if (!ipc.addr) | 595 | if (!ipc.addr) |
591 | ipc.addr = rt->rt_dst; | 596 | ipc.addr = fl4.daddr; |
592 | lock_sock(sk); | 597 | lock_sock(sk); |
593 | err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, | 598 | err = ip_append_data(sk, &fl4, ip_generic_getfrag, |
594 | &ipc, &rt, msg->msg_flags); | 599 | msg->msg_iov, len, 0, |
600 | &ipc, &rt, msg->msg_flags); | ||
595 | if (err) | 601 | if (err) |
596 | ip_flush_pending_frames(sk); | 602 | ip_flush_pending_frames(sk); |
597 | else if (!(msg->msg_flags & MSG_MORE)) { | 603 | else if (!(msg->msg_flags & MSG_MORE)) { |
598 | err = ip_push_pending_frames(sk); | 604 | err = ip_push_pending_frames(sk, &fl4); |
599 | if (err == -ENOBUFS && !inet->recverr) | 605 | if (err == -ENOBUFS && !inet->recverr) |
600 | err = 0; | 606 | err = 0; |
601 | } | 607 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 99e6e4bb1c72..b24d58e6bbcd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -156,7 +156,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) | |||
156 | u32 *p = NULL; | 156 | u32 *p = NULL; |
157 | 157 | ||
158 | if (!rt->peer) | 158 | if (!rt->peer) |
159 | rt_bind_peer(rt, 1); | 159 | rt_bind_peer(rt, rt->rt_dst, 1); |
160 | 160 | ||
161 | peer = rt->peer; | 161 | peer = rt->peer; |
162 | if (peer) { | 162 | if (peer) { |
@@ -424,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
424 | dst_metric(&r->dst, RTAX_WINDOW), | 424 | dst_metric(&r->dst, RTAX_WINDOW), |
425 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + | 425 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
426 | dst_metric(&r->dst, RTAX_RTTVAR)), | 426 | dst_metric(&r->dst, RTAX_RTTVAR)), |
427 | r->rt_tos, | 427 | r->rt_key_tos, |
428 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, | 428 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, |
429 | r->dst.hh ? (r->dst.hh->hh_output == | 429 | r->dst.hh ? (r->dst.hh->hh_output == |
430 | dev_queue_xmit) : 0, | 430 | dev_queue_xmit) : 0, |
@@ -724,7 +724,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) | |||
724 | return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | | 724 | return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | |
725 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | | 725 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | |
726 | (rt1->rt_mark ^ rt2->rt_mark) | | 726 | (rt1->rt_mark ^ rt2->rt_mark) | |
727 | (rt1->rt_tos ^ rt2->rt_tos) | | 727 | (rt1->rt_key_tos ^ rt2->rt_key_tos) | |
728 | (rt1->rt_oif ^ rt2->rt_oif) | | 728 | (rt1->rt_oif ^ rt2->rt_oif) | |
729 | (rt1->rt_iif ^ rt2->rt_iif)) == 0; | 729 | (rt1->rt_iif ^ rt2->rt_iif)) == 0; |
730 | } | 730 | } |
@@ -968,10 +968,6 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
968 | break; | 968 | break; |
969 | 969 | ||
970 | expire >>= 1; | 970 | expire >>= 1; |
971 | #if RT_CACHE_DEBUG >= 2 | ||
972 | printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, | ||
973 | dst_entries_get_fast(&ipv4_dst_ops), goal, i); | ||
974 | #endif | ||
975 | 971 | ||
976 | if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) | 972 | if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) |
977 | goto out; | 973 | goto out; |
@@ -992,10 +988,6 @@ work_done: | |||
992 | dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || | 988 | dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || |
993 | dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) | 989 | dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) |
994 | expire = ip_rt_gc_timeout; | 990 | expire = ip_rt_gc_timeout; |
995 | #if RT_CACHE_DEBUG >= 2 | ||
996 | printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, | ||
997 | dst_entries_get_fast(&ipv4_dst_ops), goal, rover); | ||
998 | #endif | ||
999 | out: return 0; | 991 | out: return 0; |
1000 | } | 992 | } |
1001 | 993 | ||
@@ -1179,16 +1171,6 @@ restart: | |||
1179 | 1171 | ||
1180 | rt->dst.rt_next = rt_hash_table[hash].chain; | 1172 | rt->dst.rt_next = rt_hash_table[hash].chain; |
1181 | 1173 | ||
1182 | #if RT_CACHE_DEBUG >= 2 | ||
1183 | if (rt->dst.rt_next) { | ||
1184 | struct rtable *trt; | ||
1185 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", | ||
1186 | hash, &rt->rt_dst); | ||
1187 | for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) | ||
1188 | printk(" . %pI4", &trt->rt_dst); | ||
1189 | printk("\n"); | ||
1190 | } | ||
1191 | #endif | ||
1192 | /* | 1174 | /* |
1193 | * Since lookup is lockfree, we must make sure | 1175 | * Since lookup is lockfree, we must make sure |
1194 | * previous writes to rt are committed to memory | 1176 | * previous writes to rt are committed to memory |
@@ -1211,11 +1193,11 @@ static u32 rt_peer_genid(void) | |||
1211 | return atomic_read(&__rt_peer_genid); | 1193 | return atomic_read(&__rt_peer_genid); |
1212 | } | 1194 | } |
1213 | 1195 | ||
1214 | void rt_bind_peer(struct rtable *rt, int create) | 1196 | void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) |
1215 | { | 1197 | { |
1216 | struct inet_peer *peer; | 1198 | struct inet_peer *peer; |
1217 | 1199 | ||
1218 | peer = inet_getpeer_v4(rt->rt_dst, create); | 1200 | peer = inet_getpeer_v4(daddr, create); |
1219 | 1201 | ||
1220 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) | 1202 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) |
1221 | inet_putpeer(peer); | 1203 | inet_putpeer(peer); |
@@ -1249,7 +1231,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1249 | 1231 | ||
1250 | if (rt) { | 1232 | if (rt) { |
1251 | if (rt->peer == NULL) | 1233 | if (rt->peer == NULL) |
1252 | rt_bind_peer(rt, 1); | 1234 | rt_bind_peer(rt, rt->rt_dst, 1); |
1253 | 1235 | ||
1254 | /* If peer is attached to destination, it is never detached, | 1236 | /* If peer is attached to destination, it is never detached, |
1255 | so that we need not to grab a lock to dereference it. | 1237 | so that we need not to grab a lock to dereference it. |
@@ -1347,10 +1329,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1347 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, | 1329 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
1348 | rt->rt_oif, | 1330 | rt->rt_oif, |
1349 | rt_genid(dev_net(dst->dev))); | 1331 | rt_genid(dev_net(dst->dev))); |
1350 | #if RT_CACHE_DEBUG >= 1 | ||
1351 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", | ||
1352 | &rt->rt_dst, rt->rt_tos); | ||
1353 | #endif | ||
1354 | rt_del(hash, rt); | 1332 | rt_del(hash, rt); |
1355 | ret = NULL; | 1333 | ret = NULL; |
1356 | } else if (rt->peer && | 1334 | } else if (rt->peer && |
@@ -1399,7 +1377,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1399 | rcu_read_unlock(); | 1377 | rcu_read_unlock(); |
1400 | 1378 | ||
1401 | if (!rt->peer) | 1379 | if (!rt->peer) |
1402 | rt_bind_peer(rt, 1); | 1380 | rt_bind_peer(rt, rt->rt_dst, 1); |
1403 | peer = rt->peer; | 1381 | peer = rt->peer; |
1404 | if (!peer) { | 1382 | if (!peer) { |
1405 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1383 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
@@ -1435,7 +1413,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1435 | peer->rate_tokens == ip_rt_redirect_number && | 1413 | peer->rate_tokens == ip_rt_redirect_number && |
1436 | net_ratelimit()) | 1414 | net_ratelimit()) |
1437 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1415 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
1438 | &rt->rt_src, rt->rt_iif, | 1416 | &ip_hdr(skb)->saddr, rt->rt_iif, |
1439 | &rt->rt_dst, &rt->rt_gateway); | 1417 | &rt->rt_dst, &rt->rt_gateway); |
1440 | #endif | 1418 | #endif |
1441 | } | 1419 | } |
@@ -1467,7 +1445,7 @@ static int ip_error(struct sk_buff *skb) | |||
1467 | } | 1445 | } |
1468 | 1446 | ||
1469 | if (!rt->peer) | 1447 | if (!rt->peer) |
1470 | rt_bind_peer(rt, 1); | 1448 | rt_bind_peer(rt, rt->rt_dst, 1); |
1471 | peer = rt->peer; | 1449 | peer = rt->peer; |
1472 | 1450 | ||
1473 | send = true; | 1451 | send = true; |
@@ -1507,7 +1485,7 @@ static inline unsigned short guess_mtu(unsigned short old_mtu) | |||
1507 | return 68; | 1485 | return 68; |
1508 | } | 1486 | } |
1509 | 1487 | ||
1510 | unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | 1488 | unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, |
1511 | unsigned short new_mtu, | 1489 | unsigned short new_mtu, |
1512 | struct net_device *dev) | 1490 | struct net_device *dev) |
1513 | { | 1491 | { |
@@ -1574,7 +1552,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1574 | dst_confirm(dst); | 1552 | dst_confirm(dst); |
1575 | 1553 | ||
1576 | if (!rt->peer) | 1554 | if (!rt->peer) |
1577 | rt_bind_peer(rt, 1); | 1555 | rt_bind_peer(rt, rt->rt_dst, 1); |
1578 | peer = rt->peer; | 1556 | peer = rt->peer; |
1579 | if (peer) { | 1557 | if (peer) { |
1580 | if (mtu < ip_rt_min_pmtu) | 1558 | if (mtu < ip_rt_min_pmtu) |
@@ -1631,7 +1609,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | |||
1631 | struct inet_peer *peer; | 1609 | struct inet_peer *peer; |
1632 | 1610 | ||
1633 | if (!rt->peer) | 1611 | if (!rt->peer) |
1634 | rt_bind_peer(rt, 0); | 1612 | rt_bind_peer(rt, rt->rt_dst, 0); |
1635 | 1613 | ||
1636 | peer = rt->peer; | 1614 | peer = rt->peer; |
1637 | if (peer && peer->pmtu_expires) | 1615 | if (peer && peer->pmtu_expires) |
@@ -1699,22 +1677,26 @@ static int ip_rt_bug(struct sk_buff *skb) | |||
1699 | in IP options! | 1677 | in IP options! |
1700 | */ | 1678 | */ |
1701 | 1679 | ||
1702 | void ip_rt_get_source(u8 *addr, struct rtable *rt) | 1680 | void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) |
1703 | { | 1681 | { |
1704 | __be32 src; | 1682 | __be32 src; |
1705 | struct fib_result res; | ||
1706 | 1683 | ||
1707 | if (rt_is_output_route(rt)) | 1684 | if (rt_is_output_route(rt)) |
1708 | src = rt->rt_src; | 1685 | src = ip_hdr(skb)->saddr; |
1709 | else { | 1686 | else { |
1710 | struct flowi4 fl4 = { | 1687 | struct fib_result res; |
1711 | .daddr = rt->rt_key_dst, | 1688 | struct flowi4 fl4; |
1712 | .saddr = rt->rt_key_src, | 1689 | struct iphdr *iph; |
1713 | .flowi4_tos = rt->rt_tos, | 1690 | |
1714 | .flowi4_oif = rt->rt_oif, | 1691 | iph = ip_hdr(skb); |
1715 | .flowi4_iif = rt->rt_iif, | 1692 | |
1716 | .flowi4_mark = rt->rt_mark, | 1693 | memset(&fl4, 0, sizeof(fl4)); |
1717 | }; | 1694 | fl4.daddr = iph->daddr; |
1695 | fl4.saddr = iph->saddr; | ||
1696 | fl4.flowi4_tos = iph->tos; | ||
1697 | fl4.flowi4_oif = rt->dst.dev->ifindex; | ||
1698 | fl4.flowi4_iif = skb->dev->ifindex; | ||
1699 | fl4.flowi4_mark = skb->mark; | ||
1718 | 1700 | ||
1719 | rcu_read_lock(); | 1701 | rcu_read_lock(); |
1720 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) | 1702 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) |
@@ -1767,7 +1749,7 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | |||
1767 | return mtu; | 1749 | return mtu; |
1768 | } | 1750 | } |
1769 | 1751 | ||
1770 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, | 1752 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, |
1771 | struct fib_info *fi) | 1753 | struct fib_info *fi) |
1772 | { | 1754 | { |
1773 | struct inet_peer *peer; | 1755 | struct inet_peer *peer; |
@@ -1776,7 +1758,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, | |||
1776 | /* If a peer entry exists for this destination, we must hook | 1758 | /* If a peer entry exists for this destination, we must hook |
1777 | * it up in order to get at cached metrics. | 1759 | * it up in order to get at cached metrics. |
1778 | */ | 1760 | */ |
1779 | if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) | 1761 | if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) |
1780 | create = 1; | 1762 | create = 1; |
1781 | 1763 | ||
1782 | rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); | 1764 | rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); |
@@ -1803,7 +1785,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, | |||
1803 | } | 1785 | } |
1804 | } | 1786 | } |
1805 | 1787 | ||
1806 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, | 1788 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, |
1807 | const struct fib_result *res, | 1789 | const struct fib_result *res, |
1808 | struct fib_info *fi, u16 type, u32 itag) | 1790 | struct fib_info *fi, u16 type, u32 itag) |
1809 | { | 1791 | { |
@@ -1813,7 +1795,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, | |||
1813 | if (FIB_RES_GW(*res) && | 1795 | if (FIB_RES_GW(*res) && |
1814 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1796 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1815 | rt->rt_gateway = FIB_RES_GW(*res); | 1797 | rt->rt_gateway = FIB_RES_GW(*res); |
1816 | rt_init_metrics(rt, oldflp4, fi); | 1798 | rt_init_metrics(rt, fl4, fi); |
1817 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1799 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1818 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; | 1800 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1819 | #endif | 1801 | #endif |
@@ -1830,20 +1812,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, | |||
1830 | #endif | 1812 | #endif |
1831 | set_class_tag(rt, itag); | 1813 | set_class_tag(rt, itag); |
1832 | #endif | 1814 | #endif |
1833 | rt->rt_type = type; | ||
1834 | } | 1815 | } |
1835 | 1816 | ||
1836 | static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) | 1817 | static struct rtable *rt_dst_alloc(struct net_device *dev, |
1818 | bool nopolicy, bool noxfrm) | ||
1837 | { | 1819 | { |
1838 | struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); | 1820 | return dst_alloc(&ipv4_dst_ops, dev, 1, -1, |
1839 | if (rt) { | 1821 | DST_HOST | |
1840 | rt->dst.obsolete = -1; | 1822 | (nopolicy ? DST_NOPOLICY : 0) | |
1841 | 1823 | (noxfrm ? DST_NOXFRM : 0)); | |
1842 | rt->dst.flags = DST_HOST | | ||
1843 | (nopolicy ? DST_NOPOLICY : 0) | | ||
1844 | (noxfrm ? DST_NOXFRM : 0); | ||
1845 | } | ||
1846 | return rt; | ||
1847 | } | 1824 | } |
1848 | 1825 | ||
1849 | /* called in rcu_read_lock() section */ | 1826 | /* called in rcu_read_lock() section */ |
@@ -1871,36 +1848,38 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1871 | goto e_inval; | 1848 | goto e_inval; |
1872 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 1849 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
1873 | } else { | 1850 | } else { |
1874 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 1851 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, |
1875 | &itag, 0); | 1852 | &itag); |
1876 | if (err < 0) | 1853 | if (err < 0) |
1877 | goto e_err; | 1854 | goto e_err; |
1878 | } | 1855 | } |
1879 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 1856 | rth = rt_dst_alloc(init_net.loopback_dev, |
1857 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | ||
1880 | if (!rth) | 1858 | if (!rth) |
1881 | goto e_nobufs; | 1859 | goto e_nobufs; |
1882 | 1860 | ||
1861 | #ifdef CONFIG_IP_ROUTE_CLASSID | ||
1862 | rth->dst.tclassid = itag; | ||
1863 | #endif | ||
1883 | rth->dst.output = ip_rt_bug; | 1864 | rth->dst.output = ip_rt_bug; |
1884 | 1865 | ||
1885 | rth->rt_key_dst = daddr; | 1866 | rth->rt_key_dst = daddr; |
1886 | rth->rt_dst = daddr; | ||
1887 | rth->rt_tos = tos; | ||
1888 | rth->rt_mark = skb->mark; | ||
1889 | rth->rt_key_src = saddr; | 1867 | rth->rt_key_src = saddr; |
1868 | rth->rt_genid = rt_genid(dev_net(dev)); | ||
1869 | rth->rt_flags = RTCF_MULTICAST; | ||
1870 | rth->rt_type = RTN_MULTICAST; | ||
1871 | rth->rt_key_tos = tos; | ||
1872 | rth->rt_dst = daddr; | ||
1890 | rth->rt_src = saddr; | 1873 | rth->rt_src = saddr; |
1891 | #ifdef CONFIG_IP_ROUTE_CLASSID | ||
1892 | rth->dst.tclassid = itag; | ||
1893 | #endif | ||
1894 | rth->rt_route_iif = dev->ifindex; | 1874 | rth->rt_route_iif = dev->ifindex; |
1895 | rth->rt_iif = dev->ifindex; | 1875 | rth->rt_iif = dev->ifindex; |
1896 | rth->dst.dev = init_net.loopback_dev; | ||
1897 | dev_hold(rth->dst.dev); | ||
1898 | rth->rt_oif = 0; | 1876 | rth->rt_oif = 0; |
1877 | rth->rt_mark = skb->mark; | ||
1899 | rth->rt_gateway = daddr; | 1878 | rth->rt_gateway = daddr; |
1900 | rth->rt_spec_dst= spec_dst; | 1879 | rth->rt_spec_dst= spec_dst; |
1901 | rth->rt_genid = rt_genid(dev_net(dev)); | 1880 | rth->rt_peer_genid = 0; |
1902 | rth->rt_flags = RTCF_MULTICAST; | 1881 | rth->peer = NULL; |
1903 | rth->rt_type = RTN_MULTICAST; | 1882 | rth->fi = NULL; |
1904 | if (our) { | 1883 | if (our) { |
1905 | rth->dst.input= ip_local_deliver; | 1884 | rth->dst.input= ip_local_deliver; |
1906 | rth->rt_flags |= RTCF_LOCAL; | 1885 | rth->rt_flags |= RTCF_LOCAL; |
@@ -1981,8 +1960,8 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1981 | } | 1960 | } |
1982 | 1961 | ||
1983 | 1962 | ||
1984 | err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), | 1963 | err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), |
1985 | in_dev->dev, &spec_dst, &itag, skb->mark); | 1964 | in_dev->dev, &spec_dst, &itag); |
1986 | if (err < 0) { | 1965 | if (err < 0) { |
1987 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 1966 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
1988 | saddr); | 1967 | saddr); |
@@ -2013,7 +1992,8 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2013 | } | 1992 | } |
2014 | } | 1993 | } |
2015 | 1994 | ||
2016 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1995 | rth = rt_dst_alloc(out_dev->dev, |
1996 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | ||
2017 | IN_DEV_CONF_GET(out_dev, NOXFRM)); | 1997 | IN_DEV_CONF_GET(out_dev, NOXFRM)); |
2018 | if (!rth) { | 1998 | if (!rth) { |
2019 | err = -ENOBUFS; | 1999 | err = -ENOBUFS; |
@@ -2021,27 +2001,28 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2021 | } | 2001 | } |
2022 | 2002 | ||
2023 | rth->rt_key_dst = daddr; | 2003 | rth->rt_key_dst = daddr; |
2024 | rth->rt_dst = daddr; | ||
2025 | rth->rt_tos = tos; | ||
2026 | rth->rt_mark = skb->mark; | ||
2027 | rth->rt_key_src = saddr; | 2004 | rth->rt_key_src = saddr; |
2005 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | ||
2006 | rth->rt_flags = flags; | ||
2007 | rth->rt_type = res->type; | ||
2008 | rth->rt_key_tos = tos; | ||
2009 | rth->rt_dst = daddr; | ||
2028 | rth->rt_src = saddr; | 2010 | rth->rt_src = saddr; |
2029 | rth->rt_gateway = daddr; | ||
2030 | rth->rt_route_iif = in_dev->dev->ifindex; | 2011 | rth->rt_route_iif = in_dev->dev->ifindex; |
2031 | rth->rt_iif = in_dev->dev->ifindex; | 2012 | rth->rt_iif = in_dev->dev->ifindex; |
2032 | rth->dst.dev = (out_dev)->dev; | ||
2033 | dev_hold(rth->dst.dev); | ||
2034 | rth->rt_oif = 0; | 2013 | rth->rt_oif = 0; |
2014 | rth->rt_mark = skb->mark; | ||
2015 | rth->rt_gateway = daddr; | ||
2035 | rth->rt_spec_dst= spec_dst; | 2016 | rth->rt_spec_dst= spec_dst; |
2017 | rth->rt_peer_genid = 0; | ||
2018 | rth->peer = NULL; | ||
2019 | rth->fi = NULL; | ||
2036 | 2020 | ||
2037 | rth->dst.input = ip_forward; | 2021 | rth->dst.input = ip_forward; |
2038 | rth->dst.output = ip_output; | 2022 | rth->dst.output = ip_output; |
2039 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | ||
2040 | 2023 | ||
2041 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); | 2024 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); |
2042 | 2025 | ||
2043 | rth->rt_flags = flags; | ||
2044 | |||
2045 | *result = rth; | 2026 | *result = rth; |
2046 | err = 0; | 2027 | err = 0; |
2047 | cleanup: | 2028 | cleanup: |
@@ -2150,9 +2131,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2150 | goto brd_input; | 2131 | goto brd_input; |
2151 | 2132 | ||
2152 | if (res.type == RTN_LOCAL) { | 2133 | if (res.type == RTN_LOCAL) { |
2153 | err = fib_validate_source(saddr, daddr, tos, | 2134 | err = fib_validate_source(skb, saddr, daddr, tos, |
2154 | net->loopback_dev->ifindex, | 2135 | net->loopback_dev->ifindex, |
2155 | dev, &spec_dst, &itag, skb->mark); | 2136 | dev, &spec_dst, &itag); |
2156 | if (err < 0) | 2137 | if (err < 0) |
2157 | goto martian_source_keep_err; | 2138 | goto martian_source_keep_err; |
2158 | if (err) | 2139 | if (err) |
@@ -2176,8 +2157,8 @@ brd_input: | |||
2176 | if (ipv4_is_zeronet(saddr)) | 2157 | if (ipv4_is_zeronet(saddr)) |
2177 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 2158 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
2178 | else { | 2159 | else { |
2179 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 2160 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, |
2180 | &itag, skb->mark); | 2161 | &itag); |
2181 | if (err < 0) | 2162 | if (err < 0) |
2182 | goto martian_source_keep_err; | 2163 | goto martian_source_keep_err; |
2183 | if (err) | 2164 | if (err) |
@@ -2188,36 +2169,42 @@ brd_input: | |||
2188 | RT_CACHE_STAT_INC(in_brd); | 2169 | RT_CACHE_STAT_INC(in_brd); |
2189 | 2170 | ||
2190 | local_input: | 2171 | local_input: |
2191 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 2172 | rth = rt_dst_alloc(net->loopback_dev, |
2173 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | ||
2192 | if (!rth) | 2174 | if (!rth) |
2193 | goto e_nobufs; | 2175 | goto e_nobufs; |
2194 | 2176 | ||
2177 | rth->dst.input= ip_local_deliver; | ||
2195 | rth->dst.output= ip_rt_bug; | 2178 | rth->dst.output= ip_rt_bug; |
2196 | rth->rt_genid = rt_genid(net); | 2179 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2180 | rth->dst.tclassid = itag; | ||
2181 | #endif | ||
2197 | 2182 | ||
2198 | rth->rt_key_dst = daddr; | 2183 | rth->rt_key_dst = daddr; |
2199 | rth->rt_dst = daddr; | ||
2200 | rth->rt_tos = tos; | ||
2201 | rth->rt_mark = skb->mark; | ||
2202 | rth->rt_key_src = saddr; | 2184 | rth->rt_key_src = saddr; |
2185 | rth->rt_genid = rt_genid(net); | ||
2186 | rth->rt_flags = flags|RTCF_LOCAL; | ||
2187 | rth->rt_type = res.type; | ||
2188 | rth->rt_key_tos = tos; | ||
2189 | rth->rt_dst = daddr; | ||
2203 | rth->rt_src = saddr; | 2190 | rth->rt_src = saddr; |
2204 | #ifdef CONFIG_IP_ROUTE_CLASSID | 2191 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2205 | rth->dst.tclassid = itag; | 2192 | rth->dst.tclassid = itag; |
2206 | #endif | 2193 | #endif |
2207 | rth->rt_route_iif = dev->ifindex; | 2194 | rth->rt_route_iif = dev->ifindex; |
2208 | rth->rt_iif = dev->ifindex; | 2195 | rth->rt_iif = dev->ifindex; |
2209 | rth->dst.dev = net->loopback_dev; | 2196 | rth->rt_oif = 0; |
2210 | dev_hold(rth->dst.dev); | 2197 | rth->rt_mark = skb->mark; |
2211 | rth->rt_gateway = daddr; | 2198 | rth->rt_gateway = daddr; |
2212 | rth->rt_spec_dst= spec_dst; | 2199 | rth->rt_spec_dst= spec_dst; |
2213 | rth->dst.input= ip_local_deliver; | 2200 | rth->rt_peer_genid = 0; |
2214 | rth->rt_flags = flags|RTCF_LOCAL; | 2201 | rth->peer = NULL; |
2202 | rth->fi = NULL; | ||
2215 | if (res.type == RTN_UNREACHABLE) { | 2203 | if (res.type == RTN_UNREACHABLE) { |
2216 | rth->dst.input= ip_error; | 2204 | rth->dst.input= ip_error; |
2217 | rth->dst.error= -err; | 2205 | rth->dst.error= -err; |
2218 | rth->rt_flags &= ~RTCF_LOCAL; | 2206 | rth->rt_flags &= ~RTCF_LOCAL; |
2219 | } | 2207 | } |
2220 | rth->rt_type = res.type; | ||
2221 | hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); | 2208 | hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); |
2222 | rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); | 2209 | rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); |
2223 | err = 0; | 2210 | err = 0; |
@@ -2288,7 +2275,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2288 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | | 2275 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | |
2289 | (rth->rt_iif ^ iif) | | 2276 | (rth->rt_iif ^ iif) | |
2290 | rth->rt_oif | | 2277 | rth->rt_oif | |
2291 | (rth->rt_tos ^ tos)) == 0 && | 2278 | (rth->rt_key_tos ^ tos)) == 0 && |
2292 | rth->rt_mark == skb->mark && | 2279 | rth->rt_mark == skb->mark && |
2293 | net_eq(dev_net(rth->dst.dev), net) && | 2280 | net_eq(dev_net(rth->dst.dev), net) && |
2294 | !rt_is_expired(rth)) { | 2281 | !rt_is_expired(rth)) { |
@@ -2349,12 +2336,12 @@ EXPORT_SYMBOL(ip_route_input_common); | |||
2349 | /* called with rcu_read_lock() */ | 2336 | /* called with rcu_read_lock() */ |
2350 | static struct rtable *__mkroute_output(const struct fib_result *res, | 2337 | static struct rtable *__mkroute_output(const struct fib_result *res, |
2351 | const struct flowi4 *fl4, | 2338 | const struct flowi4 *fl4, |
2352 | const struct flowi4 *oldflp4, | 2339 | __be32 orig_daddr, __be32 orig_saddr, |
2353 | struct net_device *dev_out, | 2340 | int orig_oif, struct net_device *dev_out, |
2354 | unsigned int flags) | 2341 | unsigned int flags) |
2355 | { | 2342 | { |
2356 | struct fib_info *fi = res->fi; | 2343 | struct fib_info *fi = res->fi; |
2357 | u32 tos = RT_FL_TOS(oldflp4); | 2344 | u32 tos = RT_FL_TOS(fl4); |
2358 | struct in_device *in_dev; | 2345 | struct in_device *in_dev; |
2359 | u16 type = res->type; | 2346 | u16 type = res->type; |
2360 | struct rtable *rth; | 2347 | struct rtable *rth; |
@@ -2381,8 +2368,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2381 | fi = NULL; | 2368 | fi = NULL; |
2382 | } else if (type == RTN_MULTICAST) { | 2369 | } else if (type == RTN_MULTICAST) { |
2383 | flags |= RTCF_MULTICAST | RTCF_LOCAL; | 2370 | flags |= RTCF_MULTICAST | RTCF_LOCAL; |
2384 | if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, | 2371 | if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, |
2385 | oldflp4->flowi4_proto)) | 2372 | fl4->flowi4_proto)) |
2386 | flags &= ~RTCF_LOCAL; | 2373 | flags &= ~RTCF_LOCAL; |
2387 | /* If multicast route do not exist use | 2374 | /* If multicast route do not exist use |
2388 | * default one, but do not gateway in this case. | 2375 | * default one, but do not gateway in this case. |
@@ -2392,29 +2379,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2392 | fi = NULL; | 2379 | fi = NULL; |
2393 | } | 2380 | } |
2394 | 2381 | ||
2395 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | 2382 | rth = rt_dst_alloc(dev_out, |
2383 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | ||
2396 | IN_DEV_CONF_GET(in_dev, NOXFRM)); | 2384 | IN_DEV_CONF_GET(in_dev, NOXFRM)); |
2397 | if (!rth) | 2385 | if (!rth) |
2398 | return ERR_PTR(-ENOBUFS); | 2386 | return ERR_PTR(-ENOBUFS); |
2399 | 2387 | ||
2400 | rth->rt_key_dst = oldflp4->daddr; | 2388 | rth->dst.output = ip_output; |
2401 | rth->rt_tos = tos; | 2389 | |
2402 | rth->rt_key_src = oldflp4->saddr; | 2390 | rth->rt_key_dst = orig_daddr; |
2403 | rth->rt_oif = oldflp4->flowi4_oif; | 2391 | rth->rt_key_src = orig_saddr; |
2404 | rth->rt_mark = oldflp4->flowi4_mark; | 2392 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2393 | rth->rt_flags = flags; | ||
2394 | rth->rt_type = type; | ||
2395 | rth->rt_key_tos = tos; | ||
2405 | rth->rt_dst = fl4->daddr; | 2396 | rth->rt_dst = fl4->daddr; |
2406 | rth->rt_src = fl4->saddr; | 2397 | rth->rt_src = fl4->saddr; |
2407 | rth->rt_route_iif = 0; | 2398 | rth->rt_route_iif = 0; |
2408 | rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; | 2399 | rth->rt_iif = orig_oif ? : dev_out->ifindex; |
2409 | /* get references to the devices that are to be hold by the routing | 2400 | rth->rt_oif = orig_oif; |
2410 | cache entry */ | 2401 | rth->rt_mark = fl4->flowi4_mark; |
2411 | rth->dst.dev = dev_out; | ||
2412 | dev_hold(dev_out); | ||
2413 | rth->rt_gateway = fl4->daddr; | 2402 | rth->rt_gateway = fl4->daddr; |
2414 | rth->rt_spec_dst= fl4->saddr; | 2403 | rth->rt_spec_dst= fl4->saddr; |
2415 | 2404 | rth->rt_peer_genid = 0; | |
2416 | rth->dst.output=ip_output; | 2405 | rth->peer = NULL; |
2417 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2406 | rth->fi = NULL; |
2418 | 2407 | ||
2419 | RT_CACHE_STAT_INC(out_slow_tot); | 2408 | RT_CACHE_STAT_INC(out_slow_tot); |
2420 | 2409 | ||
@@ -2432,7 +2421,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2432 | #ifdef CONFIG_IP_MROUTE | 2421 | #ifdef CONFIG_IP_MROUTE |
2433 | if (type == RTN_MULTICAST) { | 2422 | if (type == RTN_MULTICAST) { |
2434 | if (IN_DEV_MFORWARD(in_dev) && | 2423 | if (IN_DEV_MFORWARD(in_dev) && |
2435 | !ipv4_is_local_multicast(oldflp4->daddr)) { | 2424 | !ipv4_is_local_multicast(fl4->daddr)) { |
2436 | rth->dst.input = ip_mr_input; | 2425 | rth->dst.input = ip_mr_input; |
2437 | rth->dst.output = ip_mc_output; | 2426 | rth->dst.output = ip_mc_output; |
2438 | } | 2427 | } |
@@ -2440,9 +2429,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2440 | #endif | 2429 | #endif |
2441 | } | 2430 | } |
2442 | 2431 | ||
2443 | rt_set_nexthop(rth, oldflp4, res, fi, type, 0); | 2432 | rt_set_nexthop(rth, fl4, res, fi, type, 0); |
2444 | 2433 | ||
2445 | rth->rt_flags = flags; | ||
2446 | return rth; | 2434 | return rth; |
2447 | } | 2435 | } |
2448 | 2436 | ||
@@ -2451,36 +2439,37 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2451 | * called with rcu_read_lock(); | 2439 | * called with rcu_read_lock(); |
2452 | */ | 2440 | */ |
2453 | 2441 | ||
2454 | static struct rtable *ip_route_output_slow(struct net *net, | 2442 | static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) |
2455 | const struct flowi4 *oldflp4) | ||
2456 | { | 2443 | { |
2457 | u32 tos = RT_FL_TOS(oldflp4); | ||
2458 | struct flowi4 fl4; | ||
2459 | struct fib_result res; | ||
2460 | unsigned int flags = 0; | ||
2461 | struct net_device *dev_out = NULL; | 2444 | struct net_device *dev_out = NULL; |
2445 | u32 tos = RT_FL_TOS(fl4); | ||
2446 | unsigned int flags = 0; | ||
2447 | struct fib_result res; | ||
2462 | struct rtable *rth; | 2448 | struct rtable *rth; |
2449 | __be32 orig_daddr; | ||
2450 | __be32 orig_saddr; | ||
2451 | int orig_oif; | ||
2463 | 2452 | ||
2464 | res.fi = NULL; | 2453 | res.fi = NULL; |
2465 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 2454 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
2466 | res.r = NULL; | 2455 | res.r = NULL; |
2467 | #endif | 2456 | #endif |
2468 | 2457 | ||
2469 | fl4.flowi4_oif = oldflp4->flowi4_oif; | 2458 | orig_daddr = fl4->daddr; |
2470 | fl4.flowi4_iif = net->loopback_dev->ifindex; | 2459 | orig_saddr = fl4->saddr; |
2471 | fl4.flowi4_mark = oldflp4->flowi4_mark; | 2460 | orig_oif = fl4->flowi4_oif; |
2472 | fl4.daddr = oldflp4->daddr; | 2461 | |
2473 | fl4.saddr = oldflp4->saddr; | 2462 | fl4->flowi4_iif = net->loopback_dev->ifindex; |
2474 | fl4.flowi4_tos = tos & IPTOS_RT_MASK; | 2463 | fl4->flowi4_tos = tos & IPTOS_RT_MASK; |
2475 | fl4.flowi4_scope = ((tos & RTO_ONLINK) ? | 2464 | fl4->flowi4_scope = ((tos & RTO_ONLINK) ? |
2476 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | 2465 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); |
2477 | 2466 | ||
2478 | rcu_read_lock(); | 2467 | rcu_read_lock(); |
2479 | if (oldflp4->saddr) { | 2468 | if (fl4->saddr) { |
2480 | rth = ERR_PTR(-EINVAL); | 2469 | rth = ERR_PTR(-EINVAL); |
2481 | if (ipv4_is_multicast(oldflp4->saddr) || | 2470 | if (ipv4_is_multicast(fl4->saddr) || |
2482 | ipv4_is_lbcast(oldflp4->saddr) || | 2471 | ipv4_is_lbcast(fl4->saddr) || |
2483 | ipv4_is_zeronet(oldflp4->saddr)) | 2472 | ipv4_is_zeronet(fl4->saddr)) |
2484 | goto out; | 2473 | goto out; |
2485 | 2474 | ||
2486 | /* I removed check for oif == dev_out->oif here. | 2475 | /* I removed check for oif == dev_out->oif here. |
@@ -2491,11 +2480,11 @@ static struct rtable *ip_route_output_slow(struct net *net, | |||
2491 | of another iface. --ANK | 2480 | of another iface. --ANK |
2492 | */ | 2481 | */ |
2493 | 2482 | ||
2494 | if (oldflp4->flowi4_oif == 0 && | 2483 | if (fl4->flowi4_oif == 0 && |
2495 | (ipv4_is_multicast(oldflp4->daddr) || | 2484 | (ipv4_is_multicast(fl4->daddr) || |
2496 | ipv4_is_lbcast(oldflp4->daddr))) { | 2485 | ipv4_is_lbcast(fl4->daddr))) { |
2497 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2486 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2498 | dev_out = __ip_dev_find(net, oldflp4->saddr, false); | 2487 | dev_out = __ip_dev_find(net, fl4->saddr, false); |
2499 | if (dev_out == NULL) | 2488 | if (dev_out == NULL) |
2500 | goto out; | 2489 | goto out; |
2501 | 2490 | ||
@@ -2514,20 +2503,20 @@ static struct rtable *ip_route_output_slow(struct net *net, | |||
2514 | Luckily, this hack is good workaround. | 2503 | Luckily, this hack is good workaround. |
2515 | */ | 2504 | */ |
2516 | 2505 | ||
2517 | fl4.flowi4_oif = dev_out->ifindex; | 2506 | fl4->flowi4_oif = dev_out->ifindex; |
2518 | goto make_route; | 2507 | goto make_route; |
2519 | } | 2508 | } |
2520 | 2509 | ||
2521 | if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { | 2510 | if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { |
2522 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2511 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2523 | if (!__ip_dev_find(net, oldflp4->saddr, false)) | 2512 | if (!__ip_dev_find(net, fl4->saddr, false)) |
2524 | goto out; | 2513 | goto out; |
2525 | } | 2514 | } |
2526 | } | 2515 | } |
2527 | 2516 | ||
2528 | 2517 | ||
2529 | if (oldflp4->flowi4_oif) { | 2518 | if (fl4->flowi4_oif) { |
2530 | dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); | 2519 | dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); |
2531 | rth = ERR_PTR(-ENODEV); | 2520 | rth = ERR_PTR(-ENODEV); |
2532 | if (dev_out == NULL) | 2521 | if (dev_out == NULL) |
2533 | goto out; | 2522 | goto out; |
@@ -2537,37 +2526,37 @@ static struct rtable *ip_route_output_slow(struct net *net, | |||
2537 | rth = ERR_PTR(-ENETUNREACH); | 2526 | rth = ERR_PTR(-ENETUNREACH); |
2538 | goto out; | 2527 | goto out; |
2539 | } | 2528 | } |
2540 | if (ipv4_is_local_multicast(oldflp4->daddr) || | 2529 | if (ipv4_is_local_multicast(fl4->daddr) || |
2541 | ipv4_is_lbcast(oldflp4->daddr)) { | 2530 | ipv4_is_lbcast(fl4->daddr)) { |
2542 | if (!fl4.saddr) | 2531 | if (!fl4->saddr) |
2543 | fl4.saddr = inet_select_addr(dev_out, 0, | 2532 | fl4->saddr = inet_select_addr(dev_out, 0, |
2544 | RT_SCOPE_LINK); | 2533 | RT_SCOPE_LINK); |
2545 | goto make_route; | 2534 | goto make_route; |
2546 | } | 2535 | } |
2547 | if (!fl4.saddr) { | 2536 | if (fl4->saddr) { |
2548 | if (ipv4_is_multicast(oldflp4->daddr)) | 2537 | if (ipv4_is_multicast(fl4->daddr)) |
2549 | fl4.saddr = inet_select_addr(dev_out, 0, | 2538 | fl4->saddr = inet_select_addr(dev_out, 0, |
2550 | fl4.flowi4_scope); | 2539 | fl4->flowi4_scope); |
2551 | else if (!oldflp4->daddr) | 2540 | else if (!fl4->daddr) |
2552 | fl4.saddr = inet_select_addr(dev_out, 0, | 2541 | fl4->saddr = inet_select_addr(dev_out, 0, |
2553 | RT_SCOPE_HOST); | 2542 | RT_SCOPE_HOST); |
2554 | } | 2543 | } |
2555 | } | 2544 | } |
2556 | 2545 | ||
2557 | if (!fl4.daddr) { | 2546 | if (!fl4->daddr) { |
2558 | fl4.daddr = fl4.saddr; | 2547 | fl4->daddr = fl4->saddr; |
2559 | if (!fl4.daddr) | 2548 | if (!fl4->daddr) |
2560 | fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); | 2549 | fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); |
2561 | dev_out = net->loopback_dev; | 2550 | dev_out = net->loopback_dev; |
2562 | fl4.flowi4_oif = net->loopback_dev->ifindex; | 2551 | fl4->flowi4_oif = net->loopback_dev->ifindex; |
2563 | res.type = RTN_LOCAL; | 2552 | res.type = RTN_LOCAL; |
2564 | flags |= RTCF_LOCAL; | 2553 | flags |= RTCF_LOCAL; |
2565 | goto make_route; | 2554 | goto make_route; |
2566 | } | 2555 | } |
2567 | 2556 | ||
2568 | if (fib_lookup(net, &fl4, &res)) { | 2557 | if (fib_lookup(net, fl4, &res)) { |
2569 | res.fi = NULL; | 2558 | res.fi = NULL; |
2570 | if (oldflp4->flowi4_oif) { | 2559 | if (fl4->flowi4_oif) { |
2571 | /* Apparently, routing tables are wrong. Assume, | 2560 | /* Apparently, routing tables are wrong. Assume, |
2572 | that the destination is on link. | 2561 | that the destination is on link. |
2573 | 2562 | ||
@@ -2586,9 +2575,9 @@ static struct rtable *ip_route_output_slow(struct net *net, | |||
2586 | likely IPv6, but we do not. | 2575 | likely IPv6, but we do not. |
2587 | */ | 2576 | */ |
2588 | 2577 | ||
2589 | if (fl4.saddr == 0) | 2578 | if (fl4->saddr == 0) |
2590 | fl4.saddr = inet_select_addr(dev_out, 0, | 2579 | fl4->saddr = inet_select_addr(dev_out, 0, |
2591 | RT_SCOPE_LINK); | 2580 | RT_SCOPE_LINK); |
2592 | res.type = RTN_UNICAST; | 2581 | res.type = RTN_UNICAST; |
2593 | goto make_route; | 2582 | goto make_route; |
2594 | } | 2583 | } |
@@ -2597,42 +2586,45 @@ static struct rtable *ip_route_output_slow(struct net *net, | |||
2597 | } | 2586 | } |
2598 | 2587 | ||
2599 | if (res.type == RTN_LOCAL) { | 2588 | if (res.type == RTN_LOCAL) { |
2600 | if (!fl4.saddr) { | 2589 | if (!fl4->saddr) { |
2601 | if (res.fi->fib_prefsrc) | 2590 | if (res.fi->fib_prefsrc) |
2602 | fl4.saddr = res.fi->fib_prefsrc; | 2591 | fl4->saddr = res.fi->fib_prefsrc; |
2603 | else | 2592 | else |
2604 | fl4.saddr = fl4.daddr; | 2593 | fl4->saddr = fl4->daddr; |
2605 | } | 2594 | } |
2606 | dev_out = net->loopback_dev; | 2595 | dev_out = net->loopback_dev; |
2607 | fl4.flowi4_oif = dev_out->ifindex; | 2596 | fl4->flowi4_oif = dev_out->ifindex; |
2608 | res.fi = NULL; | 2597 | res.fi = NULL; |
2609 | flags |= RTCF_LOCAL; | 2598 | flags |= RTCF_LOCAL; |
2610 | goto make_route; | 2599 | goto make_route; |
2611 | } | 2600 | } |
2612 | 2601 | ||
2613 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2602 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2614 | if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) | 2603 | if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) |
2615 | fib_select_multipath(&res); | 2604 | fib_select_multipath(&res); |
2616 | else | 2605 | else |
2617 | #endif | 2606 | #endif |
2618 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) | 2607 | if (!res.prefixlen && |
2608 | res.table->tb_num_default > 1 && | ||
2609 | res.type == RTN_UNICAST && !fl4->flowi4_oif) | ||
2619 | fib_select_default(&res); | 2610 | fib_select_default(&res); |
2620 | 2611 | ||
2621 | if (!fl4.saddr) | 2612 | if (!fl4->saddr) |
2622 | fl4.saddr = FIB_RES_PREFSRC(net, res); | 2613 | fl4->saddr = FIB_RES_PREFSRC(net, res); |
2623 | 2614 | ||
2624 | dev_out = FIB_RES_DEV(res); | 2615 | dev_out = FIB_RES_DEV(res); |
2625 | fl4.flowi4_oif = dev_out->ifindex; | 2616 | fl4->flowi4_oif = dev_out->ifindex; |
2626 | 2617 | ||
2627 | 2618 | ||
2628 | make_route: | 2619 | make_route: |
2629 | rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); | 2620 | rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, |
2621 | dev_out, flags); | ||
2630 | if (!IS_ERR(rth)) { | 2622 | if (!IS_ERR(rth)) { |
2631 | unsigned int hash; | 2623 | unsigned int hash; |
2632 | 2624 | ||
2633 | hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, | 2625 | hash = rt_hash(orig_daddr, orig_saddr, orig_oif, |
2634 | rt_genid(dev_net(dev_out))); | 2626 | rt_genid(dev_net(dev_out))); |
2635 | rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); | 2627 | rth = rt_intern_hash(hash, rth, NULL, orig_oif); |
2636 | } | 2628 | } |
2637 | 2629 | ||
2638 | out: | 2630 | out: |
@@ -2640,7 +2632,7 @@ out: | |||
2640 | return rth; | 2632 | return rth; |
2641 | } | 2633 | } |
2642 | 2634 | ||
2643 | struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) | 2635 | struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) |
2644 | { | 2636 | { |
2645 | struct rtable *rth; | 2637 | struct rtable *rth; |
2646 | unsigned int hash; | 2638 | unsigned int hash; |
@@ -2658,13 +2650,17 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) | |||
2658 | rt_is_output_route(rth) && | 2650 | rt_is_output_route(rth) && |
2659 | rth->rt_oif == flp4->flowi4_oif && | 2651 | rth->rt_oif == flp4->flowi4_oif && |
2660 | rth->rt_mark == flp4->flowi4_mark && | 2652 | rth->rt_mark == flp4->flowi4_mark && |
2661 | !((rth->rt_tos ^ flp4->flowi4_tos) & | 2653 | !((rth->rt_key_tos ^ flp4->flowi4_tos) & |
2662 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2654 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2663 | net_eq(dev_net(rth->dst.dev), net) && | 2655 | net_eq(dev_net(rth->dst.dev), net) && |
2664 | !rt_is_expired(rth)) { | 2656 | !rt_is_expired(rth)) { |
2665 | dst_use(&rth->dst, jiffies); | 2657 | dst_use(&rth->dst, jiffies); |
2666 | RT_CACHE_STAT_INC(out_hit); | 2658 | RT_CACHE_STAT_INC(out_hit); |
2667 | rcu_read_unlock_bh(); | 2659 | rcu_read_unlock_bh(); |
2660 | if (!flp4->saddr) | ||
2661 | flp4->saddr = rth->rt_src; | ||
2662 | if (!flp4->daddr) | ||
2663 | flp4->daddr = rth->rt_dst; | ||
2668 | return rth; | 2664 | return rth; |
2669 | } | 2665 | } |
2670 | RT_CACHE_STAT_INC(out_hlist_search); | 2666 | RT_CACHE_STAT_INC(out_hlist_search); |
@@ -2709,7 +2705,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2709 | 2705 | ||
2710 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | 2706 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) |
2711 | { | 2707 | { |
2712 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); | 2708 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); |
2713 | struct rtable *ort = (struct rtable *) dst_orig; | 2709 | struct rtable *ort = (struct rtable *) dst_orig; |
2714 | 2710 | ||
2715 | if (rt) { | 2711 | if (rt) { |
@@ -2726,7 +2722,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
2726 | 2722 | ||
2727 | rt->rt_key_dst = ort->rt_key_dst; | 2723 | rt->rt_key_dst = ort->rt_key_dst; |
2728 | rt->rt_key_src = ort->rt_key_src; | 2724 | rt->rt_key_src = ort->rt_key_src; |
2729 | rt->rt_tos = ort->rt_tos; | 2725 | rt->rt_key_tos = ort->rt_key_tos; |
2730 | rt->rt_route_iif = ort->rt_route_iif; | 2726 | rt->rt_route_iif = ort->rt_route_iif; |
2731 | rt->rt_iif = ort->rt_iif; | 2727 | rt->rt_iif = ort->rt_iif; |
2732 | rt->rt_oif = ort->rt_oif; | 2728 | rt->rt_oif = ort->rt_oif; |
@@ -2762,15 +2758,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, | |||
2762 | if (IS_ERR(rt)) | 2758 | if (IS_ERR(rt)) |
2763 | return rt; | 2759 | return rt; |
2764 | 2760 | ||
2765 | if (flp4->flowi4_proto) { | 2761 | if (flp4->flowi4_proto) |
2766 | if (!flp4->saddr) | ||
2767 | flp4->saddr = rt->rt_src; | ||
2768 | if (!flp4->daddr) | ||
2769 | flp4->daddr = rt->rt_dst; | ||
2770 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, | 2762 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, |
2771 | flowi4_to_flowi(flp4), | 2763 | flowi4_to_flowi(flp4), |
2772 | sk, 0); | 2764 | sk, 0); |
2773 | } | ||
2774 | 2765 | ||
2775 | return rt; | 2766 | return rt; |
2776 | } | 2767 | } |
@@ -2794,7 +2785,7 @@ static int rt_fill_info(struct net *net, | |||
2794 | r->rtm_family = AF_INET; | 2785 | r->rtm_family = AF_INET; |
2795 | r->rtm_dst_len = 32; | 2786 | r->rtm_dst_len = 32; |
2796 | r->rtm_src_len = 0; | 2787 | r->rtm_src_len = 0; |
2797 | r->rtm_tos = rt->rt_tos; | 2788 | r->rtm_tos = rt->rt_key_tos; |
2798 | r->rtm_table = RT_TABLE_MAIN; | 2789 | r->rtm_table = RT_TABLE_MAIN; |
2799 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2790 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); |
2800 | r->rtm_type = rt->rt_type; | 2791 | r->rtm_type = rt->rt_type; |
@@ -2848,7 +2839,9 @@ static int rt_fill_info(struct net *net, | |||
2848 | 2839 | ||
2849 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && | 2840 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && |
2850 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { | 2841 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { |
2851 | int err = ipmr_get_route(net, skb, r, nowait); | 2842 | int err = ipmr_get_route(net, skb, |
2843 | rt->rt_src, rt->rt_dst, | ||
2844 | r, nowait); | ||
2852 | if (err <= 0) { | 2845 | if (err <= 0) { |
2853 | if (!nowait) { | 2846 | if (!nowait) { |
2854 | if (err == 0) | 2847 | if (err == 0) |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8b44c6d2a79b..26461492a847 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
321 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 321 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
322 | */ | 322 | */ |
323 | if (opt && opt->optlen) { | 323 | if (opt && opt->optlen) { |
324 | int opt_size = sizeof(struct ip_options) + opt->optlen; | 324 | int opt_size = sizeof(struct ip_options_rcu) + opt->optlen; |
325 | 325 | ||
326 | ireq->opt = kmalloc(opt_size, GFP_ATOMIC); | 326 | ireq->opt = kmalloc(opt_size, GFP_ATOMIC); |
327 | if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) { | 327 | if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) { |
328 | kfree(ireq->opt); | 328 | kfree(ireq->opt); |
329 | ireq->opt = NULL; | 329 | ireq->opt = NULL; |
330 | } | 330 | } |
@@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
345 | * no easy way to do this. | 345 | * no easy way to do this. |
346 | */ | 346 | */ |
347 | { | 347 | { |
348 | struct flowi4 fl4 = { | 348 | struct flowi4 fl4; |
349 | .flowi4_mark = sk->sk_mark, | 349 | |
350 | .daddr = ((opt && opt->srr) ? | 350 | flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), |
351 | opt->faddr : ireq->rmt_addr), | 351 | RT_SCOPE_UNIVERSE, IPPROTO_TCP, |
352 | .saddr = ireq->loc_addr, | 352 | inet_sk_flowi_flags(sk), |
353 | .flowi4_tos = RT_CONN_FLAGS(sk), | 353 | (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, |
354 | .flowi4_proto = IPPROTO_TCP, | 354 | ireq->loc_addr, th->source, th->dest); |
355 | .flowi4_flags = inet_sk_flowi_flags(sk), | ||
356 | .fl4_sport = th->dest, | ||
357 | .fl4_dport = th->source, | ||
358 | }; | ||
359 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | 355 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); |
360 | rt = ip_route_output_key(sock_net(sk), &fl4); | 356 | rt = ip_route_output_key(sock_net(sk), &fl4); |
361 | if (IS_ERR(rt)) { | 357 | if (IS_ERR(rt)) { |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 321e6e84dbcc..57d0752e239a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/seqlock.h> | 13 | #include <linux/seqlock.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/nsproxy.h> | ||
16 | #include <net/snmp.h> | 17 | #include <net/snmp.h> |
17 | #include <net/icmp.h> | 18 | #include <net/icmp.h> |
18 | #include <net/ip.h> | 19 | #include <net/ip.h> |
@@ -21,6 +22,7 @@ | |||
21 | #include <net/udp.h> | 22 | #include <net/udp.h> |
22 | #include <net/cipso_ipv4.h> | 23 | #include <net/cipso_ipv4.h> |
23 | #include <net/inet_frag.h> | 24 | #include <net/inet_frag.h> |
25 | #include <net/ping.h> | ||
24 | 26 | ||
25 | static int zero; | 27 | static int zero; |
26 | static int tcp_retr1_max = 255; | 28 | static int tcp_retr1_max = 255; |
@@ -30,6 +32,8 @@ static int tcp_adv_win_scale_min = -31; | |||
30 | static int tcp_adv_win_scale_max = 31; | 32 | static int tcp_adv_win_scale_max = 31; |
31 | static int ip_ttl_min = 1; | 33 | static int ip_ttl_min = 1; |
32 | static int ip_ttl_max = 255; | 34 | static int ip_ttl_max = 255; |
35 | static int ip_ping_group_range_min[] = { 0, 0 }; | ||
36 | static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; | ||
33 | 37 | ||
34 | /* Update system visible IP port range */ | 38 | /* Update system visible IP port range */ |
35 | static void set_local_port_range(int range[2]) | 39 | static void set_local_port_range(int range[2]) |
@@ -68,6 +72,53 @@ static int ipv4_local_port_range(ctl_table *table, int write, | |||
68 | return ret; | 72 | return ret; |
69 | } | 73 | } |
70 | 74 | ||
75 | |||
76 | void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) | ||
77 | { | ||
78 | gid_t *data = table->data; | ||
79 | unsigned seq; | ||
80 | do { | ||
81 | seq = read_seqbegin(&sysctl_local_ports.lock); | ||
82 | |||
83 | *low = data[0]; | ||
84 | *high = data[1]; | ||
85 | } while (read_seqretry(&sysctl_local_ports.lock, seq)); | ||
86 | } | ||
87 | |||
88 | /* Update system visible IP port range */ | ||
89 | static void set_ping_group_range(struct ctl_table *table, int range[2]) | ||
90 | { | ||
91 | gid_t *data = table->data; | ||
92 | write_seqlock(&sysctl_local_ports.lock); | ||
93 | data[0] = range[0]; | ||
94 | data[1] = range[1]; | ||
95 | write_sequnlock(&sysctl_local_ports.lock); | ||
96 | } | ||
97 | |||
98 | /* Validate changes from /proc interface. */ | ||
99 | static int ipv4_ping_group_range(ctl_table *table, int write, | ||
100 | void __user *buffer, | ||
101 | size_t *lenp, loff_t *ppos) | ||
102 | { | ||
103 | int ret; | ||
104 | gid_t range[2]; | ||
105 | ctl_table tmp = { | ||
106 | .data = &range, | ||
107 | .maxlen = sizeof(range), | ||
108 | .mode = table->mode, | ||
109 | .extra1 = &ip_ping_group_range_min, | ||
110 | .extra2 = &ip_ping_group_range_max, | ||
111 | }; | ||
112 | |||
113 | inet_get_ping_group_range_table(table, range, range + 1); | ||
114 | ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | ||
115 | |||
116 | if (write && ret == 0) | ||
117 | set_ping_group_range(table, range); | ||
118 | |||
119 | return ret; | ||
120 | } | ||
121 | |||
71 | static int proc_tcp_congestion_control(ctl_table *ctl, int write, | 122 | static int proc_tcp_congestion_control(ctl_table *ctl, int write, |
72 | void __user *buffer, size_t *lenp, loff_t *ppos) | 123 | void __user *buffer, size_t *lenp, loff_t *ppos) |
73 | { | 124 | { |
@@ -677,6 +728,13 @@ static struct ctl_table ipv4_net_table[] = { | |||
677 | .mode = 0644, | 728 | .mode = 0644, |
678 | .proc_handler = proc_dointvec | 729 | .proc_handler = proc_dointvec |
679 | }, | 730 | }, |
731 | { | ||
732 | .procname = "ping_group_range", | ||
733 | .data = &init_net.ipv4.sysctl_ping_group_range, | ||
734 | .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), | ||
735 | .mode = 0644, | ||
736 | .proc_handler = ipv4_ping_group_range, | ||
737 | }, | ||
680 | { } | 738 | { } |
681 | }; | 739 | }; |
682 | 740 | ||
@@ -711,8 +769,18 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
711 | &net->ipv4.sysctl_icmp_ratemask; | 769 | &net->ipv4.sysctl_icmp_ratemask; |
712 | table[6].data = | 770 | table[6].data = |
713 | &net->ipv4.sysctl_rt_cache_rebuild_count; | 771 | &net->ipv4.sysctl_rt_cache_rebuild_count; |
772 | table[7].data = | ||
773 | &net->ipv4.sysctl_ping_group_range; | ||
774 | |||
714 | } | 775 | } |
715 | 776 | ||
777 | /* | ||
778 | * Sane defaults - nobody may create ping sockets. | ||
779 | * Boot scripts should set this to distro-specific group. | ||
780 | */ | ||
781 | net->ipv4.sysctl_ping_group_range[0] = 1; | ||
782 | net->ipv4.sysctl_ping_group_range[1] = 0; | ||
783 | |||
716 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; | 784 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; |
717 | 785 | ||
718 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | 786 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d45010545..054a59d21eb0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -999,7 +999,8 @@ new_segment: | |||
999 | /* We have some space in skb head. Superb! */ | 999 | /* We have some space in skb head. Superb! */ |
1000 | if (copy > skb_tailroom(skb)) | 1000 | if (copy > skb_tailroom(skb)) |
1001 | copy = skb_tailroom(skb); | 1001 | copy = skb_tailroom(skb); |
1002 | if ((err = skb_add_data(skb, from, copy)) != 0) | 1002 | err = skb_add_data_nocache(sk, skb, from, copy); |
1003 | if (err) | ||
1003 | goto do_fault; | 1004 | goto do_fault; |
1004 | } else { | 1005 | } else { |
1005 | int merge = 0; | 1006 | int merge = 0; |
@@ -1042,8 +1043,8 @@ new_segment: | |||
1042 | 1043 | ||
1043 | /* Time to copy data. We are close to | 1044 | /* Time to copy data. We are close to |
1044 | * the end! */ | 1045 | * the end! */ |
1045 | err = skb_copy_to_page(sk, from, skb, page, | 1046 | err = skb_copy_to_page_nocache(sk, from, skb, |
1046 | off, copy); | 1047 | page, off, copy); |
1047 | if (err) { | 1048 | if (err) { |
1048 | /* If this page was new, give it to the | 1049 | /* If this page was new, give it to the |
1049 | * socket so it does not get leaked. | 1050 | * socket so it does not get leaked. |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f7e6c2c2d2bb..3c8d9b6f1ea4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique); | |||
146 | /* This will initiate an outgoing connection. */ | 146 | /* This will initiate an outgoing connection. */ |
147 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 147 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
148 | { | 148 | { |
149 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | ||
149 | struct inet_sock *inet = inet_sk(sk); | 150 | struct inet_sock *inet = inet_sk(sk); |
150 | struct tcp_sock *tp = tcp_sk(sk); | 151 | struct tcp_sock *tp = tcp_sk(sk); |
151 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | ||
152 | __be16 orig_sport, orig_dport; | 152 | __be16 orig_sport, orig_dport; |
153 | struct rtable *rt; | ||
154 | __be32 daddr, nexthop; | 153 | __be32 daddr, nexthop; |
154 | struct flowi4 *fl4; | ||
155 | struct rtable *rt; | ||
155 | int err; | 156 | int err; |
157 | struct ip_options_rcu *inet_opt; | ||
156 | 158 | ||
157 | if (addr_len < sizeof(struct sockaddr_in)) | 159 | if (addr_len < sizeof(struct sockaddr_in)) |
158 | return -EINVAL; | 160 | return -EINVAL; |
@@ -161,15 +163,18 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
161 | return -EAFNOSUPPORT; | 163 | return -EAFNOSUPPORT; |
162 | 164 | ||
163 | nexthop = daddr = usin->sin_addr.s_addr; | 165 | nexthop = daddr = usin->sin_addr.s_addr; |
164 | if (inet->opt && inet->opt->srr) { | 166 | inet_opt = rcu_dereference_protected(inet->inet_opt, |
167 | sock_owned_by_user(sk)); | ||
168 | if (inet_opt && inet_opt->opt.srr) { | ||
165 | if (!daddr) | 169 | if (!daddr) |
166 | return -EINVAL; | 170 | return -EINVAL; |
167 | nexthop = inet->opt->faddr; | 171 | nexthop = inet_opt->opt.faddr; |
168 | } | 172 | } |
169 | 173 | ||
170 | orig_sport = inet->inet_sport; | 174 | orig_sport = inet->inet_sport; |
171 | orig_dport = usin->sin_port; | 175 | orig_dport = usin->sin_port; |
172 | rt = ip_route_connect(nexthop, inet->inet_saddr, | 176 | fl4 = &inet->cork.fl.u.ip4; |
177 | rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, | ||
173 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | 178 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, |
174 | IPPROTO_TCP, | 179 | IPPROTO_TCP, |
175 | orig_sport, orig_dport, sk, true); | 180 | orig_sport, orig_dport, sk, true); |
@@ -185,11 +190,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
185 | return -ENETUNREACH; | 190 | return -ENETUNREACH; |
186 | } | 191 | } |
187 | 192 | ||
188 | if (!inet->opt || !inet->opt->srr) | 193 | if (!inet_opt || !inet_opt->opt.srr) |
189 | daddr = rt->rt_dst; | 194 | daddr = fl4->daddr; |
190 | 195 | ||
191 | if (!inet->inet_saddr) | 196 | if (!inet->inet_saddr) |
192 | inet->inet_saddr = rt->rt_src; | 197 | inet->inet_saddr = fl4->saddr; |
193 | inet->inet_rcv_saddr = inet->inet_saddr; | 198 | inet->inet_rcv_saddr = inet->inet_saddr; |
194 | 199 | ||
195 | if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { | 200 | if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { |
@@ -200,8 +205,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
200 | } | 205 | } |
201 | 206 | ||
202 | if (tcp_death_row.sysctl_tw_recycle && | 207 | if (tcp_death_row.sysctl_tw_recycle && |
203 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { | 208 | !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { |
204 | struct inet_peer *peer = rt_get_peer(rt); | 209 | struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); |
205 | /* | 210 | /* |
206 | * VJ's idea. We save last timestamp seen from | 211 | * VJ's idea. We save last timestamp seen from |
207 | * the destination in peer table, when entering state | 212 | * the destination in peer table, when entering state |
@@ -221,8 +226,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
221 | inet->inet_daddr = daddr; | 226 | inet->inet_daddr = daddr; |
222 | 227 | ||
223 | inet_csk(sk)->icsk_ext_hdr_len = 0; | 228 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
224 | if (inet->opt) | 229 | if (inet_opt) |
225 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; | 230 | inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
226 | 231 | ||
227 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; | 232 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; |
228 | 233 | ||
@@ -236,8 +241,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
236 | if (err) | 241 | if (err) |
237 | goto failure; | 242 | goto failure; |
238 | 243 | ||
239 | rt = ip_route_newports(rt, IPPROTO_TCP, | 244 | rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, |
240 | orig_sport, orig_dport, | ||
241 | inet->inet_sport, inet->inet_dport, sk); | 245 | inet->inet_sport, inet->inet_dport, sk); |
242 | if (IS_ERR(rt)) { | 246 | if (IS_ERR(rt)) { |
243 | err = PTR_ERR(rt); | 247 | err = PTR_ERR(rt); |
@@ -279,7 +283,7 @@ EXPORT_SYMBOL(tcp_v4_connect); | |||
279 | /* | 283 | /* |
280 | * This routine does path mtu discovery as defined in RFC1191. | 284 | * This routine does path mtu discovery as defined in RFC1191. |
281 | */ | 285 | */ |
282 | static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) | 286 | static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) |
283 | { | 287 | { |
284 | struct dst_entry *dst; | 288 | struct dst_entry *dst; |
285 | struct inet_sock *inet = inet_sk(sk); | 289 | struct inet_sock *inet = inet_sk(sk); |
@@ -341,7 +345,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) | |||
341 | 345 | ||
342 | void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | 346 | void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) |
343 | { | 347 | { |
344 | struct iphdr *iph = (struct iphdr *)icmp_skb->data; | 348 | const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; |
345 | struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); | 349 | struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); |
346 | struct inet_connection_sock *icsk; | 350 | struct inet_connection_sock *icsk; |
347 | struct tcp_sock *tp; | 351 | struct tcp_sock *tp; |
@@ -647,7 +651,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
647 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; | 651 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; |
648 | 652 | ||
649 | net = dev_net(skb_dst(skb)->dev); | 653 | net = dev_net(skb_dst(skb)->dev); |
650 | ip_send_reply(net->ipv4.tcp_sock, skb, | 654 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
651 | &arg, arg.iov[0].iov_len); | 655 | &arg, arg.iov[0].iov_len); |
652 | 656 | ||
653 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); | 657 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); |
@@ -722,7 +726,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | |||
722 | if (oif) | 726 | if (oif) |
723 | arg.bound_dev_if = oif; | 727 | arg.bound_dev_if = oif; |
724 | 728 | ||
725 | ip_send_reply(net->ipv4.tcp_sock, skb, | 729 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
726 | &arg, arg.iov[0].iov_len); | 730 | &arg, arg.iov[0].iov_len); |
727 | 731 | ||
728 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); | 732 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); |
@@ -765,11 +769,12 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
765 | struct request_values *rvp) | 769 | struct request_values *rvp) |
766 | { | 770 | { |
767 | const struct inet_request_sock *ireq = inet_rsk(req); | 771 | const struct inet_request_sock *ireq = inet_rsk(req); |
772 | struct flowi4 fl4; | ||
768 | int err = -1; | 773 | int err = -1; |
769 | struct sk_buff * skb; | 774 | struct sk_buff * skb; |
770 | 775 | ||
771 | /* First, grab a route. */ | 776 | /* First, grab a route. */ |
772 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) | 777 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) |
773 | return -1; | 778 | return -1; |
774 | 779 | ||
775 | skb = tcp_make_synack(sk, dst, req, rvp); | 780 | skb = tcp_make_synack(sk, dst, req, rvp); |
@@ -820,17 +825,18 @@ static void syn_flood_warning(const struct sk_buff *skb) | |||
820 | /* | 825 | /* |
821 | * Save and compile IPv4 options into the request_sock if needed. | 826 | * Save and compile IPv4 options into the request_sock if needed. |
822 | */ | 827 | */ |
823 | static struct ip_options *tcp_v4_save_options(struct sock *sk, | 828 | static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, |
824 | struct sk_buff *skb) | 829 | struct sk_buff *skb) |
825 | { | 830 | { |
826 | struct ip_options *opt = &(IPCB(skb)->opt); | 831 | const struct ip_options *opt = &(IPCB(skb)->opt); |
827 | struct ip_options *dopt = NULL; | 832 | struct ip_options_rcu *dopt = NULL; |
828 | 833 | ||
829 | if (opt && opt->optlen) { | 834 | if (opt && opt->optlen) { |
830 | int opt_size = optlength(opt); | 835 | int opt_size = sizeof(*dopt) + opt->optlen; |
836 | |||
831 | dopt = kmalloc(opt_size, GFP_ATOMIC); | 837 | dopt = kmalloc(opt_size, GFP_ATOMIC); |
832 | if (dopt) { | 838 | if (dopt) { |
833 | if (ip_options_echo(dopt, skb)) { | 839 | if (ip_options_echo(&dopt->opt, skb)) { |
834 | kfree(dopt); | 840 | kfree(dopt); |
835 | dopt = NULL; | 841 | dopt = NULL; |
836 | } | 842 | } |
@@ -1333,6 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1333 | req->cookie_ts = tmp_opt.tstamp_ok; | 1339 | req->cookie_ts = tmp_opt.tstamp_ok; |
1334 | } else if (!isn) { | 1340 | } else if (!isn) { |
1335 | struct inet_peer *peer = NULL; | 1341 | struct inet_peer *peer = NULL; |
1342 | struct flowi4 fl4; | ||
1336 | 1343 | ||
1337 | /* VJ's idea. We save last timestamp seen | 1344 | /* VJ's idea. We save last timestamp seen |
1338 | * from the destination in peer table, when entering | 1345 | * from the destination in peer table, when entering |
@@ -1345,9 +1352,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1345 | */ | 1352 | */ |
1346 | if (tmp_opt.saw_tstamp && | 1353 | if (tmp_opt.saw_tstamp && |
1347 | tcp_death_row.sysctl_tw_recycle && | 1354 | tcp_death_row.sysctl_tw_recycle && |
1348 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1355 | (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && |
1349 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1356 | fl4.daddr == saddr && |
1350 | peer->daddr.addr.a4 == saddr) { | 1357 | (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { |
1351 | inet_peer_refcheck(peer); | 1358 | inet_peer_refcheck(peer); |
1352 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1359 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
1353 | (s32)(peer->tcp_ts - req->ts_recent) > | 1360 | (s32)(peer->tcp_ts - req->ts_recent) > |
@@ -1411,19 +1418,16 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1411 | #ifdef CONFIG_TCP_MD5SIG | 1418 | #ifdef CONFIG_TCP_MD5SIG |
1412 | struct tcp_md5sig_key *key; | 1419 | struct tcp_md5sig_key *key; |
1413 | #endif | 1420 | #endif |
1421 | struct ip_options_rcu *inet_opt; | ||
1414 | 1422 | ||
1415 | if (sk_acceptq_is_full(sk)) | 1423 | if (sk_acceptq_is_full(sk)) |
1416 | goto exit_overflow; | 1424 | goto exit_overflow; |
1417 | 1425 | ||
1418 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
1419 | goto exit; | ||
1420 | |||
1421 | newsk = tcp_create_openreq_child(sk, req, skb); | 1426 | newsk = tcp_create_openreq_child(sk, req, skb); |
1422 | if (!newsk) | 1427 | if (!newsk) |
1423 | goto exit_nonewsk; | 1428 | goto exit_nonewsk; |
1424 | 1429 | ||
1425 | newsk->sk_gso_type = SKB_GSO_TCPV4; | 1430 | newsk->sk_gso_type = SKB_GSO_TCPV4; |
1426 | sk_setup_caps(newsk, dst); | ||
1427 | 1431 | ||
1428 | newtp = tcp_sk(newsk); | 1432 | newtp = tcp_sk(newsk); |
1429 | newinet = inet_sk(newsk); | 1433 | newinet = inet_sk(newsk); |
@@ -1431,15 +1435,21 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1431 | newinet->inet_daddr = ireq->rmt_addr; | 1435 | newinet->inet_daddr = ireq->rmt_addr; |
1432 | newinet->inet_rcv_saddr = ireq->loc_addr; | 1436 | newinet->inet_rcv_saddr = ireq->loc_addr; |
1433 | newinet->inet_saddr = ireq->loc_addr; | 1437 | newinet->inet_saddr = ireq->loc_addr; |
1434 | newinet->opt = ireq->opt; | 1438 | inet_opt = ireq->opt; |
1439 | rcu_assign_pointer(newinet->inet_opt, inet_opt); | ||
1435 | ireq->opt = NULL; | 1440 | ireq->opt = NULL; |
1436 | newinet->mc_index = inet_iif(skb); | 1441 | newinet->mc_index = inet_iif(skb); |
1437 | newinet->mc_ttl = ip_hdr(skb)->ttl; | 1442 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
1438 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | 1443 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1439 | if (newinet->opt) | 1444 | if (inet_opt) |
1440 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; | 1445 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
1441 | newinet->inet_id = newtp->write_seq ^ jiffies; | 1446 | newinet->inet_id = newtp->write_seq ^ jiffies; |
1442 | 1447 | ||
1448 | if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) | ||
1449 | goto put_and_exit; | ||
1450 | |||
1451 | sk_setup_caps(newsk, dst); | ||
1452 | |||
1443 | tcp_mtup_init(newsk); | 1453 | tcp_mtup_init(newsk); |
1444 | tcp_sync_mss(newsk, dst_mtu(dst)); | 1454 | tcp_sync_mss(newsk, dst_mtu(dst)); |
1445 | newtp->advmss = dst_metric_advmss(dst); | 1455 | newtp->advmss = dst_metric_advmss(dst); |
@@ -1467,10 +1477,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1467 | } | 1477 | } |
1468 | #endif | 1478 | #endif |
1469 | 1479 | ||
1470 | if (__inet_inherit_port(sk, newsk) < 0) { | 1480 | if (__inet_inherit_port(sk, newsk) < 0) |
1471 | sock_put(newsk); | 1481 | goto put_and_exit; |
1472 | goto exit; | ||
1473 | } | ||
1474 | __inet_hash_nolisten(newsk, NULL); | 1482 | __inet_hash_nolisten(newsk, NULL); |
1475 | 1483 | ||
1476 | return newsk; | 1484 | return newsk; |
@@ -1482,6 +1490,9 @@ exit_nonewsk: | |||
1482 | exit: | 1490 | exit: |
1483 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 1491 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
1484 | return NULL; | 1492 | return NULL; |
1493 | put_and_exit: | ||
1494 | sock_put(newsk); | ||
1495 | goto exit; | ||
1485 | } | 1496 | } |
1486 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | 1497 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); |
1487 | 1498 | ||
@@ -1764,12 +1775,13 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) | |||
1764 | struct inet_sock *inet = inet_sk(sk); | 1775 | struct inet_sock *inet = inet_sk(sk); |
1765 | struct inet_peer *peer; | 1776 | struct inet_peer *peer; |
1766 | 1777 | ||
1767 | if (!rt || rt->rt_dst != inet->inet_daddr) { | 1778 | if (!rt || |
1779 | inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { | ||
1768 | peer = inet_getpeer_v4(inet->inet_daddr, 1); | 1780 | peer = inet_getpeer_v4(inet->inet_daddr, 1); |
1769 | *release_it = true; | 1781 | *release_it = true; |
1770 | } else { | 1782 | } else { |
1771 | if (!rt->peer) | 1783 | if (!rt->peer) |
1772 | rt_bind_peer(rt, 1); | 1784 | rt_bind_peer(rt, inet->inet_daddr, 1); |
1773 | peer = rt->peer; | 1785 | peer = rt->peer; |
1774 | *release_it = false; | 1786 | *release_it = false; |
1775 | } | 1787 | } |
@@ -2527,7 +2539,7 @@ void tcp4_proc_exit(void) | |||
2527 | 2539 | ||
2528 | struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | 2540 | struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) |
2529 | { | 2541 | { |
2530 | struct iphdr *iph = skb_gro_network_header(skb); | 2542 | const struct iphdr *iph = skb_gro_network_header(skb); |
2531 | 2543 | ||
2532 | switch (skb->ip_summed) { | 2544 | switch (skb->ip_summed) { |
2533 | case CHECKSUM_COMPLETE: | 2545 | case CHECKSUM_COMPLETE: |
@@ -2548,7 +2560,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2548 | 2560 | ||
2549 | int tcp4_gro_complete(struct sk_buff *skb) | 2561 | int tcp4_gro_complete(struct sk_buff *skb) |
2550 | { | 2562 | { |
2551 | struct iphdr *iph = ip_hdr(skb); | 2563 | const struct iphdr *iph = ip_hdr(skb); |
2552 | struct tcphdr *th = tcp_hdr(skb); | 2564 | struct tcphdr *th = tcp_hdr(skb); |
2553 | 2565 | ||
2554 | th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), | 2566 | th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 17388c7f49c4..882e0b0964d0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -899,7 +899,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
899 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, | 899 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, |
900 | tcp_skb_pcount(skb)); | 900 | tcp_skb_pcount(skb)); |
901 | 901 | ||
902 | err = icsk->icsk_af_ops->queue_xmit(skb); | 902 | err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); |
903 | if (likely(err <= 0)) | 903 | if (likely(err <= 0)) |
904 | return err; | 904 | return err; |
905 | 905 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f87a8eb76f3b..599374f65c76 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -578,7 +578,7 @@ found: | |||
578 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | 578 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
579 | { | 579 | { |
580 | struct inet_sock *inet; | 580 | struct inet_sock *inet; |
581 | struct iphdr *iph = (struct iphdr *)skb->data; | 581 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
582 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); | 582 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); |
583 | const int type = icmp_hdr(skb)->type; | 583 | const int type = icmp_hdr(skb)->type; |
584 | const int code = icmp_hdr(skb)->code; | 584 | const int code = icmp_hdr(skb)->code; |
@@ -706,12 +706,11 @@ static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) | |||
706 | } | 706 | } |
707 | } | 707 | } |
708 | 708 | ||
709 | static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) | 709 | static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) |
710 | { | 710 | { |
711 | struct sock *sk = skb->sk; | 711 | struct sock *sk = skb->sk; |
712 | struct inet_sock *inet = inet_sk(sk); | 712 | struct inet_sock *inet = inet_sk(sk); |
713 | struct udphdr *uh; | 713 | struct udphdr *uh; |
714 | struct rtable *rt = (struct rtable *)skb_dst(skb); | ||
715 | int err = 0; | 714 | int err = 0; |
716 | int is_udplite = IS_UDPLITE(sk); | 715 | int is_udplite = IS_UDPLITE(sk); |
717 | int offset = skb_transport_offset(skb); | 716 | int offset = skb_transport_offset(skb); |
@@ -723,7 +722,7 @@ static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) | |||
723 | */ | 722 | */ |
724 | uh = udp_hdr(skb); | 723 | uh = udp_hdr(skb); |
725 | uh->source = inet->inet_sport; | 724 | uh->source = inet->inet_sport; |
726 | uh->dest = dport; | 725 | uh->dest = fl4->fl4_dport; |
727 | uh->len = htons(len); | 726 | uh->len = htons(len); |
728 | uh->check = 0; | 727 | uh->check = 0; |
729 | 728 | ||
@@ -737,14 +736,14 @@ static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) | |||
737 | 736 | ||
738 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | 737 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
739 | 738 | ||
740 | udp4_hwcsum(skb, rt->rt_src, daddr); | 739 | udp4_hwcsum(skb, fl4->saddr, fl4->daddr); |
741 | goto send; | 740 | goto send; |
742 | 741 | ||
743 | } else | 742 | } else |
744 | csum = udp_csum(skb); | 743 | csum = udp_csum(skb); |
745 | 744 | ||
746 | /* add protocol-dependent pseudo-header */ | 745 | /* add protocol-dependent pseudo-header */ |
747 | uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len, | 746 | uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, |
748 | sk->sk_protocol, csum); | 747 | sk->sk_protocol, csum); |
749 | if (uh->check == 0) | 748 | if (uh->check == 0) |
750 | uh->check = CSUM_MANGLED_0; | 749 | uh->check = CSUM_MANGLED_0; |
@@ -774,11 +773,11 @@ static int udp_push_pending_frames(struct sock *sk) | |||
774 | struct sk_buff *skb; | 773 | struct sk_buff *skb; |
775 | int err = 0; | 774 | int err = 0; |
776 | 775 | ||
777 | skb = ip_finish_skb(sk); | 776 | skb = ip_finish_skb(sk, fl4); |
778 | if (!skb) | 777 | if (!skb) |
779 | goto out; | 778 | goto out; |
780 | 779 | ||
781 | err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); | 780 | err = udp_send_skb(skb, fl4); |
782 | 781 | ||
783 | out: | 782 | out: |
784 | up->len = 0; | 783 | up->len = 0; |
@@ -791,6 +790,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
791 | { | 790 | { |
792 | struct inet_sock *inet = inet_sk(sk); | 791 | struct inet_sock *inet = inet_sk(sk); |
793 | struct udp_sock *up = udp_sk(sk); | 792 | struct udp_sock *up = udp_sk(sk); |
793 | struct flowi4 fl4_stack; | ||
794 | struct flowi4 *fl4; | 794 | struct flowi4 *fl4; |
795 | int ulen = len; | 795 | int ulen = len; |
796 | struct ipcm_cookie ipc; | 796 | struct ipcm_cookie ipc; |
@@ -804,6 +804,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
804 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 804 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
805 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | 805 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); |
806 | struct sk_buff *skb; | 806 | struct sk_buff *skb; |
807 | struct ip_options_data opt_copy; | ||
807 | 808 | ||
808 | if (len > 0xFFFF) | 809 | if (len > 0xFFFF) |
809 | return -EMSGSIZE; | 810 | return -EMSGSIZE; |
@@ -820,6 +821,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
820 | 821 | ||
821 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | 822 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; |
822 | 823 | ||
824 | fl4 = &inet->cork.fl.u.ip4; | ||
823 | if (up->pending) { | 825 | if (up->pending) { |
824 | /* | 826 | /* |
825 | * There are pending frames. | 827 | * There are pending frames. |
@@ -877,22 +879,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
877 | free = 1; | 879 | free = 1; |
878 | connected = 0; | 880 | connected = 0; |
879 | } | 881 | } |
880 | if (!ipc.opt) | 882 | if (!ipc.opt) { |
881 | ipc.opt = inet->opt; | 883 | struct ip_options_rcu *inet_opt; |
884 | |||
885 | rcu_read_lock(); | ||
886 | inet_opt = rcu_dereference(inet->inet_opt); | ||
887 | if (inet_opt) { | ||
888 | memcpy(&opt_copy, inet_opt, | ||
889 | sizeof(*inet_opt) + inet_opt->opt.optlen); | ||
890 | ipc.opt = &opt_copy.opt; | ||
891 | } | ||
892 | rcu_read_unlock(); | ||
893 | } | ||
882 | 894 | ||
883 | saddr = ipc.addr; | 895 | saddr = ipc.addr; |
884 | ipc.addr = faddr = daddr; | 896 | ipc.addr = faddr = daddr; |
885 | 897 | ||
886 | if (ipc.opt && ipc.opt->srr) { | 898 | if (ipc.opt && ipc.opt->opt.srr) { |
887 | if (!daddr) | 899 | if (!daddr) |
888 | return -EINVAL; | 900 | return -EINVAL; |
889 | faddr = ipc.opt->faddr; | 901 | faddr = ipc.opt->opt.faddr; |
890 | connected = 0; | 902 | connected = 0; |
891 | } | 903 | } |
892 | tos = RT_TOS(inet->tos); | 904 | tos = RT_TOS(inet->tos); |
893 | if (sock_flag(sk, SOCK_LOCALROUTE) || | 905 | if (sock_flag(sk, SOCK_LOCALROUTE) || |
894 | (msg->msg_flags & MSG_DONTROUTE) || | 906 | (msg->msg_flags & MSG_DONTROUTE) || |
895 | (ipc.opt && ipc.opt->is_strictroute)) { | 907 | (ipc.opt && ipc.opt->opt.is_strictroute)) { |
896 | tos |= RTO_ONLINK; | 908 | tos |= RTO_ONLINK; |
897 | connected = 0; | 909 | connected = 0; |
898 | } | 910 | } |
@@ -909,22 +921,16 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
909 | rt = (struct rtable *)sk_dst_check(sk, 0); | 921 | rt = (struct rtable *)sk_dst_check(sk, 0); |
910 | 922 | ||
911 | if (rt == NULL) { | 923 | if (rt == NULL) { |
912 | struct flowi4 fl4 = { | ||
913 | .flowi4_oif = ipc.oif, | ||
914 | .flowi4_mark = sk->sk_mark, | ||
915 | .daddr = faddr, | ||
916 | .saddr = saddr, | ||
917 | .flowi4_tos = tos, | ||
918 | .flowi4_proto = sk->sk_protocol, | ||
919 | .flowi4_flags = (inet_sk_flowi_flags(sk) | | ||
920 | FLOWI_FLAG_CAN_SLEEP), | ||
921 | .fl4_sport = inet->inet_sport, | ||
922 | .fl4_dport = dport, | ||
923 | }; | ||
924 | struct net *net = sock_net(sk); | 924 | struct net *net = sock_net(sk); |
925 | 925 | ||
926 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); | 926 | fl4 = &fl4_stack; |
927 | rt = ip_route_output_flow(net, &fl4, sk); | 927 | flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, |
928 | RT_SCOPE_UNIVERSE, sk->sk_protocol, | ||
929 | inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, | ||
930 | faddr, saddr, dport, inet->inet_sport); | ||
931 | |||
932 | security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); | ||
933 | rt = ip_route_output_flow(net, fl4, sk); | ||
928 | if (IS_ERR(rt)) { | 934 | if (IS_ERR(rt)) { |
929 | err = PTR_ERR(rt); | 935 | err = PTR_ERR(rt); |
930 | rt = NULL; | 936 | rt = NULL; |
@@ -945,18 +951,18 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
945 | goto do_confirm; | 951 | goto do_confirm; |
946 | back_from_confirm: | 952 | back_from_confirm: |
947 | 953 | ||
948 | saddr = rt->rt_src; | 954 | saddr = fl4->saddr; |
949 | if (!ipc.addr) | 955 | if (!ipc.addr) |
950 | daddr = ipc.addr = rt->rt_dst; | 956 | daddr = ipc.addr = fl4->daddr; |
951 | 957 | ||
952 | /* Lockless fast path for the non-corking case. */ | 958 | /* Lockless fast path for the non-corking case. */ |
953 | if (!corkreq) { | 959 | if (!corkreq) { |
954 | skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen, | 960 | skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen, |
955 | sizeof(struct udphdr), &ipc, &rt, | 961 | sizeof(struct udphdr), &ipc, &rt, |
956 | msg->msg_flags); | 962 | msg->msg_flags); |
957 | err = PTR_ERR(skb); | 963 | err = PTR_ERR(skb); |
958 | if (skb && !IS_ERR(skb)) | 964 | if (skb && !IS_ERR(skb)) |
959 | err = udp_send_skb(skb, daddr, dport); | 965 | err = udp_send_skb(skb, fl4); |
960 | goto out; | 966 | goto out; |
961 | } | 967 | } |
962 | 968 | ||
@@ -982,9 +988,9 @@ back_from_confirm: | |||
982 | 988 | ||
983 | do_append_data: | 989 | do_append_data: |
984 | up->len += ulen; | 990 | up->len += ulen; |
985 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | 991 | err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen, |
986 | sizeof(struct udphdr), &ipc, &rt, | 992 | sizeof(struct udphdr), &ipc, &rt, |
987 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 993 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
988 | if (err) | 994 | if (err) |
989 | udp_flush_pending_frames(sk); | 995 | udp_flush_pending_frames(sk); |
990 | else if (!corkreq) | 996 | else if (!corkreq) |
@@ -1024,6 +1030,7 @@ EXPORT_SYMBOL(udp_sendmsg); | |||
1024 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | 1030 | int udp_sendpage(struct sock *sk, struct page *page, int offset, |
1025 | size_t size, int flags) | 1031 | size_t size, int flags) |
1026 | { | 1032 | { |
1033 | struct inet_sock *inet = inet_sk(sk); | ||
1027 | struct udp_sock *up = udp_sk(sk); | 1034 | struct udp_sock *up = udp_sk(sk); |
1028 | int ret; | 1035 | int ret; |
1029 | 1036 | ||
@@ -1048,7 +1055,8 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, | |||
1048 | return -EINVAL; | 1055 | return -EINVAL; |
1049 | } | 1056 | } |
1050 | 1057 | ||
1051 | ret = ip_append_page(sk, page, offset, size, flags); | 1058 | ret = ip_append_page(sk, &inet->cork.fl.u.ip4, |
1059 | page, offset, size, flags); | ||
1052 | if (ret == -EOPNOTSUPP) { | 1060 | if (ret == -EOPNOTSUPP) { |
1053 | release_sock(sk); | 1061 | release_sock(sk); |
1054 | return sock_no_sendpage(sk->sk_socket, page, offset, | 1062 | return sock_no_sendpage(sk->sk_socket, page, offset, |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index d20a05e970d8..981e43eaf704 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -18,38 +18,46 @@ | |||
18 | 18 | ||
19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; | 19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; |
20 | 20 | ||
21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | 21 | static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, |
22 | const xfrm_address_t *saddr, | 22 | int tos, |
23 | const xfrm_address_t *daddr) | 23 | const xfrm_address_t *saddr, |
24 | const xfrm_address_t *daddr) | ||
24 | { | 25 | { |
25 | struct flowi4 fl4 = { | ||
26 | .daddr = daddr->a4, | ||
27 | .flowi4_tos = tos, | ||
28 | }; | ||
29 | struct rtable *rt; | 26 | struct rtable *rt; |
30 | 27 | ||
28 | memset(fl4, 0, sizeof(*fl4)); | ||
29 | fl4->daddr = daddr->a4; | ||
30 | fl4->flowi4_tos = tos; | ||
31 | if (saddr) | 31 | if (saddr) |
32 | fl4.saddr = saddr->a4; | 32 | fl4->saddr = saddr->a4; |
33 | 33 | ||
34 | rt = __ip_route_output_key(net, &fl4); | 34 | rt = __ip_route_output_key(net, fl4); |
35 | if (!IS_ERR(rt)) | 35 | if (!IS_ERR(rt)) |
36 | return &rt->dst; | 36 | return &rt->dst; |
37 | 37 | ||
38 | return ERR_CAST(rt); | 38 | return ERR_CAST(rt); |
39 | } | 39 | } |
40 | 40 | ||
41 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | ||
42 | const xfrm_address_t *saddr, | ||
43 | const xfrm_address_t *daddr) | ||
44 | { | ||
45 | struct flowi4 fl4; | ||
46 | |||
47 | return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr); | ||
48 | } | ||
49 | |||
41 | static int xfrm4_get_saddr(struct net *net, | 50 | static int xfrm4_get_saddr(struct net *net, |
42 | xfrm_address_t *saddr, xfrm_address_t *daddr) | 51 | xfrm_address_t *saddr, xfrm_address_t *daddr) |
43 | { | 52 | { |
44 | struct dst_entry *dst; | 53 | struct dst_entry *dst; |
45 | struct rtable *rt; | 54 | struct flowi4 fl4; |
46 | 55 | ||
47 | dst = xfrm4_dst_lookup(net, 0, NULL, daddr); | 56 | dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr); |
48 | if (IS_ERR(dst)) | 57 | if (IS_ERR(dst)) |
49 | return -EHOSTUNREACH; | 58 | return -EHOSTUNREACH; |
50 | 59 | ||
51 | rt = (struct rtable *)dst; | 60 | saddr->a4 = fl4.saddr; |
52 | saddr->a4 = rt->rt_src; | ||
53 | dst_release(dst); | 61 | dst_release(dst); |
54 | return 0; | 62 | return 0; |
55 | } | 63 | } |
@@ -73,7 +81,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
73 | 81 | ||
74 | rt->rt_key_dst = fl4->daddr; | 82 | rt->rt_key_dst = fl4->daddr; |
75 | rt->rt_key_src = fl4->saddr; | 83 | rt->rt_key_src = fl4->saddr; |
76 | rt->rt_tos = fl4->flowi4_tos; | 84 | rt->rt_key_tos = fl4->flowi4_tos; |
77 | rt->rt_route_iif = fl4->flowi4_iif; | 85 | rt->rt_route_iif = fl4->flowi4_iif; |
78 | rt->rt_iif = fl4->flowi4_iif; | 86 | rt->rt_iif = fl4->flowi4_iif; |
79 | rt->rt_oif = fl4->flowi4_oif; | 87 | rt->rt_oif = fl4->flowi4_oif; |
@@ -102,7 +110,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
102 | static void | 110 | static void |
103 | _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | 111 | _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) |
104 | { | 112 | { |
105 | struct iphdr *iph = ip_hdr(skb); | 113 | const struct iphdr *iph = ip_hdr(skb); |
106 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; | 114 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; |
107 | struct flowi4 *fl4 = &fl->u.ip4; | 115 | struct flowi4 *fl4 = &fl->u.ip4; |
108 | 116 | ||
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 805d63ef4340..d9ac0a0058b5 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c | |||
@@ -55,7 +55,7 @@ xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, | |||
55 | 55 | ||
56 | int xfrm4_extract_header(struct sk_buff *skb) | 56 | int xfrm4_extract_header(struct sk_buff *skb) |
57 | { | 57 | { |
58 | struct iphdr *iph = ip_hdr(skb); | 58 | const struct iphdr *iph = ip_hdr(skb); |
59 | 59 | ||
60 | XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); | 60 | XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); |
61 | XFRM_MODE_SKB_CB(skb)->id = iph->id; | 61 | XFRM_MODE_SKB_CB(skb)->id = iph->id; |