aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-05-20 16:43:21 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-05-20 16:43:21 -0400
commit 06f4e926d256d902dd9a53dcb400fd74974ce087 (patch)
tree 0b438b67f5f0eff6fd617bc497a9dace6164a488 /net/ipv4
parent 8e7bfcbab3825d1b404d615cb1b54f44ff81f981 (diff)
parent d93515611bbc70c2fe4db232e5feb448ed8e4cc9 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1446 commits)
  macvlan: fix panic if lowerdev in a bond
  tg3: Add braces around 5906 workaround.
  tg3: Fix NETIF_F_LOOPBACK error
  macvlan: remove one synchronize_rcu() call
  networking: NET_CLS_ROUTE4 depends on INET
  irda: Fix error propagation in ircomm_lmp_connect_response()
  irda: Kill set but unused variable 'bytes' in irlan_check_command_param()
  irda: Kill set but unused variable 'clen' in ircomm_connect_indication()
  rxrpc: Fix set but unused variable 'usage' in rxrpc_get_transport()
  be2net: Kill set but unused variable 'req' in lancer_fw_download()
  irda: Kill set but unused vars 'saddr' and 'daddr' in irlan_provider_connect_indication()
  atl1c: atl1c_resume() is only used when CONFIG_PM_SLEEP is defined.
  rxrpc: Fix set but unused variable 'usage' in rxrpc_get_peer().
  rxrpc: Kill set but unused variable 'local' in rxrpc_UDP_error_handler()
  rxrpc: Kill set but unused variable 'sp' in rxrpc_process_connection()
  rxrpc: Kill set but unused variable 'sp' in rxrpc_rotate_tx_window()
  pkt_sched: Kill set but unused variable 'protocol' in tc_classify()
  isdn: capi: Use pr_debug() instead of ifdefs.
  tg3: Update version to 3.119
  tg3: Apply rx_discards fix to 5719/5720
  ...

Fix up trivial conflicts in arch/x86/Kconfig and net/mac80211/agg-tx.c as per Davem.
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Makefile 2
-rw-r--r-- net/ipv4/af_inet.c 53
-rw-r--r-- net/ipv4/ah4.c 7
-rw-r--r-- net/ipv4/cipso_ipv4.c 113
-rw-r--r-- net/ipv4/datagram.c 22
-rw-r--r-- net/ipv4/devinet.c 4
-rw-r--r-- net/ipv4/esp4.c 7
-rw-r--r-- net/ipv4/fib_frontend.c 16
-rw-r--r-- net/ipv4/fib_trie.c 110
-rw-r--r-- net/ipv4/icmp.c 133
-rw-r--r-- net/ipv4/igmp.c 22
-rw-r--r-- net/ipv4/inet_connection_sock.c 59
-rw-r--r-- net/ipv4/inet_diag.c 2
-rw-r--r-- net/ipv4/inet_lro.c 4
-rw-r--r-- net/ipv4/ip_forward.c 2
-rw-r--r-- net/ipv4/ip_fragment.c 58
-rw-r--r-- net/ipv4/ip_gre.c 70
-rw-r--r-- net/ipv4/ip_input.c 4
-rw-r--r-- net/ipv4/ip_options.c 57
-rw-r--r-- net/ipv4/ip_output.c 158
-rw-r--r-- net/ipv4/ip_sockglue.c 37
-rw-r--r-- net/ipv4/ipcomp.c 4
-rw-r--r-- net/ipv4/ipconfig.c 35
-rw-r--r-- net/ipv4/ipip.c 36
-rw-r--r-- net/ipv4/ipmr.c 39
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 18
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 28
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c 2
-rw-r--r-- net/ipv4/ping.c 935
-rw-r--r-- net/ipv4/raw.c 92
-rw-r--r-- net/ipv4/route.c 385
-rw-r--r-- net/ipv4/syncookies.c 22
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 68
-rw-r--r-- net/ipv4/tcp.c 7
-rw-r--r-- net/ipv4/tcp_ipv4.c 98
-rw-r--r-- net/ipv4/tcp_output.c 2
-rw-r--r-- net/ipv4/udp.c 78
-rw-r--r-- net/ipv4/xfrm4_policy.c 38
-rw-r--r-- net/ipv4/xfrm4_state.c 2
39 files changed, 1999 insertions, 830 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 0dc772d0d125..f2dc69cffb57 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
11 datagram.o raw.o udp.o udplite.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 fib_frontend.o fib_semantics.o fib_trie.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o 14 inet_fragment.o ping.o
15 15
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
17obj-$(CONFIG_PROC_FS) += proc.o 17obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 807d83c02ef6..cc1463156cd0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -105,6 +105,7 @@
105#include <net/tcp.h> 105#include <net/tcp.h>
106#include <net/udp.h> 106#include <net/udp.h>
107#include <net/udplite.h> 107#include <net/udplite.h>
108#include <net/ping.h>
108#include <linux/skbuff.h> 109#include <linux/skbuff.h>
109#include <net/sock.h> 110#include <net/sock.h>
110#include <net/raw.h> 111#include <net/raw.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
153 WARN_ON(sk->sk_wmem_queued); 154 WARN_ON(sk->sk_wmem_queued);
154 WARN_ON(sk->sk_forward_alloc); 155 WARN_ON(sk->sk_forward_alloc);
155 156
156 kfree(inet->opt); 157 kfree(rcu_dereference_protected(inet->inet_opt, 1));
157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); 158 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
158 sk_refcnt_debug_dec(sk); 159 sk_refcnt_debug_dec(sk);
159} 160}
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] =
1008 .flags = INET_PROTOSW_PERMANENT, 1009 .flags = INET_PROTOSW_PERMANENT,
1009 }, 1010 },
1010 1011
1012 {
1013 .type = SOCK_DGRAM,
1014 .protocol = IPPROTO_ICMP,
1015 .prot = &ping_prot,
1016 .ops = &inet_dgram_ops,
1017 .no_check = UDP_CSUM_DEFAULT,
1018 .flags = INET_PROTOSW_REUSE,
1019 },
1011 1020
1012 { 1021 {
1013 .type = SOCK_RAW, 1022 .type = SOCK_RAW,
@@ -1103,14 +1112,19 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1103 struct inet_sock *inet = inet_sk(sk); 1112 struct inet_sock *inet = inet_sk(sk);
1104 __be32 old_saddr = inet->inet_saddr; 1113 __be32 old_saddr = inet->inet_saddr;
1105 __be32 daddr = inet->inet_daddr; 1114 __be32 daddr = inet->inet_daddr;
1115 struct flowi4 *fl4;
1106 struct rtable *rt; 1116 struct rtable *rt;
1107 __be32 new_saddr; 1117 __be32 new_saddr;
1118 struct ip_options_rcu *inet_opt;
1108 1119
1109 if (inet->opt && inet->opt->srr) 1120 inet_opt = rcu_dereference_protected(inet->inet_opt,
1110 daddr = inet->opt->faddr; 1121 sock_owned_by_user(sk));
1122 if (inet_opt && inet_opt->opt.srr)
1123 daddr = inet_opt->opt.faddr;
1111 1124
1112 /* Query new route. */ 1125 /* Query new route. */
1113 rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), 1126 fl4 = &inet->cork.fl.u.ip4;
1127 rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
1114 sk->sk_bound_dev_if, sk->sk_protocol, 1128 sk->sk_bound_dev_if, sk->sk_protocol,
1115 inet->inet_sport, inet->inet_dport, sk, false); 1129 inet->inet_sport, inet->inet_dport, sk, false);
1116 if (IS_ERR(rt)) 1130 if (IS_ERR(rt))
@@ -1118,7 +1132,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1118 1132
1119 sk_setup_caps(sk, &rt->dst); 1133 sk_setup_caps(sk, &rt->dst);
1120 1134
1121 new_saddr = rt->rt_src; 1135 new_saddr = fl4->saddr;
1122 1136
1123 if (new_saddr == old_saddr) 1137 if (new_saddr == old_saddr)
1124 return 0; 1138 return 0;
@@ -1147,6 +1161,8 @@ int inet_sk_rebuild_header(struct sock *sk)
1147 struct inet_sock *inet = inet_sk(sk); 1161 struct inet_sock *inet = inet_sk(sk);
1148 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); 1162 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1149 __be32 daddr; 1163 __be32 daddr;
1164 struct ip_options_rcu *inet_opt;
1165 struct flowi4 *fl4;
1150 int err; 1166 int err;
1151 1167
1152 /* Route is OK, nothing to do. */ 1168 /* Route is OK, nothing to do. */
@@ -1154,10 +1170,14 @@ int inet_sk_rebuild_header(struct sock *sk)
1154 return 0; 1170 return 0;
1155 1171
1156 /* Reroute. */ 1172 /* Reroute. */
1173 rcu_read_lock();
1174 inet_opt = rcu_dereference(inet->inet_opt);
1157 daddr = inet->inet_daddr; 1175 daddr = inet->inet_daddr;
1158 if (inet->opt && inet->opt->srr) 1176 if (inet_opt && inet_opt->opt.srr)
1159 daddr = inet->opt->faddr; 1177 daddr = inet_opt->opt.faddr;
1160 rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, 1178 rcu_read_unlock();
1179 fl4 = &inet->cork.fl.u.ip4;
1180 rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
1161 inet->inet_dport, inet->inet_sport, 1181 inet->inet_dport, inet->inet_sport,
1162 sk->sk_protocol, RT_CONN_FLAGS(sk), 1182 sk->sk_protocol, RT_CONN_FLAGS(sk),
1163 sk->sk_bound_dev_if); 1183 sk->sk_bound_dev_if);
@@ -1186,7 +1206,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
1186 1206
1187static int inet_gso_send_check(struct sk_buff *skb) 1207static int inet_gso_send_check(struct sk_buff *skb)
1188{ 1208{
1189 struct iphdr *iph; 1209 const struct iphdr *iph;
1190 const struct net_protocol *ops; 1210 const struct net_protocol *ops;
1191 int proto; 1211 int proto;
1192 int ihl; 1212 int ihl;
@@ -1293,7 +1313,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1293 const struct net_protocol *ops; 1313 const struct net_protocol *ops;
1294 struct sk_buff **pp = NULL; 1314 struct sk_buff **pp = NULL;
1295 struct sk_buff *p; 1315 struct sk_buff *p;
1296 struct iphdr *iph; 1316 const struct iphdr *iph;
1297 unsigned int hlen; 1317 unsigned int hlen;
1298 unsigned int off; 1318 unsigned int off;
1299 unsigned int id; 1319 unsigned int id;
@@ -1516,6 +1536,7 @@ static const struct net_protocol udp_protocol = {
1516 1536
1517static const struct net_protocol icmp_protocol = { 1537static const struct net_protocol icmp_protocol = {
1518 .handler = icmp_rcv, 1538 .handler = icmp_rcv,
1539 .err_handler = ping_err,
1519 .no_policy = 1, 1540 .no_policy = 1,
1520 .netns_ok = 1, 1541 .netns_ok = 1,
1521}; 1542};
@@ -1631,6 +1652,10 @@ static int __init inet_init(void)
1631 if (rc) 1652 if (rc)
1632 goto out_unregister_udp_proto; 1653 goto out_unregister_udp_proto;
1633 1654
1655 rc = proto_register(&ping_prot, 1);
1656 if (rc)
1657 goto out_unregister_raw_proto;
1658
1634 /* 1659 /*
1635 * Tell SOCKET that we are alive... 1660 * Tell SOCKET that we are alive...
1636 */ 1661 */
@@ -1686,6 +1711,8 @@ static int __init inet_init(void)
1686 /* Add UDP-Lite (RFC 3828) */ 1711 /* Add UDP-Lite (RFC 3828) */
1687 udplite4_register(); 1712 udplite4_register();
1688 1713
1714 ping_init();
1715
1689 /* 1716 /*
1690 * Set the ICMP layer up 1717 * Set the ICMP layer up
1691 */ 1718 */
@@ -1716,6 +1743,8 @@ static int __init inet_init(void)
1716 rc = 0; 1743 rc = 0;
1717out: 1744out:
1718 return rc; 1745 return rc;
1746out_unregister_raw_proto:
1747 proto_unregister(&raw_prot);
1719out_unregister_udp_proto: 1748out_unregister_udp_proto:
1720 proto_unregister(&udp_prot); 1749 proto_unregister(&udp_prot);
1721out_unregister_tcp_proto: 1750out_unregister_tcp_proto:
@@ -1740,11 +1769,15 @@ static int __init ipv4_proc_init(void)
1740 goto out_tcp; 1769 goto out_tcp;
1741 if (udp4_proc_init()) 1770 if (udp4_proc_init())
1742 goto out_udp; 1771 goto out_udp;
1772 if (ping_proc_init())
1773 goto out_ping;
1743 if (ip_misc_proc_init()) 1774 if (ip_misc_proc_init())
1744 goto out_misc; 1775 goto out_misc;
1745out: 1776out:
1746 return rc; 1777 return rc;
1747out_misc: 1778out_misc:
1779 ping_proc_exit();
1780out_ping:
1748 udp4_proc_exit(); 1781 udp4_proc_exit();
1749out_udp: 1782out_udp:
1750 tcp4_proc_exit(); 1783 tcp4_proc_exit();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4286fd3cc0e2..c1f4154552fc 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -73,7 +73,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
73 * into IP header for icv calculation. Options are already checked 73 * into IP header for icv calculation. Options are already checked
74 * for validity, so paranoia is not required. */ 74 * for validity, so paranoia is not required. */
75 75
76static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) 76static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
77{ 77{
78 unsigned char * optptr = (unsigned char*)(iph+1); 78 unsigned char * optptr = (unsigned char*)(iph+1);
79 int l = iph->ihl*4 - sizeof(struct iphdr); 79 int l = iph->ihl*4 - sizeof(struct iphdr);
@@ -396,7 +396,7 @@ out:
396static void ah4_err(struct sk_buff *skb, u32 info) 396static void ah4_err(struct sk_buff *skb, u32 info)
397{ 397{
398 struct net *net = dev_net(skb->dev); 398 struct net *net = dev_net(skb->dev);
399 struct iphdr *iph = (struct iphdr *)skb->data; 399 const struct iphdr *iph = (const struct iphdr *)skb->data;
400 struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); 400 struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
401 struct xfrm_state *x; 401 struct xfrm_state *x;
402 402
@@ -404,7 +404,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
404 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 404 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
405 return; 405 return;
406 406
407 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); 407 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
408 ah->spi, IPPROTO_AH, AF_INET);
408 if (!x) 409 if (!x)
409 return; 410 return;
410 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", 411 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a0af7ea87870..2b3c23c287cd 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
1857 return CIPSO_V4_HDR_LEN + ret_val; 1857 return CIPSO_V4_HDR_LEN + ret_val;
1858} 1858}
1859 1859
1860static void opt_kfree_rcu(struct rcu_head *head)
1861{
1862 kfree(container_of(head, struct ip_options_rcu, rcu));
1863}
1864
1860/** 1865/**
1861 * cipso_v4_sock_setattr - Add a CIPSO option to a socket 1866 * cipso_v4_sock_setattr - Add a CIPSO option to a socket
1862 * @sk: the socket 1867 * @sk: the socket
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
1879 unsigned char *buf = NULL; 1884 unsigned char *buf = NULL;
1880 u32 buf_len; 1885 u32 buf_len;
1881 u32 opt_len; 1886 u32 opt_len;
1882 struct ip_options *opt = NULL; 1887 struct ip_options_rcu *old, *opt = NULL;
1883 struct inet_sock *sk_inet; 1888 struct inet_sock *sk_inet;
1884 struct inet_connection_sock *sk_conn; 1889 struct inet_connection_sock *sk_conn;
1885 1890
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk,
1915 ret_val = -ENOMEM; 1920 ret_val = -ENOMEM;
1916 goto socket_setattr_failure; 1921 goto socket_setattr_failure;
1917 } 1922 }
1918 memcpy(opt->__data, buf, buf_len); 1923 memcpy(opt->opt.__data, buf, buf_len);
1919 opt->optlen = opt_len; 1924 opt->opt.optlen = opt_len;
1920 opt->cipso = sizeof(struct iphdr); 1925 opt->opt.cipso = sizeof(struct iphdr);
1921 kfree(buf); 1926 kfree(buf);
1922 buf = NULL; 1927 buf = NULL;
1923 1928
1924 sk_inet = inet_sk(sk); 1929 sk_inet = inet_sk(sk);
1930
1931 old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk));
1925 if (sk_inet->is_icsk) { 1932 if (sk_inet->is_icsk) {
1926 sk_conn = inet_csk(sk); 1933 sk_conn = inet_csk(sk);
1927 if (sk_inet->opt) 1934 if (old)
1928 sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; 1935 sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
1929 sk_conn->icsk_ext_hdr_len += opt->optlen; 1936 sk_conn->icsk_ext_hdr_len += opt->opt.optlen;
1930 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); 1937 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
1931 } 1938 }
1932 opt = xchg(&sk_inet->opt, opt); 1939 rcu_assign_pointer(sk_inet->inet_opt, opt);
1933 kfree(opt); 1940 if (old)
1941 call_rcu(&old->rcu, opt_kfree_rcu);
1934 1942
1935 return 0; 1943 return 0;
1936 1944
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
1960 unsigned char *buf = NULL; 1968 unsigned char *buf = NULL;
1961 u32 buf_len; 1969 u32 buf_len;
1962 u32 opt_len; 1970 u32 opt_len;
1963 struct ip_options *opt = NULL; 1971 struct ip_options_rcu *opt = NULL;
1964 struct inet_request_sock *req_inet; 1972 struct inet_request_sock *req_inet;
1965 1973
1966 /* We allocate the maximum CIPSO option size here so we are probably 1974 /* We allocate the maximum CIPSO option size here so we are probably
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req,
1988 ret_val = -ENOMEM; 1996 ret_val = -ENOMEM;
1989 goto req_setattr_failure; 1997 goto req_setattr_failure;
1990 } 1998 }
1991 memcpy(opt->__data, buf, buf_len); 1999 memcpy(opt->opt.__data, buf, buf_len);
1992 opt->optlen = opt_len; 2000 opt->opt.optlen = opt_len;
1993 opt->cipso = sizeof(struct iphdr); 2001 opt->opt.cipso = sizeof(struct iphdr);
1994 kfree(buf); 2002 kfree(buf);
1995 buf = NULL; 2003 buf = NULL;
1996 2004
1997 req_inet = inet_rsk(req); 2005 req_inet = inet_rsk(req);
1998 opt = xchg(&req_inet->opt, opt); 2006 opt = xchg(&req_inet->opt, opt);
1999 kfree(opt); 2007 if (opt)
2008 call_rcu(&opt->rcu, opt_kfree_rcu);
2000 2009
2001 return 0; 2010 return 0;
2002 2011
@@ -2016,34 +2025,34 @@ req_setattr_failure:
2016 * values on failure. 2025 * values on failure.
2017 * 2026 *
2018 */ 2027 */
2019static int cipso_v4_delopt(struct ip_options **opt_ptr) 2028static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
2020{ 2029{
2021 int hdr_delta = 0; 2030 int hdr_delta = 0;
2022 struct ip_options *opt = *opt_ptr; 2031 struct ip_options_rcu *opt = *opt_ptr;
2023 2032
2024 if (opt->srr || opt->rr || opt->ts || opt->router_alert) { 2033 if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
2025 u8 cipso_len; 2034 u8 cipso_len;
2026 u8 cipso_off; 2035 u8 cipso_off;
2027 unsigned char *cipso_ptr; 2036 unsigned char *cipso_ptr;
2028 int iter; 2037 int iter;
2029 int optlen_new; 2038 int optlen_new;
2030 2039
2031 cipso_off = opt->cipso - sizeof(struct iphdr); 2040 cipso_off = opt->opt.cipso - sizeof(struct iphdr);
2032 cipso_ptr = &opt->__data[cipso_off]; 2041 cipso_ptr = &opt->opt.__data[cipso_off];
2033 cipso_len = cipso_ptr[1]; 2042 cipso_len = cipso_ptr[1];
2034 2043
2035 if (opt->srr > opt->cipso) 2044 if (opt->opt.srr > opt->opt.cipso)
2036 opt->srr -= cipso_len; 2045 opt->opt.srr -= cipso_len;
2037 if (opt->rr > opt->cipso) 2046 if (opt->opt.rr > opt->opt.cipso)
2038 opt->rr -= cipso_len; 2047 opt->opt.rr -= cipso_len;
2039 if (opt->ts > opt->cipso) 2048 if (opt->opt.ts > opt->opt.cipso)
2040 opt->ts -= cipso_len; 2049 opt->opt.ts -= cipso_len;
2041 if (opt->router_alert > opt->cipso) 2050 if (opt->opt.router_alert > opt->opt.cipso)
2042 opt->router_alert -= cipso_len; 2051 opt->opt.router_alert -= cipso_len;
2043 opt->cipso = 0; 2052 opt->opt.cipso = 0;
2044 2053
2045 memmove(cipso_ptr, cipso_ptr + cipso_len, 2054 memmove(cipso_ptr, cipso_ptr + cipso_len,
2046 opt->optlen - cipso_off - cipso_len); 2055 opt->opt.optlen - cipso_off - cipso_len);
2047 2056
2048 /* determining the new total option length is tricky because of 2057 /* determining the new total option length is tricky because of
2049 * the padding necessary, the only thing i can think to do at 2058 * the padding necessary, the only thing i can think to do at
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2052 * from there we can determine the new total option length */ 2061 * from there we can determine the new total option length */
2053 iter = 0; 2062 iter = 0;
2054 optlen_new = 0; 2063 optlen_new = 0;
2055 while (iter < opt->optlen) 2064 while (iter < opt->opt.optlen)
2056 if (opt->__data[iter] != IPOPT_NOP) { 2065 if (opt->opt.__data[iter] != IPOPT_NOP) {
2057 iter += opt->__data[iter + 1]; 2066 iter += opt->opt.__data[iter + 1];
2058 optlen_new = iter; 2067 optlen_new = iter;
2059 } else 2068 } else
2060 iter++; 2069 iter++;
2061 hdr_delta = opt->optlen; 2070 hdr_delta = opt->opt.optlen;
2062 opt->optlen = (optlen_new + 3) & ~3; 2071 opt->opt.optlen = (optlen_new + 3) & ~3;
2063 hdr_delta -= opt->optlen; 2072 hdr_delta -= opt->opt.optlen;
2064 } else { 2073 } else {
2065 /* only the cipso option was present on the socket so we can 2074 /* only the cipso option was present on the socket so we can
2066 * remove the entire option struct */ 2075 * remove the entire option struct */
2067 *opt_ptr = NULL; 2076 *opt_ptr = NULL;
2068 hdr_delta = opt->optlen; 2077 hdr_delta = opt->opt.optlen;
2069 kfree(opt); 2078 call_rcu(&opt->rcu, opt_kfree_rcu);
2070 } 2079 }
2071 2080
2072 return hdr_delta; 2081 return hdr_delta;
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2083void cipso_v4_sock_delattr(struct sock *sk) 2092void cipso_v4_sock_delattr(struct sock *sk)
2084{ 2093{
2085 int hdr_delta; 2094 int hdr_delta;
2086 struct ip_options *opt; 2095 struct ip_options_rcu *opt;
2087 struct inet_sock *sk_inet; 2096 struct inet_sock *sk_inet;
2088 2097
2089 sk_inet = inet_sk(sk); 2098 sk_inet = inet_sk(sk);
2090 opt = sk_inet->opt; 2099 opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
2091 if (opt == NULL || opt->cipso == 0) 2100 if (opt == NULL || opt->opt.cipso == 0)
2092 return; 2101 return;
2093 2102
2094 hdr_delta = cipso_v4_delopt(&sk_inet->opt); 2103 hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
2095 if (sk_inet->is_icsk && hdr_delta > 0) { 2104 if (sk_inet->is_icsk && hdr_delta > 0) {
2096 struct inet_connection_sock *sk_conn = inet_csk(sk); 2105 struct inet_connection_sock *sk_conn = inet_csk(sk);
2097 sk_conn->icsk_ext_hdr_len -= hdr_delta; 2106 sk_conn->icsk_ext_hdr_len -= hdr_delta;
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk)
2109 */ 2118 */
2110void cipso_v4_req_delattr(struct request_sock *req) 2119void cipso_v4_req_delattr(struct request_sock *req)
2111{ 2120{
2112 struct ip_options *opt; 2121 struct ip_options_rcu *opt;
2113 struct inet_request_sock *req_inet; 2122 struct inet_request_sock *req_inet;
2114 2123
2115 req_inet = inet_rsk(req); 2124 req_inet = inet_rsk(req);
2116 opt = req_inet->opt; 2125 opt = req_inet->opt;
2117 if (opt == NULL || opt->cipso == 0) 2126 if (opt == NULL || opt->opt.cipso == 0)
2118 return; 2127 return;
2119 2128
2120 cipso_v4_delopt(&req_inet->opt); 2129 cipso_v4_delopt(&req_inet->opt);
@@ -2184,14 +2193,18 @@ getattr_return:
2184 */ 2193 */
2185int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) 2194int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
2186{ 2195{
2187 struct ip_options *opt; 2196 struct ip_options_rcu *opt;
2197 int res = -ENOMSG;
2188 2198
2189 opt = inet_sk(sk)->opt; 2199 rcu_read_lock();
2190 if (opt == NULL || opt->cipso == 0) 2200 opt = rcu_dereference(inet_sk(sk)->inet_opt);
2191 return -ENOMSG; 2201 if (opt && opt->opt.cipso)
2192 2202 res = cipso_v4_getattr(opt->opt.__data +
2193 return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), 2203 opt->opt.cipso -
2194 secattr); 2204 sizeof(struct iphdr),
2205 secattr);
2206 rcu_read_unlock();
2207 return res;
2195} 2208}
2196 2209
2197/** 2210/**
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 85bd24ca4f6d..424fafbc8cb0 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
24{ 24{
25 struct inet_sock *inet = inet_sk(sk); 25 struct inet_sock *inet = inet_sk(sk);
26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; 26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
27 struct flowi4 *fl4;
27 struct rtable *rt; 28 struct rtable *rt;
28 __be32 saddr; 29 __be32 saddr;
29 int oif; 30 int oif;
@@ -38,6 +39,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
38 39
39 sk_dst_reset(sk); 40 sk_dst_reset(sk);
40 41
42 lock_sock(sk);
43
41 oif = sk->sk_bound_dev_if; 44 oif = sk->sk_bound_dev_if;
42 saddr = inet->inet_saddr; 45 saddr = inet->inet_saddr;
43 if (ipv4_is_multicast(usin->sin_addr.s_addr)) { 46 if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -46,7 +49,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
46 if (!saddr) 49 if (!saddr)
47 saddr = inet->mc_addr; 50 saddr = inet->mc_addr;
48 } 51 }
49 rt = ip_route_connect(usin->sin_addr.s_addr, saddr, 52 fl4 = &inet->cork.fl.u.ip4;
53 rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
50 RT_CONN_FLAGS(sk), oif, 54 RT_CONN_FLAGS(sk), oif,
51 sk->sk_protocol, 55 sk->sk_protocol,
52 inet->inet_sport, usin->sin_port, sk, true); 56 inet->inet_sport, usin->sin_port, sk, true);
@@ -54,26 +58,30 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
54 err = PTR_ERR(rt); 58 err = PTR_ERR(rt);
55 if (err == -ENETUNREACH) 59 if (err == -ENETUNREACH)
56 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 60 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
57 return err; 61 goto out;
58 } 62 }
59 63
60 if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { 64 if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
61 ip_rt_put(rt); 65 ip_rt_put(rt);
62 return -EACCES; 66 err = -EACCES;
67 goto out;
63 } 68 }
64 if (!inet->inet_saddr) 69 if (!inet->inet_saddr)
65 inet->inet_saddr = rt->rt_src; /* Update source address */ 70 inet->inet_saddr = fl4->saddr; /* Update source address */
66 if (!inet->inet_rcv_saddr) { 71 if (!inet->inet_rcv_saddr) {
67 inet->inet_rcv_saddr = rt->rt_src; 72 inet->inet_rcv_saddr = fl4->saddr;
68 if (sk->sk_prot->rehash) 73 if (sk->sk_prot->rehash)
69 sk->sk_prot->rehash(sk); 74 sk->sk_prot->rehash(sk);
70 } 75 }
71 inet->inet_daddr = rt->rt_dst; 76 inet->inet_daddr = fl4->daddr;
72 inet->inet_dport = usin->sin_port; 77 inet->inet_dport = usin->sin_port;
73 sk->sk_state = TCP_ESTABLISHED; 78 sk->sk_state = TCP_ESTABLISHED;
74 inet->inet_id = jiffies; 79 inet->inet_id = jiffies;
75 80
76 sk_dst_set(sk, &rt->dst); 81 sk_dst_set(sk, &rt->dst);
77 return 0; 82 err = 0;
83out:
84 release_sock(sk);
85 return err;
78} 86}
79EXPORT_SYMBOL(ip4_datagram_connect); 87EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cd9ca0811cfa..0d4a184af16f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1369,7 +1369,7 @@ errout:
1369 1369
1370static size_t inet_get_link_af_size(const struct net_device *dev) 1370static size_t inet_get_link_af_size(const struct net_device *dev)
1371{ 1371{
1372 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1372 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1373 1373
1374 if (!in_dev) 1374 if (!in_dev)
1375 return 0; 1375 return 0;
@@ -1379,7 +1379,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
1379 1379
1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) 1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1381{ 1381{
1382 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1382 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383 struct nlattr *nla; 1383 struct nlattr *nla;
1384 int i; 1384 int i;
1385 1385
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 03f994bcf7de..a5b413416da3 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -276,7 +276,7 @@ error:
276 276
277static int esp_input_done2(struct sk_buff *skb, int err) 277static int esp_input_done2(struct sk_buff *skb, int err)
278{ 278{
279 struct iphdr *iph; 279 const struct iphdr *iph;
280 struct xfrm_state *x = xfrm_input_state(skb); 280 struct xfrm_state *x = xfrm_input_state(skb);
281 struct esp_data *esp = x->data; 281 struct esp_data *esp = x->data;
282 struct crypto_aead *aead = esp->aead; 282 struct crypto_aead *aead = esp->aead;
@@ -484,7 +484,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
484static void esp4_err(struct sk_buff *skb, u32 info) 484static void esp4_err(struct sk_buff *skb, u32 info)
485{ 485{
486 struct net *net = dev_net(skb->dev); 486 struct net *net = dev_net(skb->dev);
487 struct iphdr *iph = (struct iphdr *)skb->data; 487 const struct iphdr *iph = (const struct iphdr *)skb->data;
488 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); 488 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
489 struct xfrm_state *x; 489 struct xfrm_state *x;
490 490
@@ -492,7 +492,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
492 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 492 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
493 return; 493 return;
494 494
495 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); 495 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
496 esph->spi, IPPROTO_ESP, AF_INET);
496 if (!x) 497 if (!x)
497 return; 498 return;
498 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", 499 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 451088330bbb..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
44#include <net/arp.h> 44#include <net/arp.h>
45#include <net/ip_fib.h> 45#include <net/ip_fib.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/xfrm.h>
47 48
48#ifndef CONFIG_IP_MULTIPLE_TABLES 49#ifndef CONFIG_IP_MULTIPLE_TABLES
49 50
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type);
188 * - check, that packet arrived from expected physical interface. 189 * - check, that packet arrived from expected physical interface.
189 * called with rcu_read_lock() 190 * called with rcu_read_lock()
190 */ 191 */
191int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 192int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
192 struct net_device *dev, __be32 *spec_dst, 193 int oif, struct net_device *dev, __be32 *spec_dst,
193 u32 *itag, u32 mark) 194 u32 *itag)
194{ 195{
195 struct in_device *in_dev; 196 struct in_device *in_dev;
196 struct flowi4 fl4; 197 struct flowi4 fl4;
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
202 203
203 fl4.flowi4_oif = 0; 204 fl4.flowi4_oif = 0;
204 fl4.flowi4_iif = oif; 205 fl4.flowi4_iif = oif;
205 fl4.flowi4_mark = mark;
206 fl4.daddr = src; 206 fl4.daddr = src;
207 fl4.saddr = dst; 207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos; 208 fl4.flowi4_tos = tos;
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
212 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
213 if (in_dev) { 213 if (in_dev) {
214 no_addr = in_dev->ifa_list == NULL; 214 no_addr = in_dev->ifa_list == NULL;
215 rpf = IN_DEV_RPFILTER(in_dev); 215
216 /* Ignore rp_filter for packets protected by IPsec. */
217 rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
218
216 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 219 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
217 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 220 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
218 fl4.flowi4_mark = 0;
219 } 221 }
220 222
221 if (in_dev == NULL) 223 if (in_dev == NULL)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 11d4d28190bd..c779ce96e5b5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -126,7 +126,7 @@ struct tnode {
126 struct work_struct work; 126 struct work_struct work;
127 struct tnode *tnode_free; 127 struct tnode *tnode_free;
128 }; 128 };
129 struct rt_trie_node *child[0]; 129 struct rt_trie_node __rcu *child[0];
130}; 130};
131 131
132#ifdef CONFIG_IP_FIB_TRIE_STATS 132#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,7 +151,7 @@ struct trie_stat {
151}; 151};
152 152
153struct trie { 153struct trie {
154 struct rt_trie_node *trie; 154 struct rt_trie_node __rcu *trie;
155#ifdef CONFIG_IP_FIB_TRIE_STATS 155#ifdef CONFIG_IP_FIB_TRIE_STATS
156 struct trie_use_stats stats; 156 struct trie_use_stats stats;
157#endif 157#endif
@@ -177,16 +177,29 @@ static const int sync_pages = 128;
177static struct kmem_cache *fn_alias_kmem __read_mostly; 177static struct kmem_cache *fn_alias_kmem __read_mostly;
178static struct kmem_cache *trie_leaf_kmem __read_mostly; 178static struct kmem_cache *trie_leaf_kmem __read_mostly;
179 179
180static inline struct tnode *node_parent(struct rt_trie_node *node) 180/*
181 * caller must hold RTNL
182 */
183static inline struct tnode *node_parent(const struct rt_trie_node *node)
181{ 184{
182 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 185 unsigned long parent;
186
187 parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
188
189 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
183} 190}
184 191
185static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) 192/*
193 * caller must hold RCU read lock or RTNL
194 */
195static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
186{ 196{
187 struct tnode *ret = node_parent(node); 197 unsigned long parent;
198
199 parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
200 lockdep_rtnl_is_held());
188 201
189 return rcu_dereference_rtnl(ret); 202 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
190} 203}
191 204
192/* Same as rcu_assign_pointer 205/* Same as rcu_assign_pointer
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
198 node->parent = (unsigned long)ptr | NODE_TYPE(node); 211 node->parent = (unsigned long)ptr | NODE_TYPE(node);
199} 212}
200 213
201static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) 214/*
215 * caller must hold RTNL
216 */
217static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
202{ 218{
203 BUG_ON(i >= 1U << tn->bits); 219 BUG_ON(i >= 1U << tn->bits);
204 220
205 return tn->child[i]; 221 return rtnl_dereference(tn->child[i]);
206} 222}
207 223
208static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 224/*
225 * caller must hold RCU read lock or RTNL
226 */
227static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
209{ 228{
210 struct rt_trie_node *ret = tnode_get_child(tn, i); 229 BUG_ON(i >= 1U << tn->bits);
211 230
212 return rcu_dereference_rtnl(ret); 231 return rcu_dereference_rtnl(tn->child[i]);
213} 232}
214 233
215static inline int tnode_child_length(const struct tnode *tn) 234static inline int tnode_child_length(const struct tnode *tn)
@@ -482,7 +501,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
482static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, 501static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
483 int wasfull) 502 int wasfull)
484{ 503{
485 struct rt_trie_node *chi = tn->child[i]; 504 struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
486 int isfull; 505 int isfull;
487 506
488 BUG_ON(i >= 1<<tn->bits); 507 BUG_ON(i >= 1<<tn->bits);
@@ -660,7 +679,7 @@ one_child:
660 for (i = 0; i < tnode_child_length(tn); i++) { 679 for (i = 0; i < tnode_child_length(tn); i++) {
661 struct rt_trie_node *n; 680 struct rt_trie_node *n;
662 681
663 n = tn->child[i]; 682 n = rtnl_dereference(tn->child[i]);
664 if (!n) 683 if (!n)
665 continue; 684 continue;
666 685
@@ -674,6 +693,20 @@ one_child:
674 return (struct rt_trie_node *) tn; 693 return (struct rt_trie_node *) tn;
675} 694}
676 695
696
697static void tnode_clean_free(struct tnode *tn)
698{
699 int i;
700 struct tnode *tofree;
701
702 for (i = 0; i < tnode_child_length(tn); i++) {
703 tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
704 if (tofree)
705 tnode_free(tofree);
706 }
707 tnode_free(tn);
708}
709
677static struct tnode *inflate(struct trie *t, struct tnode *tn) 710static struct tnode *inflate(struct trie *t, struct tnode *tn)
678{ 711{
679 struct tnode *oldtnode = tn; 712 struct tnode *oldtnode = tn;
@@ -750,8 +783,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
750 inode = (struct tnode *) node; 783 inode = (struct tnode *) node;
751 784
752 if (inode->bits == 1) { 785 if (inode->bits == 1) {
753 put_child(t, tn, 2*i, inode->child[0]); 786 put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
754 put_child(t, tn, 2*i+1, inode->child[1]); 787 put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
755 788
756 tnode_free_safe(inode); 789 tnode_free_safe(inode);
757 continue; 790 continue;
@@ -792,8 +825,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
792 825
793 size = tnode_child_length(left); 826 size = tnode_child_length(left);
794 for (j = 0; j < size; j++) { 827 for (j = 0; j < size; j++) {
795 put_child(t, left, j, inode->child[j]); 828 put_child(t, left, j, rtnl_dereference(inode->child[j]));
796 put_child(t, right, j, inode->child[j + size]); 829 put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
797 } 830 }
798 put_child(t, tn, 2*i, resize(t, left)); 831 put_child(t, tn, 2*i, resize(t, left));
799 put_child(t, tn, 2*i+1, resize(t, right)); 832 put_child(t, tn, 2*i+1, resize(t, right));
@@ -803,18 +836,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
803 tnode_free_safe(oldtnode); 836 tnode_free_safe(oldtnode);
804 return tn; 837 return tn;
805nomem: 838nomem:
806 { 839 tnode_clean_free(tn);
807 int size = tnode_child_length(tn); 840 return ERR_PTR(-ENOMEM);
808 int j;
809
810 for (j = 0; j < size; j++)
811 if (tn->child[j])
812 tnode_free((struct tnode *)tn->child[j]);
813
814 tnode_free(tn);
815
816 return ERR_PTR(-ENOMEM);
817 }
818} 841}
819 842
820static struct tnode *halve(struct trie *t, struct tnode *tn) 843static struct tnode *halve(struct trie *t, struct tnode *tn)
@@ -885,18 +908,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
885 tnode_free_safe(oldtnode); 908 tnode_free_safe(oldtnode);
886 return tn; 909 return tn;
887nomem: 910nomem:
888 { 911 tnode_clean_free(tn);
889 int size = tnode_child_length(tn); 912 return ERR_PTR(-ENOMEM);
890 int j;
891
892 for (j = 0; j < size; j++)
893 if (tn->child[j])
894 tnode_free((struct tnode *)tn->child[j]);
895
896 tnode_free(tn);
897
898 return ERR_PTR(-ENOMEM);
899 }
900} 913}
901 914
902/* readside must use rcu_read_lock currently dump routines 915/* readside must use rcu_read_lock currently dump routines
@@ -1028,7 +1041,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1028 t_key cindex; 1041 t_key cindex;
1029 1042
1030 pos = 0; 1043 pos = 0;
1031 n = t->trie; 1044 n = rtnl_dereference(t->trie);
1032 1045
1033 /* If we point to NULL, stop. Either the tree is empty and we should 1046 /* If we point to NULL, stop. Either the tree is empty and we should
1034 * just put a new leaf in if, or we have reached an empty child slot, 1047 * just put a new leaf in if, or we have reached an empty child slot,
@@ -1314,6 +1327,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1314 } 1327 }
1315 } 1328 }
1316 1329
1330 if (!plen)
1331 tb->tb_num_default++;
1332
1317 list_add_tail_rcu(&new_fa->fa_list, 1333 list_add_tail_rcu(&new_fa->fa_list,
1318 (fa ? &fa->fa_list : fa_head)); 1334 (fa ? &fa->fa_list : fa_head));
1319 1335
@@ -1679,6 +1695,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1679 1695
1680 list_del_rcu(&fa->fa_list); 1696 list_del_rcu(&fa->fa_list);
1681 1697
1698 if (!plen)
1699 tb->tb_num_default--;
1700
1682 if (list_empty(fa_head)) { 1701 if (list_empty(fa_head)) {
1683 hlist_del_rcu(&li->hlist); 1702 hlist_del_rcu(&li->hlist);
1684 free_leaf_info(li); 1703 free_leaf_info(li);
@@ -1751,7 +1770,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
1751 continue; 1770 continue;
1752 1771
1753 if (IS_LEAF(c)) { 1772 if (IS_LEAF(c)) {
1754 prefetch(p->child[idx]); 1773 prefetch(rcu_dereference_rtnl(p->child[idx]));
1755 return (struct leaf *) c; 1774 return (struct leaf *) c;
1756 } 1775 }
1757 1776
@@ -1969,6 +1988,7 @@ struct fib_table *fib_trie_table(u32 id)
1969 1988
1970 tb->tb_id = id; 1989 tb->tb_id = id;
1971 tb->tb_default = -1; 1990 tb->tb_default = -1;
1991 tb->tb_num_default = 0;
1972 1992
1973 t = (struct trie *) tb->tb_data; 1993 t = (struct trie *) tb->tb_data;
1974 memset(t, 0, sizeof(*t)); 1994 memset(t, 0, sizeof(*t));
@@ -2264,7 +2284,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2264 2284
2265 /* walk rest of this hash chain */ 2285 /* walk rest of this hash chain */
2266 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 2286 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2267 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { 2287 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2268 tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 2288 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2269 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 2289 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2270 if (n) 2290 if (n)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5f8a71d3a2a..5395e45dcce6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -83,6 +83,7 @@
83#include <net/tcp.h> 83#include <net/tcp.h>
84#include <net/udp.h> 84#include <net/udp.h>
85#include <net/raw.h> 85#include <net/raw.h>
86#include <net/ping.h>
86#include <linux/skbuff.h> 87#include <linux/skbuff.h>
87#include <net/sock.h> 88#include <net/sock.h>
88#include <linux/errno.h> 89#include <linux/errno.h>
@@ -108,8 +109,7 @@ struct icmp_bxm {
108 __be32 times[3]; 109 __be32 times[3];
109 } data; 110 } data;
110 int head_len; 111 int head_len;
111 struct ip_options replyopts; 112 struct ip_options_data replyopts;
112 unsigned char optbuf[40];
113}; 113};
114 114
115/* An array of errno for error messages from dest unreach. */ 115/* An array of errno for error messages from dest unreach. */
@@ -234,7 +234,7 @@ static inline void icmp_xmit_unlock(struct sock *sk)
234 */ 234 */
235 235
236static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, 236static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
237 int type, int code) 237 struct flowi4 *fl4, int type, int code)
238{ 238{
239 struct dst_entry *dst = &rt->dst; 239 struct dst_entry *dst = &rt->dst;
240 bool rc = true; 240 bool rc = true;
@@ -253,7 +253,7 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
253 /* Limit if icmp type is enabled in ratemask. */ 253 /* Limit if icmp type is enabled in ratemask. */
254 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { 254 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
255 if (!rt->peer) 255 if (!rt->peer)
256 rt_bind_peer(rt, 1); 256 rt_bind_peer(rt, fl4->daddr, 1);
257 rc = inet_peer_xrlim_allow(rt->peer, 257 rc = inet_peer_xrlim_allow(rt->peer,
258 net->ipv4.sysctl_icmp_ratelimit); 258 net->ipv4.sysctl_icmp_ratelimit);
259 } 259 }
@@ -291,13 +291,14 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
291} 291}
292 292
293static void icmp_push_reply(struct icmp_bxm *icmp_param, 293static void icmp_push_reply(struct icmp_bxm *icmp_param,
294 struct flowi4 *fl4,
294 struct ipcm_cookie *ipc, struct rtable **rt) 295 struct ipcm_cookie *ipc, struct rtable **rt)
295{ 296{
296 struct sock *sk; 297 struct sock *sk;
297 struct sk_buff *skb; 298 struct sk_buff *skb;
298 299
299 sk = icmp_sk(dev_net((*rt)->dst.dev)); 300 sk = icmp_sk(dev_net((*rt)->dst.dev));
300 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 301 if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
301 icmp_param->data_len+icmp_param->head_len, 302 icmp_param->data_len+icmp_param->head_len,
302 icmp_param->head_len, 303 icmp_param->head_len,
303 ipc, rt, MSG_DONTWAIT) < 0) { 304 ipc, rt, MSG_DONTWAIT) < 0) {
@@ -316,7 +317,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
316 icmp_param->head_len, csum); 317 icmp_param->head_len, csum);
317 icmph->checksum = csum_fold(csum); 318 icmph->checksum = csum_fold(csum);
318 skb->ip_summed = CHECKSUM_NONE; 319 skb->ip_summed = CHECKSUM_NONE;
319 ip_push_pending_frames(sk); 320 ip_push_pending_frames(sk, fl4);
320 } 321 }
321} 322}
322 323
@@ -329,11 +330,12 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
329 struct ipcm_cookie ipc; 330 struct ipcm_cookie ipc;
330 struct rtable *rt = skb_rtable(skb); 331 struct rtable *rt = skb_rtable(skb);
331 struct net *net = dev_net(rt->dst.dev); 332 struct net *net = dev_net(rt->dst.dev);
333 struct flowi4 fl4;
332 struct sock *sk; 334 struct sock *sk;
333 struct inet_sock *inet; 335 struct inet_sock *inet;
334 __be32 daddr; 336 __be32 daddr;
335 337
336 if (ip_options_echo(&icmp_param->replyopts, skb)) 338 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
337 return; 339 return;
338 340
339 sk = icmp_xmit_lock(net); 341 sk = icmp_xmit_lock(net);
@@ -344,65 +346,60 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
344 icmp_param->data.icmph.checksum = 0; 346 icmp_param->data.icmph.checksum = 0;
345 347
346 inet->tos = ip_hdr(skb)->tos; 348 inet->tos = ip_hdr(skb)->tos;
347 daddr = ipc.addr = rt->rt_src; 349 daddr = ipc.addr = ip_hdr(skb)->saddr;
348 ipc.opt = NULL; 350 ipc.opt = NULL;
349 ipc.tx_flags = 0; 351 ipc.tx_flags = 0;
350 if (icmp_param->replyopts.optlen) { 352 if (icmp_param->replyopts.opt.opt.optlen) {
351 ipc.opt = &icmp_param->replyopts; 353 ipc.opt = &icmp_param->replyopts.opt;
352 if (ipc.opt->srr) 354 if (ipc.opt->opt.srr)
353 daddr = icmp_param->replyopts.faddr; 355 daddr = icmp_param->replyopts.opt.opt.faddr;
354 } 356 }
355 { 357 memset(&fl4, 0, sizeof(fl4));
356 struct flowi4 fl4 = { 358 fl4.daddr = daddr;
357 .daddr = daddr, 359 fl4.saddr = rt->rt_spec_dst;
358 .saddr = rt->rt_spec_dst, 360 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
359 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), 361 fl4.flowi4_proto = IPPROTO_ICMP;
360 .flowi4_proto = IPPROTO_ICMP, 362 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
361 }; 363 rt = ip_route_output_key(net, &fl4);
362 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 364 if (IS_ERR(rt))
363 rt = ip_route_output_key(net, &fl4); 365 goto out_unlock;
364 if (IS_ERR(rt)) 366 if (icmpv4_xrlim_allow(net, rt, &fl4, icmp_param->data.icmph.type,
365 goto out_unlock;
366 }
367 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
368 icmp_param->data.icmph.code)) 367 icmp_param->data.icmph.code))
369 icmp_push_reply(icmp_param, &ipc, &rt); 368 icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
370 ip_rt_put(rt); 369 ip_rt_put(rt);
371out_unlock: 370out_unlock:
372 icmp_xmit_unlock(sk); 371 icmp_xmit_unlock(sk);
373} 372}
374 373
375static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, 374static struct rtable *icmp_route_lookup(struct net *net,
376 struct iphdr *iph, 375 struct flowi4 *fl4,
376 struct sk_buff *skb_in,
377 const struct iphdr *iph,
377 __be32 saddr, u8 tos, 378 __be32 saddr, u8 tos,
378 int type, int code, 379 int type, int code,
379 struct icmp_bxm *param) 380 struct icmp_bxm *param)
380{ 381{
381 struct flowi4 fl4 = {
382 .daddr = (param->replyopts.srr ?
383 param->replyopts.faddr : iph->saddr),
384 .saddr = saddr,
385 .flowi4_tos = RT_TOS(tos),
386 .flowi4_proto = IPPROTO_ICMP,
387 .fl4_icmp_type = type,
388 .fl4_icmp_code = code,
389 };
390 struct rtable *rt, *rt2; 382 struct rtable *rt, *rt2;
391 int err; 383 int err;
392 384
393 security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); 385 memset(fl4, 0, sizeof(*fl4));
394 rt = __ip_route_output_key(net, &fl4); 386 fl4->daddr = (param->replyopts.opt.opt.srr ?
387 param->replyopts.opt.opt.faddr : iph->saddr);
388 fl4->saddr = saddr;
389 fl4->flowi4_tos = RT_TOS(tos);
390 fl4->flowi4_proto = IPPROTO_ICMP;
391 fl4->fl4_icmp_type = type;
392 fl4->fl4_icmp_code = code;
393 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
394 rt = __ip_route_output_key(net, fl4);
395 if (IS_ERR(rt)) 395 if (IS_ERR(rt))
396 return rt; 396 return rt;
397 397
398 /* No need to clone since we're just using its address. */ 398 /* No need to clone since we're just using its address. */
399 rt2 = rt; 399 rt2 = rt;
400 400
401 if (!fl4.saddr)
402 fl4.saddr = rt->rt_src;
403
404 rt = (struct rtable *) xfrm_lookup(net, &rt->dst, 401 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
405 flowi4_to_flowi(&fl4), NULL, 0); 402 flowi4_to_flowi(fl4), NULL, 0);
406 if (!IS_ERR(rt)) { 403 if (!IS_ERR(rt)) {
407 if (rt != rt2) 404 if (rt != rt2)
408 return rt; 405 return rt;
@@ -411,19 +408,19 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
411 } else 408 } else
412 return rt; 409 return rt;
413 410
414 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); 411 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET);
415 if (err) 412 if (err)
416 goto relookup_failed; 413 goto relookup_failed;
417 414
418 if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { 415 if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) {
419 rt2 = __ip_route_output_key(net, &fl4); 416 rt2 = __ip_route_output_key(net, fl4);
420 if (IS_ERR(rt2)) 417 if (IS_ERR(rt2))
421 err = PTR_ERR(rt2); 418 err = PTR_ERR(rt2);
422 } else { 419 } else {
423 struct flowi4 fl4_2 = {}; 420 struct flowi4 fl4_2 = {};
424 unsigned long orefdst; 421 unsigned long orefdst;
425 422
426 fl4_2.daddr = fl4.saddr; 423 fl4_2.daddr = fl4->saddr;
427 rt2 = ip_route_output_key(net, &fl4_2); 424 rt2 = ip_route_output_key(net, &fl4_2);
428 if (IS_ERR(rt2)) { 425 if (IS_ERR(rt2)) {
429 err = PTR_ERR(rt2); 426 err = PTR_ERR(rt2);
@@ -431,7 +428,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
431 } 428 }
432 /* Ugh! */ 429 /* Ugh! */
433 orefdst = skb_in->_skb_refdst; /* save old refdst */ 430 orefdst = skb_in->_skb_refdst; /* save old refdst */
434 err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, 431 err = ip_route_input(skb_in, fl4->daddr, fl4->saddr,
435 RT_TOS(tos), rt2->dst.dev); 432 RT_TOS(tos), rt2->dst.dev);
436 433
437 dst_release(&rt2->dst); 434 dst_release(&rt2->dst);
@@ -443,7 +440,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
443 goto relookup_failed; 440 goto relookup_failed;
444 441
445 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, 442 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
446 flowi4_to_flowi(&fl4), NULL, 443 flowi4_to_flowi(fl4), NULL,
447 XFRM_LOOKUP_ICMP); 444 XFRM_LOOKUP_ICMP);
448 if (!IS_ERR(rt2)) { 445 if (!IS_ERR(rt2)) {
449 dst_release(&rt->dst); 446 dst_release(&rt->dst);
@@ -482,6 +479,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
482 struct icmp_bxm icmp_param; 479 struct icmp_bxm icmp_param;
483 struct rtable *rt = skb_rtable(skb_in); 480 struct rtable *rt = skb_rtable(skb_in);
484 struct ipcm_cookie ipc; 481 struct ipcm_cookie ipc;
482 struct flowi4 fl4;
485 __be32 saddr; 483 __be32 saddr;
486 u8 tos; 484 u8 tos;
487 struct net *net; 485 struct net *net;
@@ -581,7 +579,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
581 IPTOS_PREC_INTERNETCONTROL) : 579 IPTOS_PREC_INTERNETCONTROL) :
582 iph->tos; 580 iph->tos;
583 581
584 if (ip_options_echo(&icmp_param.replyopts, skb_in)) 582 if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
585 goto out_unlock; 583 goto out_unlock;
586 584
587 585
@@ -597,15 +595,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
597 icmp_param.offset = skb_network_offset(skb_in); 595 icmp_param.offset = skb_network_offset(skb_in);
598 inet_sk(sk)->tos = tos; 596 inet_sk(sk)->tos = tos;
599 ipc.addr = iph->saddr; 597 ipc.addr = iph->saddr;
600 ipc.opt = &icmp_param.replyopts; 598 ipc.opt = &icmp_param.replyopts.opt;
601 ipc.tx_flags = 0; 599 ipc.tx_flags = 0;
602 600
603 rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, 601 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
604 type, code, &icmp_param); 602 type, code, &icmp_param);
605 if (IS_ERR(rt)) 603 if (IS_ERR(rt))
606 goto out_unlock; 604 goto out_unlock;
607 605
608 if (!icmpv4_xrlim_allow(net, rt, type, code)) 606 if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
609 goto ende; 607 goto ende;
610 608
611 /* RFC says return as much as we can without exceeding 576 bytes. */ 609 /* RFC says return as much as we can without exceeding 576 bytes. */
@@ -613,7 +611,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
613 room = dst_mtu(&rt->dst); 611 room = dst_mtu(&rt->dst);
614 if (room > 576) 612 if (room > 576)
615 room = 576; 613 room = 576;
616 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 614 room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
617 room -= sizeof(struct icmphdr); 615 room -= sizeof(struct icmphdr);
618 616
619 icmp_param.data_len = skb_in->len - icmp_param.offset; 617 icmp_param.data_len = skb_in->len - icmp_param.offset;
@@ -621,7 +619,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
621 icmp_param.data_len = room; 619 icmp_param.data_len = room;
622 icmp_param.head_len = sizeof(struct icmphdr); 620 icmp_param.head_len = sizeof(struct icmphdr);
623 621
624 icmp_push_reply(&icmp_param, &ipc, &rt); 622 icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
625ende: 623ende:
626 ip_rt_put(rt); 624 ip_rt_put(rt);
627out_unlock: 625out_unlock:
@@ -637,7 +635,7 @@ EXPORT_SYMBOL(icmp_send);
637 635
638static void icmp_unreach(struct sk_buff *skb) 636static void icmp_unreach(struct sk_buff *skb)
639{ 637{
640 struct iphdr *iph; 638 const struct iphdr *iph;
641 struct icmphdr *icmph; 639 struct icmphdr *icmph;
642 int hash, protocol; 640 int hash, protocol;
643 const struct net_protocol *ipprot; 641 const struct net_protocol *ipprot;
@@ -656,7 +654,7 @@ static void icmp_unreach(struct sk_buff *skb)
656 goto out_err; 654 goto out_err;
657 655
658 icmph = icmp_hdr(skb); 656 icmph = icmp_hdr(skb);
659 iph = (struct iphdr *)skb->data; 657 iph = (const struct iphdr *)skb->data;
660 658
661 if (iph->ihl < 5) /* Mangled header, drop. */ 659 if (iph->ihl < 5) /* Mangled header, drop. */
662 goto out_err; 660 goto out_err;
@@ -729,7 +727,7 @@ static void icmp_unreach(struct sk_buff *skb)
729 if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) 727 if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
730 goto out; 728 goto out;
731 729
732 iph = (struct iphdr *)skb->data; 730 iph = (const struct iphdr *)skb->data;
733 protocol = iph->protocol; 731 protocol = iph->protocol;
734 732
735 /* 733 /*
@@ -758,7 +756,7 @@ out_err:
758 756
759static void icmp_redirect(struct sk_buff *skb) 757static void icmp_redirect(struct sk_buff *skb)
760{ 758{
761 struct iphdr *iph; 759 const struct iphdr *iph;
762 760
763 if (skb->len < sizeof(struct iphdr)) 761 if (skb->len < sizeof(struct iphdr))
764 goto out_err; 762 goto out_err;
@@ -769,7 +767,7 @@ static void icmp_redirect(struct sk_buff *skb)
769 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 767 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
770 goto out; 768 goto out;
771 769
772 iph = (struct iphdr *)skb->data; 770 iph = (const struct iphdr *)skb->data;
773 771
774 switch (icmp_hdr(skb)->code & 7) { 772 switch (icmp_hdr(skb)->code & 7) {
775 case ICMP_REDIR_NET: 773 case ICMP_REDIR_NET:
@@ -784,6 +782,15 @@ static void icmp_redirect(struct sk_buff *skb)
784 iph->saddr, skb->dev); 782 iph->saddr, skb->dev);
785 break; 783 break;
786 } 784 }
785
786 /* Ping wants to see redirects.
787 * Let's pretend they are errors of sorts... */
788 if (iph->protocol == IPPROTO_ICMP &&
789 iph->ihl >= 5 &&
790 pskb_may_pull(skb, (iph->ihl<<2)+8)) {
791 ping_err(skb, icmp_hdr(skb)->un.gateway);
792 }
793
787out: 794out:
788 return; 795 return;
789out_err: 796out_err:
@@ -933,12 +940,12 @@ static void icmp_address_reply(struct sk_buff *skb)
933 BUG_ON(mp == NULL); 940 BUG_ON(mp == NULL);
934 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 941 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
935 if (*mp == ifa->ifa_mask && 942 if (*mp == ifa->ifa_mask &&
936 inet_ifa_match(rt->rt_src, ifa)) 943 inet_ifa_match(ip_hdr(skb)->saddr, ifa))
937 break; 944 break;
938 } 945 }
939 if (!ifa && net_ratelimit()) { 946 if (!ifa && net_ratelimit()) {
940 printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", 947 printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n",
941 mp, dev->name, &rt->rt_src); 948 mp, dev->name, &ip_hdr(skb)->saddr);
942 } 949 }
943 } 950 }
944} 951}
@@ -1044,7 +1051,7 @@ error:
1044 */ 1051 */
1045static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { 1052static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1046 [ICMP_ECHOREPLY] = { 1053 [ICMP_ECHOREPLY] = {
1047 .handler = icmp_discard, 1054 .handler = ping_rcv,
1048 }, 1055 },
1049 [1] = { 1056 [1] = {
1050 .handler = icmp_discard, 1057 .handler = icmp_discard,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8f62d66d0857..672e476c8c8a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -303,6 +303,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
303 struct iphdr *pip; 303 struct iphdr *pip;
304 struct igmpv3_report *pig; 304 struct igmpv3_report *pig;
305 struct net *net = dev_net(dev); 305 struct net *net = dev_net(dev);
306 struct flowi4 fl4;
306 307
307 while (1) { 308 while (1) {
308 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), 309 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
@@ -315,18 +316,13 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
315 } 316 }
316 igmp_skb_size(skb) = size; 317 igmp_skb_size(skb) = size;
317 318
318 rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, 319 rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
319 0, 0, 320 0, 0,
320 IPPROTO_IGMP, 0, dev->ifindex); 321 IPPROTO_IGMP, 0, dev->ifindex);
321 if (IS_ERR(rt)) { 322 if (IS_ERR(rt)) {
322 kfree_skb(skb); 323 kfree_skb(skb);
323 return NULL; 324 return NULL;
324 } 325 }
325 if (rt->rt_src == 0) {
326 kfree_skb(skb);
327 ip_rt_put(rt);
328 return NULL;
329 }
330 326
331 skb_dst_set(skb, &rt->dst); 327 skb_dst_set(skb, &rt->dst);
332 skb->dev = dev; 328 skb->dev = dev;
@@ -342,8 +338,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
342 pip->tos = 0xc0; 338 pip->tos = 0xc0;
343 pip->frag_off = htons(IP_DF); 339 pip->frag_off = htons(IP_DF);
344 pip->ttl = 1; 340 pip->ttl = 1;
345 pip->daddr = rt->rt_dst; 341 pip->daddr = fl4.daddr;
346 pip->saddr = rt->rt_src; 342 pip->saddr = fl4.saddr;
347 pip->protocol = IPPROTO_IGMP; 343 pip->protocol = IPPROTO_IGMP;
348 pip->tot_len = 0; /* filled in later */ 344 pip->tot_len = 0; /* filled in later */
349 ip_select_ident(pip, &rt->dst, NULL); 345 ip_select_ident(pip, &rt->dst, NULL);
@@ -649,6 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
649 struct net_device *dev = in_dev->dev; 645 struct net_device *dev = in_dev->dev;
650 struct net *net = dev_net(dev); 646 struct net *net = dev_net(dev);
651 __be32 group = pmc ? pmc->multiaddr : 0; 647 __be32 group = pmc ? pmc->multiaddr : 0;
648 struct flowi4 fl4;
652 __be32 dst; 649 __be32 dst;
653 650
654 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) 651 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
@@ -658,17 +655,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
658 else 655 else
659 dst = group; 656 dst = group;
660 657
661 rt = ip_route_output_ports(net, NULL, dst, 0, 658 rt = ip_route_output_ports(net, &fl4, NULL, dst, 0,
662 0, 0, 659 0, 0,
663 IPPROTO_IGMP, 0, dev->ifindex); 660 IPPROTO_IGMP, 0, dev->ifindex);
664 if (IS_ERR(rt)) 661 if (IS_ERR(rt))
665 return -1; 662 return -1;
666 663
667 if (rt->rt_src == 0) {
668 ip_rt_put(rt);
669 return -1;
670 }
671
672 skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); 664 skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
673 if (skb == NULL) { 665 if (skb == NULL) {
674 ip_rt_put(rt); 666 ip_rt_put(rt);
@@ -689,7 +681,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
689 iph->frag_off = htons(IP_DF); 681 iph->frag_off = htons(IP_DF);
690 iph->ttl = 1; 682 iph->ttl = 1;
691 iph->daddr = dst; 683 iph->daddr = dst;
692 iph->saddr = rt->rt_src; 684 iph->saddr = fl4.saddr;
693 iph->protocol = IPPROTO_IGMP; 685 iph->protocol = IPPROTO_IGMP;
694 ip_select_ident(iph, &rt->dst, NULL); 686 ip_select_ident(iph, &rt->dst, NULL);
695 ((u8*)&iph[1])[0] = IPOPT_RA; 687 ((u8*)&iph[1])[0] = IPOPT_RA;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 38f23e721b80..61fac4cabc78 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -350,30 +350,24 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
350EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 350EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
351 351
352struct dst_entry *inet_csk_route_req(struct sock *sk, 352struct dst_entry *inet_csk_route_req(struct sock *sk,
353 struct flowi4 *fl4,
353 const struct request_sock *req) 354 const struct request_sock *req)
354{ 355{
355 struct rtable *rt; 356 struct rtable *rt;
356 const struct inet_request_sock *ireq = inet_rsk(req); 357 const struct inet_request_sock *ireq = inet_rsk(req);
357 struct ip_options *opt = inet_rsk(req)->opt; 358 struct ip_options_rcu *opt = inet_rsk(req)->opt;
358 struct flowi4 fl4 = {
359 .flowi4_oif = sk->sk_bound_dev_if,
360 .flowi4_mark = sk->sk_mark,
361 .daddr = ((opt && opt->srr) ?
362 opt->faddr : ireq->rmt_addr),
363 .saddr = ireq->loc_addr,
364 .flowi4_tos = RT_CONN_FLAGS(sk),
365 .flowi4_proto = sk->sk_protocol,
366 .flowi4_flags = inet_sk_flowi_flags(sk),
367 .fl4_sport = inet_sk(sk)->inet_sport,
368 .fl4_dport = ireq->rmt_port,
369 };
370 struct net *net = sock_net(sk); 359 struct net *net = sock_net(sk);
371 360
372 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 361 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
373 rt = ip_route_output_flow(net, &fl4, sk); 362 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
363 sk->sk_protocol, inet_sk_flowi_flags(sk),
364 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
365 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
366 security_req_classify_flow(req, flowi4_to_flowi(fl4));
367 rt = ip_route_output_flow(net, fl4, sk);
374 if (IS_ERR(rt)) 368 if (IS_ERR(rt))
375 goto no_route; 369 goto no_route;
376 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 370 if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
377 goto route_err; 371 goto route_err;
378 return &rt->dst; 372 return &rt->dst;
379 373
@@ -385,6 +379,39 @@ no_route:
385} 379}
386EXPORT_SYMBOL_GPL(inet_csk_route_req); 380EXPORT_SYMBOL_GPL(inet_csk_route_req);
387 381
382struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
383 struct sock *newsk,
384 const struct request_sock *req)
385{
386 const struct inet_request_sock *ireq = inet_rsk(req);
387 struct inet_sock *newinet = inet_sk(newsk);
388 struct ip_options_rcu *opt = ireq->opt;
389 struct net *net = sock_net(sk);
390 struct flowi4 *fl4;
391 struct rtable *rt;
392
393 fl4 = &newinet->cork.fl.u.ip4;
394 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
395 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
396 sk->sk_protocol, inet_sk_flowi_flags(sk),
397 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
398 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
399 security_req_classify_flow(req, flowi4_to_flowi(fl4));
400 rt = ip_route_output_flow(net, fl4, sk);
401 if (IS_ERR(rt))
402 goto no_route;
403 if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
404 goto route_err;
405 return &rt->dst;
406
407route_err:
408 ip_rt_put(rt);
409no_route:
410 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
411 return NULL;
412}
413EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
414
388static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 415static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
389 const u32 rnd, const u32 synq_hsize) 416 const u32 rnd, const u32 synq_hsize)
390{ 417{
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2ada17129fce..6ffe94ca5bc9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -124,7 +124,7 @@ static int inet_csk_diag_fill(struct sock *sk,
124 124
125#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 125#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
126 if (r->idiag_family == AF_INET6) { 126 if (r->idiag_family == AF_INET6) {
127 struct ipv6_pinfo *np = inet6_sk(sk); 127 const struct ipv6_pinfo *np = inet6_sk(sk);
128 128
129 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, 129 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
130 &np->rcv_saddr); 130 &np->rcv_saddr);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 47038cb6c138..85a0f75dae64 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -51,8 +51,8 @@ MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
51 * Basic tcp checks whether packet is suitable for LRO 51 * Basic tcp checks whether packet is suitable for LRO
52 */ 52 */
53 53
54static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, 54static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
55 int len, struct net_lro_desc *lro_desc) 55 int len, const struct net_lro_desc *lro_desc)
56{ 56{
57 /* check ip header: don't aggregate padded frames */ 57 /* check ip header: don't aggregate padded frames */
58 if (ntohs(iph->tot_len) != len) 58 if (ntohs(iph->tot_len) != len)
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 99461f09320f..3b34d1c86270 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb)
84 84
85 rt = skb_rtable(skb); 85 rt = skb_rtable(skb);
86 86
87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 87 if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway)
88 goto sr_failed; 88 goto sr_failed;
89 89
90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && 90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b1d282f11be7..0ad6035f6366 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -77,22 +77,40 @@ struct ipq {
77 struct inet_peer *peer; 77 struct inet_peer *peer;
78}; 78};
79 79
80#define IPFRAG_ECN_CLEAR 0x01 /* one frag had INET_ECN_NOT_ECT */ 80/* RFC 3168 support :
81#define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */ 81 * We want to check ECN values of all fragments, to detect invalid combinations.
82 * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
83 */
84#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
85#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
86#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
87#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
82 88
83static inline u8 ip4_frag_ecn(u8 tos) 89static inline u8 ip4_frag_ecn(u8 tos)
84{ 90{
85 tos = (tos & INET_ECN_MASK) + 1; 91 return 1 << (tos & INET_ECN_MASK);
86 /*
87 * After the last operation we have (in binary):
88 * INET_ECN_NOT_ECT => 001
89 * INET_ECN_ECT_1 => 010
90 * INET_ECN_ECT_0 => 011
91 * INET_ECN_CE => 100
92 */
93 return (tos & 2) ? 0 : tos;
94} 92}
95 93
94/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
95 * Value : 0xff if frame should be dropped.
96 * 0 or INET_ECN_CE value, to be ORed into the final iph->tos field
97 */
98static const u8 ip4_frag_ecn_table[16] = {
99 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
100 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
101 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
102 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
103
104 /* invalid combinations : drop frame */
105 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
106 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
107 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
108 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
109 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
110 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
111 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
112};
113
96static struct inet_frags ip4_frags; 114static struct inet_frags ip4_frags;
97 115
98int ip_frag_nqueues(struct net *net) 116int ip_frag_nqueues(struct net *net)
@@ -524,9 +542,15 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
524 int len; 542 int len;
525 int ihlen; 543 int ihlen;
526 int err; 544 int err;
545 u8 ecn;
527 546
528 ipq_kill(qp); 547 ipq_kill(qp);
529 548
549 ecn = ip4_frag_ecn_table[qp->ecn];
550 if (unlikely(ecn == 0xff)) {
551 err = -EINVAL;
552 goto out_fail;
553 }
530 /* Make the one we just received the head. */ 554 /* Make the one we just received the head. */
531 if (prev) { 555 if (prev) {
532 head = prev->next; 556 head = prev->next;
@@ -605,17 +629,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
605 iph = ip_hdr(head); 629 iph = ip_hdr(head);
606 iph->frag_off = 0; 630 iph->frag_off = 0;
607 iph->tot_len = htons(len); 631 iph->tot_len = htons(len);
608 /* RFC3168 5.3 Fragmentation support 632 iph->tos |= ecn;
609 * If one fragment had INET_ECN_NOT_ECT,
610 * reassembled frame also has INET_ECN_NOT_ECT
611 * Elif one fragment had INET_ECN_CE
612 * reassembled frame also has INET_ECN_CE
613 */
614 if (qp->ecn & IPFRAG_ECN_CLEAR)
615 iph->tos &= ~INET_ECN_MASK;
616 else if (qp->ecn & IPFRAG_ECN_SET_CE)
617 iph->tos |= INET_ECN_CE;
618
619 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 633 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
620 qp->q.fragments = NULL; 634 qp->q.fragments = NULL;
621 qp->q.fragments_tail = NULL; 635 qp->q.fragments_tail = NULL;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index da5941f18c3c..8871067560db 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -413,11 +413,6 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
413 413
414 dev_net_set(dev, net); 414 dev_net_set(dev, net);
415 415
416 if (strchr(name, '%')) {
417 if (dev_alloc_name(dev, name) < 0)
418 goto failed_free;
419 }
420
421 nt = netdev_priv(dev); 416 nt = netdev_priv(dev);
422 nt->parms = *parms; 417 nt->parms = *parms;
423 dev->rtnl_link_ops = &ipgre_link_ops; 418 dev->rtnl_link_ops = &ipgre_link_ops;
@@ -462,7 +457,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
462 by themself??? 457 by themself???
463 */ 458 */
464 459
465 struct iphdr *iph = (struct iphdr *)skb->data; 460 const struct iphdr *iph = (const struct iphdr *)skb->data;
466 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 461 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
467 int grehlen = (iph->ihl<<2) + 4; 462 int grehlen = (iph->ihl<<2) + 4;
468 const int type = icmp_hdr(skb)->type; 463 const int type = icmp_hdr(skb)->type;
@@ -534,7 +529,7 @@ out:
534 rcu_read_unlock(); 529 rcu_read_unlock();
535} 530}
536 531
537static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 532static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
538{ 533{
539 if (INET_ECN_is_ce(iph->tos)) { 534 if (INET_ECN_is_ce(iph->tos)) {
540 if (skb->protocol == htons(ETH_P_IP)) { 535 if (skb->protocol == htons(ETH_P_IP)) {
@@ -546,19 +541,19 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
546} 541}
547 542
548static inline u8 543static inline u8
549ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 544ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
550{ 545{
551 u8 inner = 0; 546 u8 inner = 0;
552 if (skb->protocol == htons(ETH_P_IP)) 547 if (skb->protocol == htons(ETH_P_IP))
553 inner = old_iph->tos; 548 inner = old_iph->tos;
554 else if (skb->protocol == htons(ETH_P_IPV6)) 549 else if (skb->protocol == htons(ETH_P_IPV6))
555 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 550 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
556 return INET_ECN_encapsulate(tos, inner); 551 return INET_ECN_encapsulate(tos, inner);
557} 552}
558 553
559static int ipgre_rcv(struct sk_buff *skb) 554static int ipgre_rcv(struct sk_buff *skb)
560{ 555{
561 struct iphdr *iph; 556 const struct iphdr *iph;
562 u8 *h; 557 u8 *h;
563 __be16 flags; 558 __be16 flags;
564 __sum16 csum = 0; 559 __sum16 csum = 0;
@@ -697,8 +692,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
697{ 692{
698 struct ip_tunnel *tunnel = netdev_priv(dev); 693 struct ip_tunnel *tunnel = netdev_priv(dev);
699 struct pcpu_tstats *tstats; 694 struct pcpu_tstats *tstats;
700 struct iphdr *old_iph = ip_hdr(skb); 695 const struct iphdr *old_iph = ip_hdr(skb);
701 struct iphdr *tiph; 696 const struct iphdr *tiph;
697 struct flowi4 fl4;
702 u8 tos; 698 u8 tos;
703 __be16 df; 699 __be16 df;
704 struct rtable *rt; /* Route to the other host */ 700 struct rtable *rt; /* Route to the other host */
@@ -714,7 +710,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
714 710
715 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 711 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
716 gre_hlen = 0; 712 gre_hlen = 0;
717 tiph = (struct iphdr *)skb->data; 713 tiph = (const struct iphdr *)skb->data;
718 } else { 714 } else {
719 gre_hlen = tunnel->hlen; 715 gre_hlen = tunnel->hlen;
720 tiph = &tunnel->parms.iph; 716 tiph = &tunnel->parms.iph;
@@ -735,14 +731,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
735 } 731 }
736#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 732#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
737 else if (skb->protocol == htons(ETH_P_IPV6)) { 733 else if (skb->protocol == htons(ETH_P_IPV6)) {
738 struct in6_addr *addr6; 734 const struct in6_addr *addr6;
739 int addr_type; 735 int addr_type;
740 struct neighbour *neigh = skb_dst(skb)->neighbour; 736 struct neighbour *neigh = skb_dst(skb)->neighbour;
741 737
742 if (neigh == NULL) 738 if (neigh == NULL)
743 goto tx_error; 739 goto tx_error;
744 740
745 addr6 = (struct in6_addr *)&neigh->primary_key; 741 addr6 = (const struct in6_addr *)&neigh->primary_key;
746 addr_type = ipv6_addr_type(addr6); 742 addr_type = ipv6_addr_type(addr6);
747 743
748 if (addr_type == IPV6_ADDR_ANY) { 744 if (addr_type == IPV6_ADDR_ANY) {
@@ -766,10 +762,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
766 if (skb->protocol == htons(ETH_P_IP)) 762 if (skb->protocol == htons(ETH_P_IP))
767 tos = old_iph->tos; 763 tos = old_iph->tos;
768 else if (skb->protocol == htons(ETH_P_IPV6)) 764 else if (skb->protocol == htons(ETH_P_IPV6))
769 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 765 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
770 } 766 }
771 767
772 rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, 768 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
773 tunnel->parms.o_key, RT_TOS(tos), 769 tunnel->parms.o_key, RT_TOS(tos),
774 tunnel->parms.link); 770 tunnel->parms.link);
775 if (IS_ERR(rt)) { 771 if (IS_ERR(rt)) {
@@ -873,15 +869,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
873 iph->frag_off = df; 869 iph->frag_off = df;
874 iph->protocol = IPPROTO_GRE; 870 iph->protocol = IPPROTO_GRE;
875 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 871 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
876 iph->daddr = rt->rt_dst; 872 iph->daddr = fl4.daddr;
877 iph->saddr = rt->rt_src; 873 iph->saddr = fl4.saddr;
878 874
879 if ((iph->ttl = tiph->ttl) == 0) { 875 if ((iph->ttl = tiph->ttl) == 0) {
880 if (skb->protocol == htons(ETH_P_IP)) 876 if (skb->protocol == htons(ETH_P_IP))
881 iph->ttl = old_iph->ttl; 877 iph->ttl = old_iph->ttl;
882#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 878#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
883 else if (skb->protocol == htons(ETH_P_IPV6)) 879 else if (skb->protocol == htons(ETH_P_IPV6))
884 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; 880 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
885#endif 881#endif
886 else 882 else
887 iph->ttl = ip4_dst_hoplimit(&rt->dst); 883 iph->ttl = ip4_dst_hoplimit(&rt->dst);
@@ -927,7 +923,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
927{ 923{
928 struct net_device *tdev = NULL; 924 struct net_device *tdev = NULL;
929 struct ip_tunnel *tunnel; 925 struct ip_tunnel *tunnel;
930 struct iphdr *iph; 926 const struct iphdr *iph;
931 int hlen = LL_MAX_HEADER; 927 int hlen = LL_MAX_HEADER;
932 int mtu = ETH_DATA_LEN; 928 int mtu = ETH_DATA_LEN;
933 int addend = sizeof(struct iphdr) + 4; 929 int addend = sizeof(struct iphdr) + 4;
@@ -938,12 +934,14 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
938 /* Guess output device to choose reasonable mtu and needed_headroom */ 934 /* Guess output device to choose reasonable mtu and needed_headroom */
939 935
940 if (iph->daddr) { 936 if (iph->daddr) {
941 struct rtable *rt = ip_route_output_gre(dev_net(dev), 937 struct flowi4 fl4;
942 iph->daddr, iph->saddr, 938 struct rtable *rt;
943 tunnel->parms.o_key, 939
944 RT_TOS(iph->tos), 940 rt = ip_route_output_gre(dev_net(dev), &fl4,
945 tunnel->parms.link); 941 iph->daddr, iph->saddr,
946 942 tunnel->parms.o_key,
943 RT_TOS(iph->tos),
944 tunnel->parms.link);
947 if (!IS_ERR(rt)) { 945 if (!IS_ERR(rt)) {
948 tdev = rt->dst.dev; 946 tdev = rt->dst.dev;
949 ip_rt_put(rt); 947 ip_rt_put(rt);
@@ -1180,7 +1178,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1180 1178
1181static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 1179static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1182{ 1180{
1183 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); 1181 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
1184 memcpy(haddr, &iph->saddr, 4); 1182 memcpy(haddr, &iph->saddr, 4);
1185 return 4; 1183 return 4;
1186} 1184}
@@ -1196,13 +1194,15 @@ static int ipgre_open(struct net_device *dev)
1196 struct ip_tunnel *t = netdev_priv(dev); 1194 struct ip_tunnel *t = netdev_priv(dev);
1197 1195
1198 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1196 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1199 struct rtable *rt = ip_route_output_gre(dev_net(dev), 1197 struct flowi4 fl4;
1200 t->parms.iph.daddr, 1198 struct rtable *rt;
1201 t->parms.iph.saddr, 1199
1202 t->parms.o_key, 1200 rt = ip_route_output_gre(dev_net(dev), &fl4,
1203 RT_TOS(t->parms.iph.tos), 1201 t->parms.iph.daddr,
1204 t->parms.link); 1202 t->parms.iph.saddr,
1205 1203 t->parms.o_key,
1204 RT_TOS(t->parms.iph.tos),
1205 t->parms.link);
1206 if (IS_ERR(rt)) 1206 if (IS_ERR(rt))
1207 return -EADDRNOTAVAIL; 1207 return -EADDRNOTAVAIL;
1208 dev = rt->dst.dev; 1208 dev = rt->dst.dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d7b2b0987a3b..c8f48efc5fd3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb)
268static inline int ip_rcv_options(struct sk_buff *skb) 268static inline int ip_rcv_options(struct sk_buff *skb)
269{ 269{
270 struct ip_options *opt; 270 struct ip_options *opt;
271 struct iphdr *iph; 271 const struct iphdr *iph;
272 struct net_device *dev = skb->dev; 272 struct net_device *dev = skb->dev;
273 273
274 /* It looks as overkill, because not all 274 /* It looks as overkill, because not all
@@ -374,7 +374,7 @@ drop:
374 */ 374 */
375int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 375int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
376{ 376{
377 struct iphdr *iph; 377 const struct iphdr *iph;
378 u32 len; 378 u32 len;
379 379
380 /* When the interface is in promisc. mode, drop all the crap 380 /* When the interface is in promisc. mode, drop all the crap
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2391b24e8251..c3118e1cd3bb 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -36,8 +36,8 @@
36 * saddr is address of outgoing interface. 36 * saddr is address of outgoing interface.
37 */ 37 */
38 38
39void ip_options_build(struct sk_buff * skb, struct ip_options * opt, 39void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
40 __be32 daddr, struct rtable *rt, int is_frag) 40 __be32 daddr, struct rtable *rt, int is_frag)
41{ 41{
42 unsigned char *iph = skb_network_header(skb); 42 unsigned char *iph = skb_network_header(skb);
43 43
@@ -50,9 +50,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
50 50
51 if (!is_frag) { 51 if (!is_frag) {
52 if (opt->rr_needaddr) 52 if (opt->rr_needaddr)
53 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); 53 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt);
54 if (opt->ts_needaddr) 54 if (opt->ts_needaddr)
55 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); 55 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
56 if (opt->ts_needtime) { 56 if (opt->ts_needtime) {
57 struct timespec tv; 57 struct timespec tv;
58 __be32 midtime; 58 __be32 midtime;
@@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
83 * NOTE: dopt cannot point to skb. 83 * NOTE: dopt cannot point to skb.
84 */ 84 */
85 85
86int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) 86int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
87{ 87{
88 struct ip_options *sopt; 88 const struct ip_options *sopt;
89 unsigned char *sptr, *dptr; 89 unsigned char *sptr, *dptr;
90 int soffset, doffset; 90 int soffset, doffset;
91 int optlen; 91 int optlen;
@@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
95 95
96 sopt = &(IPCB(skb)->opt); 96 sopt = &(IPCB(skb)->opt);
97 97
98 if (sopt->optlen == 0) { 98 if (sopt->optlen == 0)
99 dopt->optlen = 0;
100 return 0; 99 return 0;
101 }
102 100
103 sptr = skb_network_header(skb); 101 sptr = skb_network_header(skb);
104 dptr = dopt->__data; 102 dptr = dopt->__data;
@@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
157 dopt->optlen += optlen; 155 dopt->optlen += optlen;
158 } 156 }
159 if (sopt->srr) { 157 if (sopt->srr) {
160 unsigned char * start = sptr+sopt->srr; 158 unsigned char *start = sptr+sopt->srr;
161 __be32 faddr; 159 __be32 faddr;
162 160
163 optlen = start[1]; 161 optlen = start[1];
@@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt)
499 } 497 }
500} 498}
501 499
502static struct ip_options *ip_options_get_alloc(const int optlen) 500static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
503{ 501{
504 return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), 502 return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
505 GFP_KERNEL); 503 GFP_KERNEL);
506} 504}
507 505
508static int ip_options_get_finish(struct net *net, struct ip_options **optp, 506static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
509 struct ip_options *opt, int optlen) 507 struct ip_options_rcu *opt, int optlen)
510{ 508{
511 while (optlen & 3) 509 while (optlen & 3)
512 opt->__data[optlen++] = IPOPT_END; 510 opt->opt.__data[optlen++] = IPOPT_END;
513 opt->optlen = optlen; 511 opt->opt.optlen = optlen;
514 if (optlen && ip_options_compile(net, opt, NULL)) { 512 if (optlen && ip_options_compile(net, &opt->opt, NULL)) {
515 kfree(opt); 513 kfree(opt);
516 return -EINVAL; 514 return -EINVAL;
517 } 515 }
@@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp,
520 return 0; 518 return 0;
521} 519}
522 520
523int ip_options_get_from_user(struct net *net, struct ip_options **optp, 521int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
524 unsigned char __user *data, int optlen) 522 unsigned char __user *data, int optlen)
525{ 523{
526 struct ip_options *opt = ip_options_get_alloc(optlen); 524 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
527 525
528 if (!opt) 526 if (!opt)
529 return -ENOMEM; 527 return -ENOMEM;
530 if (optlen && copy_from_user(opt->__data, data, optlen)) { 528 if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
531 kfree(opt); 529 kfree(opt);
532 return -EFAULT; 530 return -EFAULT;
533 } 531 }
534 return ip_options_get_finish(net, optp, opt, optlen); 532 return ip_options_get_finish(net, optp, opt, optlen);
535} 533}
536 534
537int ip_options_get(struct net *net, struct ip_options **optp, 535int ip_options_get(struct net *net, struct ip_options_rcu **optp,
538 unsigned char *data, int optlen) 536 unsigned char *data, int optlen)
539{ 537{
540 struct ip_options *opt = ip_options_get_alloc(optlen); 538 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
541 539
542 if (!opt) 540 if (!opt)
543 return -ENOMEM; 541 return -ENOMEM;
544 if (optlen) 542 if (optlen)
545 memcpy(opt->__data, data, optlen); 543 memcpy(opt->opt.__data, data, optlen);
546 return ip_options_get_finish(net, optp, opt, optlen); 544 return ip_options_get_finish(net, optp, opt, optlen);
547} 545}
548 546
@@ -555,7 +553,7 @@ void ip_forward_options(struct sk_buff *skb)
555 553
556 if (opt->rr_needaddr) { 554 if (opt->rr_needaddr) {
557 optptr = (unsigned char *)raw + opt->rr; 555 optptr = (unsigned char *)raw + opt->rr;
558 ip_rt_get_source(&optptr[optptr[2]-5], rt); 556 ip_rt_get_source(&optptr[optptr[2]-5], skb, rt);
559 opt->is_changed = 1; 557 opt->is_changed = 1;
560 } 558 }
561 if (opt->srr_is_hit) { 559 if (opt->srr_is_hit) {
@@ -569,19 +567,18 @@ void ip_forward_options(struct sk_buff *skb)
569 ) { 567 ) {
570 if (srrptr + 3 > srrspace) 568 if (srrptr + 3 > srrspace)
571 break; 569 break;
572 if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) 570 if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0)
573 break; 571 break;
574 } 572 }
575 if (srrptr + 3 <= srrspace) { 573 if (srrptr + 3 <= srrspace) {
576 opt->is_changed = 1; 574 opt->is_changed = 1;
577 ip_rt_get_source(&optptr[srrptr-1], rt); 575 ip_rt_get_source(&optptr[srrptr-1], skb, rt);
578 ip_hdr(skb)->daddr = rt->rt_dst;
579 optptr[2] = srrptr+4; 576 optptr[2] = srrptr+4;
580 } else if (net_ratelimit()) 577 } else if (net_ratelimit())
581 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); 578 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
582 if (opt->ts_needaddr) { 579 if (opt->ts_needaddr) {
583 optptr = raw + opt->ts; 580 optptr = raw + opt->ts;
584 ip_rt_get_source(&optptr[optptr[2]-9], rt); 581 ip_rt_get_source(&optptr[optptr[2]-9], skb, rt);
585 opt->is_changed = 1; 582 opt->is_changed = 1;
586 } 583 }
587 } 584 }
@@ -603,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
603 unsigned long orefdst; 600 unsigned long orefdst;
604 int err; 601 int err;
605 602
606 if (!opt->srr || !rt) 603 if (!rt)
607 return 0; 604 return 0;
608 605
609 if (skb->pkt_type != PACKET_HOST) 606 if (skb->pkt_type != PACKET_HOST)
@@ -637,7 +634,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
637 if (rt2->rt_type != RTN_LOCAL) 634 if (rt2->rt_type != RTN_LOCAL)
638 break; 635 break;
639 /* Superfast 8) loopback forward */ 636 /* Superfast 8) loopback forward */
640 memcpy(&iph->daddr, &optptr[srrptr-1], 4); 637 iph->daddr = nexthop;
641 opt->is_changed = 1; 638 opt->is_changed = 1;
642 } 639 }
643 if (srrptr <= srrspace) { 640 if (srrptr <= srrspace) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 459c011b1d4a..98af3697c718 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
140 * 140 *
141 */ 141 */
142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, 142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 __be32 saddr, __be32 daddr, struct ip_options *opt) 143 __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
144{ 144{
145 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
146 struct rtable *rt = skb_rtable(skb); 146 struct rtable *rt = skb_rtable(skb);
147 struct iphdr *iph; 147 struct iphdr *iph;
148 148
149 /* Build the IP header. */ 149 /* Build the IP header. */
150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
151 skb_reset_network_header(skb); 151 skb_reset_network_header(skb);
152 iph = ip_hdr(skb); 152 iph = ip_hdr(skb);
153 iph->version = 4; 153 iph->version = 4;
@@ -158,14 +158,14 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
158 else 158 else
159 iph->frag_off = 0; 159 iph->frag_off = 0;
160 iph->ttl = ip_select_ttl(inet, &rt->dst); 160 iph->ttl = ip_select_ttl(inet, &rt->dst);
161 iph->daddr = rt->rt_dst; 161 iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
162 iph->saddr = rt->rt_src; 162 iph->saddr = saddr;
163 iph->protocol = sk->sk_protocol; 163 iph->protocol = sk->sk_protocol;
164 ip_select_ident(iph, &rt->dst, sk); 164 ip_select_ident(iph, &rt->dst, sk);
165 165
166 if (opt && opt->optlen) { 166 if (opt && opt->opt.optlen) {
167 iph->ihl += opt->optlen>>2; 167 iph->ihl += opt->opt.optlen>>2;
168 ip_options_build(skb, opt, daddr, rt, 0); 168 ip_options_build(skb, &opt->opt, daddr, rt, 0);
169 } 169 }
170 170
171 skb->priority = sk->sk_priority; 171 skb->priority = sk->sk_priority;
@@ -312,11 +312,12 @@ int ip_output(struct sk_buff *skb)
312 !(IPCB(skb)->flags & IPSKB_REROUTED)); 312 !(IPCB(skb)->flags & IPSKB_REROUTED));
313} 313}
314 314
315int ip_queue_xmit(struct sk_buff *skb) 315int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
316{ 316{
317 struct sock *sk = skb->sk; 317 struct sock *sk = skb->sk;
318 struct inet_sock *inet = inet_sk(sk); 318 struct inet_sock *inet = inet_sk(sk);
319 struct ip_options *opt = inet->opt; 319 struct ip_options_rcu *inet_opt;
320 struct flowi4 *fl4;
320 struct rtable *rt; 321 struct rtable *rt;
321 struct iphdr *iph; 322 struct iphdr *iph;
322 int res; 323 int res;
@@ -325,6 +326,8 @@ int ip_queue_xmit(struct sk_buff *skb)
325 * f.e. by something like SCTP. 326 * f.e. by something like SCTP.
326 */ 327 */
327 rcu_read_lock(); 328 rcu_read_lock();
329 inet_opt = rcu_dereference(inet->inet_opt);
330 fl4 = &fl->u.ip4;
328 rt = skb_rtable(skb); 331 rt = skb_rtable(skb);
329 if (rt != NULL) 332 if (rt != NULL)
330 goto packet_routed; 333 goto packet_routed;
@@ -336,14 +339,14 @@ int ip_queue_xmit(struct sk_buff *skb)
336 339
337 /* Use correct destination address if we have options. */ 340 /* Use correct destination address if we have options. */
338 daddr = inet->inet_daddr; 341 daddr = inet->inet_daddr;
339 if(opt && opt->srr) 342 if (inet_opt && inet_opt->opt.srr)
340 daddr = opt->faddr; 343 daddr = inet_opt->opt.faddr;
341 344
342 /* If this fails, retransmit mechanism of transport layer will 345 /* If this fails, retransmit mechanism of transport layer will
343 * keep trying until route appears or the connection times 346 * keep trying until route appears or the connection times
344 * itself out. 347 * itself out.
345 */ 348 */
346 rt = ip_route_output_ports(sock_net(sk), sk, 349 rt = ip_route_output_ports(sock_net(sk), fl4, sk,
347 daddr, inet->inet_saddr, 350 daddr, inet->inet_saddr,
348 inet->inet_dport, 351 inet->inet_dport,
349 inet->inet_sport, 352 inet->inet_sport,
@@ -357,11 +360,11 @@ int ip_queue_xmit(struct sk_buff *skb)
357 skb_dst_set_noref(skb, &rt->dst); 360 skb_dst_set_noref(skb, &rt->dst);
358 361
359packet_routed: 362packet_routed:
360 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 363 if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
361 goto no_route; 364 goto no_route;
362 365
363 /* OK, we know where to send it, allocate and build IP header. */ 366 /* OK, we know where to send it, allocate and build IP header. */
364 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 367 skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
365 skb_reset_network_header(skb); 368 skb_reset_network_header(skb);
366 iph = ip_hdr(skb); 369 iph = ip_hdr(skb);
367 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 370 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
@@ -371,13 +374,13 @@ packet_routed:
371 iph->frag_off = 0; 374 iph->frag_off = 0;
372 iph->ttl = ip_select_ttl(inet, &rt->dst); 375 iph->ttl = ip_select_ttl(inet, &rt->dst);
373 iph->protocol = sk->sk_protocol; 376 iph->protocol = sk->sk_protocol;
374 iph->saddr = rt->rt_src; 377 iph->saddr = fl4->saddr;
375 iph->daddr = rt->rt_dst; 378 iph->daddr = fl4->daddr;
376 /* Transport layer set skb->h.foo itself. */ 379 /* Transport layer set skb->h.foo itself. */
377 380
378 if (opt && opt->optlen) { 381 if (inet_opt && inet_opt->opt.optlen) {
379 iph->ihl += opt->optlen >> 2; 382 iph->ihl += inet_opt->opt.optlen >> 2;
380 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 383 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
381 } 384 }
382 385
383 ip_select_ident_more(iph, &rt->dst, sk, 386 ip_select_ident_more(iph, &rt->dst, sk,
@@ -773,7 +776,9 @@ static inline int ip_ufo_append_data(struct sock *sk,
773 (length - transhdrlen)); 776 (length - transhdrlen));
774} 777}
775 778
776static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, 779static int __ip_append_data(struct sock *sk,
780 struct flowi4 *fl4,
781 struct sk_buff_head *queue,
777 struct inet_cork *cork, 782 struct inet_cork *cork,
778 int getfrag(void *from, char *to, int offset, 783 int getfrag(void *from, char *to, int offset,
779 int len, int odd, struct sk_buff *skb), 784 int len, int odd, struct sk_buff *skb),
@@ -805,7 +810,7 @@ static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue,
805 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 810 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
806 811
807 if (cork->length + length > 0xFFFF - fragheaderlen) { 812 if (cork->length + length > 0xFFFF - fragheaderlen) {
808 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 813 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
809 mtu-exthdrlen); 814 mtu-exthdrlen);
810 return -EMSGSIZE; 815 return -EMSGSIZE;
811 } 816 }
@@ -1033,7 +1038,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1033 struct ipcm_cookie *ipc, struct rtable **rtp) 1038 struct ipcm_cookie *ipc, struct rtable **rtp)
1034{ 1039{
1035 struct inet_sock *inet = inet_sk(sk); 1040 struct inet_sock *inet = inet_sk(sk);
1036 struct ip_options *opt; 1041 struct ip_options_rcu *opt;
1037 struct rtable *rt; 1042 struct rtable *rt;
1038 1043
1039 /* 1044 /*
@@ -1047,7 +1052,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1047 if (unlikely(cork->opt == NULL)) 1052 if (unlikely(cork->opt == NULL))
1048 return -ENOBUFS; 1053 return -ENOBUFS;
1049 } 1054 }
1050 memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); 1055 memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
1051 cork->flags |= IPCORK_OPT; 1056 cork->flags |= IPCORK_OPT;
1052 cork->addr = ipc->addr; 1057 cork->addr = ipc->addr;
1053 } 1058 }
@@ -1080,7 +1085,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1080 * 1085 *
1081 * LATER: length must be adjusted by pad at tail, when it is required. 1086 * LATER: length must be adjusted by pad at tail, when it is required.
1082 */ 1087 */
1083int ip_append_data(struct sock *sk, 1088int ip_append_data(struct sock *sk, struct flowi4 *fl4,
1084 int getfrag(void *from, char *to, int offset, int len, 1089 int getfrag(void *from, char *to, int offset, int len,
1085 int odd, struct sk_buff *skb), 1090 int odd, struct sk_buff *skb),
1086 void *from, int length, int transhdrlen, 1091 void *from, int length, int transhdrlen,
@@ -1094,24 +1099,25 @@ int ip_append_data(struct sock *sk,
1094 return 0; 1099 return 0;
1095 1100
1096 if (skb_queue_empty(&sk->sk_write_queue)) { 1101 if (skb_queue_empty(&sk->sk_write_queue)) {
1097 err = ip_setup_cork(sk, &inet->cork, ipc, rtp); 1102 err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp);
1098 if (err) 1103 if (err)
1099 return err; 1104 return err;
1100 } else { 1105 } else {
1101 transhdrlen = 0; 1106 transhdrlen = 0;
1102 } 1107 }
1103 1108
1104 return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, 1109 return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag,
1105 from, length, transhdrlen, flags); 1110 from, length, transhdrlen, flags);
1106} 1111}
1107 1112
1108ssize_t ip_append_page(struct sock *sk, struct page *page, 1113ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1109 int offset, size_t size, int flags) 1114 int offset, size_t size, int flags)
1110{ 1115{
1111 struct inet_sock *inet = inet_sk(sk); 1116 struct inet_sock *inet = inet_sk(sk);
1112 struct sk_buff *skb; 1117 struct sk_buff *skb;
1113 struct rtable *rt; 1118 struct rtable *rt;
1114 struct ip_options *opt = NULL; 1119 struct ip_options *opt = NULL;
1120 struct inet_cork *cork;
1115 int hh_len; 1121 int hh_len;
1116 int mtu; 1122 int mtu;
1117 int len; 1123 int len;
@@ -1127,28 +1133,29 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1127 if (skb_queue_empty(&sk->sk_write_queue)) 1133 if (skb_queue_empty(&sk->sk_write_queue))
1128 return -EINVAL; 1134 return -EINVAL;
1129 1135
1130 rt = (struct rtable *)inet->cork.dst; 1136 cork = &inet->cork.base;
1131 if (inet->cork.flags & IPCORK_OPT) 1137 rt = (struct rtable *)cork->dst;
1132 opt = inet->cork.opt; 1138 if (cork->flags & IPCORK_OPT)
1139 opt = cork->opt;
1133 1140
1134 if (!(rt->dst.dev->features&NETIF_F_SG)) 1141 if (!(rt->dst.dev->features&NETIF_F_SG))
1135 return -EOPNOTSUPP; 1142 return -EOPNOTSUPP;
1136 1143
1137 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1144 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1138 mtu = inet->cork.fragsize; 1145 mtu = cork->fragsize;
1139 1146
1140 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1147 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1141 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 1148 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1142 1149
1143 if (inet->cork.length + size > 0xFFFF - fragheaderlen) { 1150 if (cork->length + size > 0xFFFF - fragheaderlen) {
1144 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); 1151 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu);
1145 return -EMSGSIZE; 1152 return -EMSGSIZE;
1146 } 1153 }
1147 1154
1148 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1155 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1149 return -EINVAL; 1156 return -EINVAL;
1150 1157
1151 inet->cork.length += size; 1158 cork->length += size;
1152 if ((size + skb->len > mtu) && 1159 if ((size + skb->len > mtu) &&
1153 (sk->sk_protocol == IPPROTO_UDP) && 1160 (sk->sk_protocol == IPPROTO_UDP) &&
1154 (rt->dst.dev->features & NETIF_F_UFO)) { 1161 (rt->dst.dev->features & NETIF_F_UFO)) {
@@ -1243,7 +1250,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1243 return 0; 1250 return 0;
1244 1251
1245error: 1252error:
1246 inet->cork.length -= size; 1253 cork->length -= size;
1247 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1254 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1248 return err; 1255 return err;
1249} 1256}
@@ -1262,6 +1269,7 @@ static void ip_cork_release(struct inet_cork *cork)
1262 * and push them out. 1269 * and push them out.
1263 */ 1270 */
1264struct sk_buff *__ip_make_skb(struct sock *sk, 1271struct sk_buff *__ip_make_skb(struct sock *sk,
1272 struct flowi4 *fl4,
1265 struct sk_buff_head *queue, 1273 struct sk_buff_head *queue,
1266 struct inet_cork *cork) 1274 struct inet_cork *cork)
1267{ 1275{
@@ -1319,17 +1327,18 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1319 iph = (struct iphdr *)skb->data; 1327 iph = (struct iphdr *)skb->data;
1320 iph->version = 4; 1328 iph->version = 4;
1321 iph->ihl = 5; 1329 iph->ihl = 5;
1322 if (opt) {
1323 iph->ihl += opt->optlen>>2;
1324 ip_options_build(skb, opt, cork->addr, rt, 0);
1325 }
1326 iph->tos = inet->tos; 1330 iph->tos = inet->tos;
1327 iph->frag_off = df; 1331 iph->frag_off = df;
1328 ip_select_ident(iph, &rt->dst, sk); 1332 ip_select_ident(iph, &rt->dst, sk);
1329 iph->ttl = ttl; 1333 iph->ttl = ttl;
1330 iph->protocol = sk->sk_protocol; 1334 iph->protocol = sk->sk_protocol;
1331 iph->saddr = rt->rt_src; 1335 iph->saddr = fl4->saddr;
1332 iph->daddr = rt->rt_dst; 1336 iph->daddr = fl4->daddr;
1337
1338 if (opt) {
1339 iph->ihl += opt->optlen>>2;
1340 ip_options_build(skb, opt, cork->addr, rt, 0);
1341 }
1333 1342
1334 skb->priority = sk->sk_priority; 1343 skb->priority = sk->sk_priority;
1335 skb->mark = sk->sk_mark; 1344 skb->mark = sk->sk_mark;
@@ -1365,11 +1374,11 @@ int ip_send_skb(struct sk_buff *skb)
1365 return err; 1374 return err;
1366} 1375}
1367 1376
1368int ip_push_pending_frames(struct sock *sk) 1377int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
1369{ 1378{
1370 struct sk_buff *skb; 1379 struct sk_buff *skb;
1371 1380
1372 skb = ip_finish_skb(sk); 1381 skb = ip_finish_skb(sk, fl4);
1373 if (!skb) 1382 if (!skb)
1374 return 0; 1383 return 0;
1375 1384
@@ -1394,17 +1403,18 @@ static void __ip_flush_pending_frames(struct sock *sk,
1394 1403
1395void ip_flush_pending_frames(struct sock *sk) 1404void ip_flush_pending_frames(struct sock *sk)
1396{ 1405{
1397 __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); 1406 __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
1398} 1407}
1399 1408
1400struct sk_buff *ip_make_skb(struct sock *sk, 1409struct sk_buff *ip_make_skb(struct sock *sk,
1410 struct flowi4 *fl4,
1401 int getfrag(void *from, char *to, int offset, 1411 int getfrag(void *from, char *to, int offset,
1402 int len, int odd, struct sk_buff *skb), 1412 int len, int odd, struct sk_buff *skb),
1403 void *from, int length, int transhdrlen, 1413 void *from, int length, int transhdrlen,
1404 struct ipcm_cookie *ipc, struct rtable **rtp, 1414 struct ipcm_cookie *ipc, struct rtable **rtp,
1405 unsigned int flags) 1415 unsigned int flags)
1406{ 1416{
1407 struct inet_cork cork = {}; 1417 struct inet_cork cork;
1408 struct sk_buff_head queue; 1418 struct sk_buff_head queue;
1409 int err; 1419 int err;
1410 1420
@@ -1413,18 +1423,21 @@ struct sk_buff *ip_make_skb(struct sock *sk,
1413 1423
1414 __skb_queue_head_init(&queue); 1424 __skb_queue_head_init(&queue);
1415 1425
1426 cork.flags = 0;
1427 cork.addr = 0;
1428 cork.opt = NULL;
1416 err = ip_setup_cork(sk, &cork, ipc, rtp); 1429 err = ip_setup_cork(sk, &cork, ipc, rtp);
1417 if (err) 1430 if (err)
1418 return ERR_PTR(err); 1431 return ERR_PTR(err);
1419 1432
1420 err = __ip_append_data(sk, &queue, &cork, getfrag, 1433 err = __ip_append_data(sk, fl4, &queue, &cork, getfrag,
1421 from, length, transhdrlen, flags); 1434 from, length, transhdrlen, flags);
1422 if (err) { 1435 if (err) {
1423 __ip_flush_pending_frames(sk, &queue, &cork); 1436 __ip_flush_pending_frames(sk, &queue, &cork);
1424 return ERR_PTR(err); 1437 return ERR_PTR(err);
1425 } 1438 }
1426 1439
1427 return __ip_make_skb(sk, &queue, &cork); 1440 return __ip_make_skb(sk, fl4, &queue, &cork);
1428} 1441}
1429 1442
1430/* 1443/*
@@ -1447,48 +1460,39 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1447 * Should run single threaded per socket because it uses the sock 1460 * Should run single threaded per socket because it uses the sock
1448 * structure to pass arguments. 1461 * structure to pass arguments.
1449 */ 1462 */
1450void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 1463void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1451 unsigned int len) 1464 struct ip_reply_arg *arg, unsigned int len)
1452{ 1465{
1453 struct inet_sock *inet = inet_sk(sk); 1466 struct inet_sock *inet = inet_sk(sk);
1454 struct { 1467 struct ip_options_data replyopts;
1455 struct ip_options opt;
1456 char data[40];
1457 } replyopts;
1458 struct ipcm_cookie ipc; 1468 struct ipcm_cookie ipc;
1459 __be32 daddr; 1469 struct flowi4 fl4;
1460 struct rtable *rt = skb_rtable(skb); 1470 struct rtable *rt = skb_rtable(skb);
1461 1471
1462 if (ip_options_echo(&replyopts.opt, skb)) 1472 if (ip_options_echo(&replyopts.opt.opt, skb))
1463 return; 1473 return;
1464 1474
1465 daddr = ipc.addr = rt->rt_src; 1475 ipc.addr = daddr;
1466 ipc.opt = NULL; 1476 ipc.opt = NULL;
1467 ipc.tx_flags = 0; 1477 ipc.tx_flags = 0;
1468 1478
1469 if (replyopts.opt.optlen) { 1479 if (replyopts.opt.opt.optlen) {
1470 ipc.opt = &replyopts.opt; 1480 ipc.opt = &replyopts.opt;
1471 1481
1472 if (ipc.opt->srr) 1482 if (replyopts.opt.opt.srr)
1473 daddr = replyopts.opt.faddr; 1483 daddr = replyopts.opt.opt.faddr;
1474 } 1484 }
1475 1485
1476 { 1486 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1477 struct flowi4 fl4 = { 1487 RT_TOS(ip_hdr(skb)->tos),
1478 .flowi4_oif = arg->bound_dev_if, 1488 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1479 .daddr = daddr, 1489 ip_reply_arg_flowi_flags(arg),
1480 .saddr = rt->rt_spec_dst, 1490 daddr, rt->rt_spec_dst,
1481 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), 1491 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1482 .fl4_sport = tcp_hdr(skb)->dest, 1492 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1483 .fl4_dport = tcp_hdr(skb)->source, 1493 rt = ip_route_output_key(sock_net(sk), &fl4);
1484 .flowi4_proto = sk->sk_protocol, 1494 if (IS_ERR(rt))
1485 .flowi4_flags = ip_reply_arg_flowi_flags(arg), 1495 return;
1486 };
1487 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1488 rt = ip_route_output_key(sock_net(sk), &fl4);
1489 if (IS_ERR(rt))
1490 return;
1491 }
1492 1496
1493 /* And let IP do all the hard work. 1497 /* And let IP do all the hard work.
1494 1498
@@ -1501,7 +1505,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1501 sk->sk_priority = skb->priority; 1505 sk->sk_priority = skb->priority;
1502 sk->sk_protocol = ip_hdr(skb)->protocol; 1506 sk->sk_protocol = ip_hdr(skb)->protocol;
1503 sk->sk_bound_dev_if = arg->bound_dev_if; 1507 sk->sk_bound_dev_if = arg->bound_dev_if;
1504 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1508 ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1505 &ipc, &rt, MSG_DONTWAIT); 1509 &ipc, &rt, MSG_DONTWAIT);
1506 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1510 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1507 if (arg->csumoffset >= 0) 1511 if (arg->csumoffset >= 0)
@@ -1509,7 +1513,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1509 arg->csumoffset) = csum_fold(csum_add(skb->csum, 1513 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1510 arg->csum)); 1514 arg->csum));
1511 skb->ip_summed = CHECKSUM_NONE; 1515 skb->ip_summed = CHECKSUM_NONE;
1512 ip_push_pending_frames(sk); 1516 ip_push_pending_frames(sk, &fl4);
1513 } 1517 }
1514 1518
1515 bh_unlock_sock(sk); 1519 bh_unlock_sock(sk);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3948c86e59ca..ab0c9efd1efa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -131,7 +131,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
131static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) 131static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
132{ 132{
133 struct sockaddr_in sin; 133 struct sockaddr_in sin;
134 struct iphdr *iph = ip_hdr(skb); 134 const struct iphdr *iph = ip_hdr(skb);
135 __be16 *ports = (__be16 *)skb_transport_header(skb); 135 __be16 *ports = (__be16 *)skb_transport_header(skb);
136 136
137 if (skb_transport_offset(skb) + 4 > skb->len) 137 if (skb_transport_offset(skb) + 4 > skb->len)
@@ -451,6 +451,11 @@ out:
451} 451}
452 452
453 453
454static void opt_kfree_rcu(struct rcu_head *head)
455{
456 kfree(container_of(head, struct ip_options_rcu, rcu));
457}
458
454/* 459/*
455 * Socket option code for IP. This is the end of the line after any 460 * Socket option code for IP. This is the end of the line after any
456 * TCP,UDP etc options on an IP socket. 461 * TCP,UDP etc options on an IP socket.
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
497 switch (optname) { 502 switch (optname) {
498 case IP_OPTIONS: 503 case IP_OPTIONS:
499 { 504 {
500 struct ip_options *opt = NULL; 505 struct ip_options_rcu *old, *opt = NULL;
506
501 if (optlen > 40) 507 if (optlen > 40)
502 goto e_inval; 508 goto e_inval;
503 err = ip_options_get_from_user(sock_net(sk), &opt, 509 err = ip_options_get_from_user(sock_net(sk), &opt,
504 optval, optlen); 510 optval, optlen);
505 if (err) 511 if (err)
506 break; 512 break;
513 old = rcu_dereference_protected(inet->inet_opt,
514 sock_owned_by_user(sk));
507 if (inet->is_icsk) { 515 if (inet->is_icsk) {
508 struct inet_connection_sock *icsk = inet_csk(sk); 516 struct inet_connection_sock *icsk = inet_csk(sk);
509#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 517#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
512 (TCPF_LISTEN | TCPF_CLOSE)) && 520 (TCPF_LISTEN | TCPF_CLOSE)) &&
513 inet->inet_daddr != LOOPBACK4_IPV6)) { 521 inet->inet_daddr != LOOPBACK4_IPV6)) {
514#endif 522#endif
515 if (inet->opt) 523 if (old)
516 icsk->icsk_ext_hdr_len -= inet->opt->optlen; 524 icsk->icsk_ext_hdr_len -= old->opt.optlen;
517 if (opt) 525 if (opt)
518 icsk->icsk_ext_hdr_len += opt->optlen; 526 icsk->icsk_ext_hdr_len += opt->opt.optlen;
519 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 527 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
520#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 528#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
521 } 529 }
522#endif 530#endif
523 } 531 }
524 opt = xchg(&inet->opt, opt); 532 rcu_assign_pointer(inet->inet_opt, opt);
525 kfree(opt); 533 if (old)
534 call_rcu(&old->rcu, opt_kfree_rcu);
526 break; 535 break;
527 } 536 }
528 case IP_PKTINFO: 537 case IP_PKTINFO:
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1081 case IP_OPTIONS: 1090 case IP_OPTIONS:
1082 { 1091 {
1083 unsigned char optbuf[sizeof(struct ip_options)+40]; 1092 unsigned char optbuf[sizeof(struct ip_options)+40];
1084 struct ip_options * opt = (struct ip_options *)optbuf; 1093 struct ip_options *opt = (struct ip_options *)optbuf;
1094 struct ip_options_rcu *inet_opt;
1095
1096 inet_opt = rcu_dereference_protected(inet->inet_opt,
1097 sock_owned_by_user(sk));
1085 opt->optlen = 0; 1098 opt->optlen = 0;
1086 if (inet->opt) 1099 if (inet_opt)
1087 memcpy(optbuf, inet->opt, 1100 memcpy(optbuf, &inet_opt->opt,
1088 sizeof(struct ip_options)+ 1101 sizeof(struct ip_options) +
1089 inet->opt->optlen); 1102 inet_opt->opt.optlen);
1090 release_sock(sk); 1103 release_sock(sk);
1091 1104
1092 if (opt->optlen == 0) 1105 if (opt->optlen == 0)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 629067571f02..c857f6f49b03 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -27,7 +27,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
27{ 27{
28 struct net *net = dev_net(skb->dev); 28 struct net *net = dev_net(skb->dev);
29 __be32 spi; 29 __be32 spi;
30 struct iphdr *iph = (struct iphdr *)skb->data; 30 const struct iphdr *iph = (const struct iphdr *)skb->data;
31 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); 31 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
32 struct xfrm_state *x; 32 struct xfrm_state *x;
33 33
@@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
36 return; 36 return;
37 37
38 spi = htonl(ntohs(ipch->cpi)); 38 spi = htonl(ntohs(ipch->cpi));
39 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, 39 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
40 spi, IPPROTO_COMP, AF_INET); 40 spi, IPPROTO_COMP, AF_INET);
41 if (!x) 41 if (!x)
42 return; 42 return;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cbff2ecccf3d..ab7e5542c1cf 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -87,8 +87,8 @@
87#endif 87#endif
88 88
89/* Define the friendly delay before and after opening net devices */ 89/* Define the friendly delay before and after opening net devices */
90#define CONF_PRE_OPEN 500 /* Before opening: 1/2 second */ 90#define CONF_POST_OPEN 10 /* After opening: 10 msecs */
91#define CONF_POST_OPEN 1 /* After opening: 1 second */ 91#define CONF_CARRIER_TIMEOUT 120000 /* Wait for carrier timeout */
92 92
93/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ 93/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
94#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ 94#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */
@@ -188,14 +188,14 @@ struct ic_device {
188static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ 188static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
189static struct net_device *ic_dev __initdata = NULL; /* Selected device */ 189static struct net_device *ic_dev __initdata = NULL; /* Selected device */
190 190
191static bool __init ic_device_match(struct net_device *dev) 191static bool __init ic_is_init_dev(struct net_device *dev)
192{ 192{
193 if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : 193 if (dev->flags & IFF_LOOPBACK)
194 return false;
195 return user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
194 (!(dev->flags & IFF_LOOPBACK) && 196 (!(dev->flags & IFF_LOOPBACK) &&
195 (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && 197 (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
196 strncmp(dev->name, "dummy", 5))) 198 strncmp(dev->name, "dummy", 5));
197 return true;
198 return false;
199} 199}
200 200
201static int __init ic_open_devs(void) 201static int __init ic_open_devs(void)
@@ -203,6 +203,7 @@ static int __init ic_open_devs(void)
203 struct ic_device *d, **last; 203 struct ic_device *d, **last;
204 struct net_device *dev; 204 struct net_device *dev;
205 unsigned short oflags; 205 unsigned short oflags;
206 unsigned long start;
206 207
207 last = &ic_first_dev; 208 last = &ic_first_dev;
208 rtnl_lock(); 209 rtnl_lock();
@@ -216,9 +217,7 @@ static int __init ic_open_devs(void)
216 } 217 }
217 218
218 for_each_netdev(&init_net, dev) { 219 for_each_netdev(&init_net, dev) {
219 if (dev->flags & IFF_LOOPBACK) 220 if (ic_is_init_dev(dev)) {
220 continue;
221 if (ic_device_match(dev)) {
222 int able = 0; 221 int able = 0;
223 if (dev->mtu >= 364) 222 if (dev->mtu >= 364)
224 able |= IC_BOOTP; 223 able |= IC_BOOTP;
@@ -252,6 +251,17 @@ static int __init ic_open_devs(void)
252 dev->name, able, d->xid)); 251 dev->name, able, d->xid));
253 } 252 }
254 } 253 }
254
255 /* wait for a carrier on at least one device */
256 start = jiffies;
257 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
258 for_each_netdev(&init_net, dev)
259 if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
260 goto have_carrier;
261
262 msleep(1);
263 }
264have_carrier:
255 rtnl_unlock(); 265 rtnl_unlock();
256 266
257 *last = NULL; 267 *last = NULL;
@@ -1324,14 +1334,13 @@ static int __init wait_for_devices(void)
1324{ 1334{
1325 int i; 1335 int i;
1326 1336
1327 msleep(CONF_PRE_OPEN);
1328 for (i = 0; i < DEVICE_WAIT_MAX; i++) { 1337 for (i = 0; i < DEVICE_WAIT_MAX; i++) {
1329 struct net_device *dev; 1338 struct net_device *dev;
1330 int found = 0; 1339 int found = 0;
1331 1340
1332 rtnl_lock(); 1341 rtnl_lock();
1333 for_each_netdev(&init_net, dev) { 1342 for_each_netdev(&init_net, dev) {
1334 if (ic_device_match(dev)) { 1343 if (ic_is_init_dev(dev)) {
1335 found = 1; 1344 found = 1;
1336 break; 1345 break;
1337 } 1346 }
@@ -1378,7 +1387,7 @@ static int __init ip_auto_config(void)
1378 return err; 1387 return err;
1379 1388
1380 /* Give drivers a chance to settle */ 1389 /* Give drivers a chance to settle */
1381 ssleep(CONF_POST_OPEN); 1390 msleep(CONF_POST_OPEN);
1382 1391
1383 /* 1392 /*
1384 * If the config information is insufficient (e.g., our IP address or 1393 * If the config information is insufficient (e.g., our IP address or
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfc17c5914e7..378b20b7ca6e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -276,11 +276,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
276 276
277 dev_net_set(dev, net); 277 dev_net_set(dev, net);
278 278
279 if (strchr(name, '%')) {
280 if (dev_alloc_name(dev, name) < 0)
281 goto failed_free;
282 }
283
284 nt = netdev_priv(dev); 279 nt = netdev_priv(dev);
285 nt->parms = *parms; 280 nt->parms = *parms;
286 281
@@ -319,7 +314,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
319 8 bytes of packet payload. It means, that precise relaying of 314 8 bytes of packet payload. It means, that precise relaying of
320 ICMP in the real Internet is absolutely infeasible. 315 ICMP in the real Internet is absolutely infeasible.
321 */ 316 */
322 struct iphdr *iph = (struct iphdr *)skb->data; 317 const struct iphdr *iph = (const struct iphdr *)skb->data;
323 const int type = icmp_hdr(skb)->type; 318 const int type = icmp_hdr(skb)->type;
324 const int code = icmp_hdr(skb)->code; 319 const int code = icmp_hdr(skb)->code;
325 struct ip_tunnel *t; 320 struct ip_tunnel *t;
@@ -433,15 +428,16 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
433{ 428{
434 struct ip_tunnel *tunnel = netdev_priv(dev); 429 struct ip_tunnel *tunnel = netdev_priv(dev);
435 struct pcpu_tstats *tstats; 430 struct pcpu_tstats *tstats;
436 struct iphdr *tiph = &tunnel->parms.iph; 431 const struct iphdr *tiph = &tunnel->parms.iph;
437 u8 tos = tunnel->parms.iph.tos; 432 u8 tos = tunnel->parms.iph.tos;
438 __be16 df = tiph->frag_off; 433 __be16 df = tiph->frag_off;
439 struct rtable *rt; /* Route to the other host */ 434 struct rtable *rt; /* Route to the other host */
440 struct net_device *tdev; /* Device to other host */ 435 struct net_device *tdev; /* Device to other host */
441 struct iphdr *old_iph = ip_hdr(skb); 436 const struct iphdr *old_iph = ip_hdr(skb);
442 struct iphdr *iph; /* Our new IP header */ 437 struct iphdr *iph; /* Our new IP header */
443 unsigned int max_headroom; /* The extra header space needed */ 438 unsigned int max_headroom; /* The extra header space needed */
444 __be32 dst = tiph->daddr; 439 __be32 dst = tiph->daddr;
440 struct flowi4 fl4;
445 int mtu; 441 int mtu;
446 442
447 if (skb->protocol != htons(ETH_P_IP)) 443 if (skb->protocol != htons(ETH_P_IP))
@@ -460,7 +456,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
460 goto tx_error_icmp; 456 goto tx_error_icmp;
461 } 457 }
462 458
463 rt = ip_route_output_ports(dev_net(dev), NULL, 459 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
464 dst, tiph->saddr, 460 dst, tiph->saddr,
465 0, 0, 461 0, 0,
466 IPPROTO_IPIP, RT_TOS(tos), 462 IPPROTO_IPIP, RT_TOS(tos),
@@ -549,8 +545,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
549 iph->frag_off = df; 545 iph->frag_off = df;
550 iph->protocol = IPPROTO_IPIP; 546 iph->protocol = IPPROTO_IPIP;
551 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); 547 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
552 iph->daddr = rt->rt_dst; 548 iph->daddr = fl4.daddr;
553 iph->saddr = rt->rt_src; 549 iph->saddr = fl4.saddr;
554 550
555 if ((iph->ttl = tiph->ttl) == 0) 551 if ((iph->ttl = tiph->ttl) == 0)
556 iph->ttl = old_iph->ttl; 552 iph->ttl = old_iph->ttl;
@@ -572,19 +568,21 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
572{ 568{
573 struct net_device *tdev = NULL; 569 struct net_device *tdev = NULL;
574 struct ip_tunnel *tunnel; 570 struct ip_tunnel *tunnel;
575 struct iphdr *iph; 571 const struct iphdr *iph;
576 572
577 tunnel = netdev_priv(dev); 573 tunnel = netdev_priv(dev);
578 iph = &tunnel->parms.iph; 574 iph = &tunnel->parms.iph;
579 575
580 if (iph->daddr) { 576 if (iph->daddr) {
581 struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, 577 struct rtable *rt;
582 iph->daddr, iph->saddr, 578 struct flowi4 fl4;
583 0, 0, 579
584 IPPROTO_IPIP, 580 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
585 RT_TOS(iph->tos), 581 iph->daddr, iph->saddr,
586 tunnel->parms.link); 582 0, 0,
587 583 IPPROTO_IPIP,
584 RT_TOS(iph->tos),
585 tunnel->parms.link);
588 if (!IS_ERR(rt)) { 586 if (!IS_ERR(rt)) {
589 tdev = rt->dst.dev; 587 tdev = rt->dst.dev;
590 ip_rt_put(rt); 588 ip_rt_put(rt);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1f62eaeb6de4..30a7763c400e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1549,7 +1549,7 @@ static struct notifier_block ip_mr_notifier = {
1549static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) 1549static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1550{ 1550{
1551 struct iphdr *iph; 1551 struct iphdr *iph;
1552 struct iphdr *old_iph = ip_hdr(skb); 1552 const struct iphdr *old_iph = ip_hdr(skb);
1553 1553
1554 skb_push(skb, sizeof(struct iphdr)); 1554 skb_push(skb, sizeof(struct iphdr));
1555 skb->transport_header = skb->network_header; 1555 skb->transport_header = skb->network_header;
@@ -1595,6 +1595,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1595 struct vif_device *vif = &mrt->vif_table[vifi]; 1595 struct vif_device *vif = &mrt->vif_table[vifi];
1596 struct net_device *dev; 1596 struct net_device *dev;
1597 struct rtable *rt; 1597 struct rtable *rt;
1598 struct flowi4 fl4;
1598 int encap = 0; 1599 int encap = 0;
1599 1600
1600 if (vif->dev == NULL) 1601 if (vif->dev == NULL)
@@ -1612,7 +1613,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1612#endif 1613#endif
1613 1614
1614 if (vif->flags & VIFF_TUNNEL) { 1615 if (vif->flags & VIFF_TUNNEL) {
1615 rt = ip_route_output_ports(net, NULL, 1616 rt = ip_route_output_ports(net, &fl4, NULL,
1616 vif->remote, vif->local, 1617 vif->remote, vif->local,
1617 0, 0, 1618 0, 0,
1618 IPPROTO_IPIP, 1619 IPPROTO_IPIP,
@@ -1621,7 +1622,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1621 goto out_free; 1622 goto out_free;
1622 encap = sizeof(struct iphdr); 1623 encap = sizeof(struct iphdr);
1623 } else { 1624 } else {
1624 rt = ip_route_output_ports(net, NULL, iph->daddr, 0, 1625 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
1625 0, 0, 1626 0, 0,
1626 IPPROTO_IPIP, 1627 IPPROTO_IPIP,
1627 RT_TOS(iph->tos), vif->link); 1628 RT_TOS(iph->tos), vif->link);
@@ -1788,12 +1789,14 @@ dont_forward:
1788 return 0; 1789 return 0;
1789} 1790}
1790 1791
1791static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) 1792static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
1792{ 1793{
1794 struct rtable *rt = skb_rtable(skb);
1795 struct iphdr *iph = ip_hdr(skb);
1793 struct flowi4 fl4 = { 1796 struct flowi4 fl4 = {
1794 .daddr = rt->rt_key_dst, 1797 .daddr = iph->daddr,
1795 .saddr = rt->rt_key_src, 1798 .saddr = iph->saddr,
1796 .flowi4_tos = rt->rt_tos, 1799 .flowi4_tos = iph->tos,
1797 .flowi4_oif = rt->rt_oif, 1800 .flowi4_oif = rt->rt_oif,
1798 .flowi4_iif = rt->rt_iif, 1801 .flowi4_iif = rt->rt_iif,
1799 .flowi4_mark = rt->rt_mark, 1802 .flowi4_mark = rt->rt_mark,
@@ -1825,7 +1828,7 @@ int ip_mr_input(struct sk_buff *skb)
1825 if (IPCB(skb)->flags & IPSKB_FORWARDED) 1828 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1826 goto dont_forward; 1829 goto dont_forward;
1827 1830
1828 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); 1831 mrt = ipmr_rt_fib_lookup(net, skb);
1829 if (IS_ERR(mrt)) { 1832 if (IS_ERR(mrt)) {
1830 kfree_skb(skb); 1833 kfree_skb(skb);
1831 return PTR_ERR(mrt); 1834 return PTR_ERR(mrt);
@@ -1957,7 +1960,7 @@ int pim_rcv_v1(struct sk_buff *skb)
1957 1960
1958 pim = igmp_hdr(skb); 1961 pim = igmp_hdr(skb);
1959 1962
1960 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); 1963 mrt = ipmr_rt_fib_lookup(net, skb);
1961 if (IS_ERR(mrt)) 1964 if (IS_ERR(mrt))
1962 goto drop; 1965 goto drop;
1963 if (!mrt->mroute_do_pim || 1966 if (!mrt->mroute_do_pim ||
@@ -1989,7 +1992,7 @@ static int pim_rcv(struct sk_buff *skb)
1989 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1992 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1990 goto drop; 1993 goto drop;
1991 1994
1992 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); 1995 mrt = ipmr_rt_fib_lookup(net, skb);
1993 if (IS_ERR(mrt)) 1996 if (IS_ERR(mrt))
1994 goto drop; 1997 goto drop;
1995 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1998 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
@@ -2038,20 +2041,20 @@ rtattr_failure:
2038 return -EMSGSIZE; 2041 return -EMSGSIZE;
2039} 2042}
2040 2043
2041int ipmr_get_route(struct net *net, 2044int ipmr_get_route(struct net *net, struct sk_buff *skb,
2042 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 2045 __be32 saddr, __be32 daddr,
2046 struct rtmsg *rtm, int nowait)
2043{ 2047{
2044 int err;
2045 struct mr_table *mrt;
2046 struct mfc_cache *cache; 2048 struct mfc_cache *cache;
2047 struct rtable *rt = skb_rtable(skb); 2049 struct mr_table *mrt;
2050 int err;
2048 2051
2049 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2052 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2050 if (mrt == NULL) 2053 if (mrt == NULL)
2051 return -ENOENT; 2054 return -ENOENT;
2052 2055
2053 rcu_read_lock(); 2056 rcu_read_lock();
2054 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 2057 cache = ipmr_cache_find(mrt, saddr, daddr);
2055 2058
2056 if (cache == NULL) { 2059 if (cache == NULL) {
2057 struct sk_buff *skb2; 2060 struct sk_buff *skb2;
@@ -2084,8 +2087,8 @@ int ipmr_get_route(struct net *net,
2084 skb_reset_network_header(skb2); 2087 skb_reset_network_header(skb2);
2085 iph = ip_hdr(skb2); 2088 iph = ip_hdr(skb2);
2086 iph->ihl = sizeof(struct iphdr) >> 2; 2089 iph->ihl = sizeof(struct iphdr) >> 2;
2087 iph->saddr = rt->rt_src; 2090 iph->saddr = saddr;
2088 iph->daddr = rt->rt_dst; 2091 iph->daddr = daddr;
2089 iph->version = 0; 2092 iph->version = 0;
2090 err = ipmr_cache_unresolved(mrt, vif, skb2); 2093 err = ipmr_cache_unresolved(mrt, vif, skb2);
2091 read_unlock(&mrt_lock); 2094 read_unlock(&mrt_lock);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 89bc7e66d598..fd7a3f68917f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
260 void *table_base; 260 void *table_base;
261 const struct xt_table_info *private; 261 const struct xt_table_info *private;
262 struct xt_action_param acpar; 262 struct xt_action_param acpar;
263 unsigned int addend;
263 264
264 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) 265 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
265 return NF_DROP; 266 return NF_DROP;
@@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
267 indev = in ? in->name : nulldevname; 268 indev = in ? in->name : nulldevname;
268 outdev = out ? out->name : nulldevname; 269 outdev = out ? out->name : nulldevname;
269 270
270 xt_info_rdlock_bh(); 271 local_bh_disable();
272 addend = xt_write_recseq_begin();
271 private = table->private; 273 private = table->private;
272 table_base = private->entries[smp_processor_id()]; 274 table_base = private->entries[smp_processor_id()];
273 275
@@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
338 /* Verdict */ 340 /* Verdict */
339 break; 341 break;
340 } while (!acpar.hotdrop); 342 } while (!acpar.hotdrop);
341 xt_info_rdunlock_bh(); 343 xt_write_recseq_end(addend);
344 local_bh_enable();
342 345
343 if (acpar.hotdrop) 346 if (acpar.hotdrop)
344 return NF_DROP; 347 return NF_DROP;
@@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t,
712 unsigned int i; 715 unsigned int i;
713 716
714 for_each_possible_cpu(cpu) { 717 for_each_possible_cpu(cpu) {
715 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; 718 seqcount_t *s = &per_cpu(xt_recseq, cpu);
716 719
717 i = 0; 720 i = 0;
718 xt_entry_foreach(iter, t->entries[cpu], t->size) { 721 xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t,
720 unsigned int start; 723 unsigned int start;
721 724
722 do { 725 do {
723 start = read_seqbegin(lock); 726 start = read_seqcount_begin(s);
724 bcnt = iter->counters.bcnt; 727 bcnt = iter->counters.bcnt;
725 pcnt = iter->counters.pcnt; 728 pcnt = iter->counters.pcnt;
726 } while (read_seqretry(lock, start)); 729 } while (read_seqcount_retry(s, start));
727 730
728 ADD_COUNTER(counters[i], bcnt, pcnt); 731 ADD_COUNTER(counters[i], bcnt, pcnt);
729 ++i; 732 ++i;
@@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1115 int ret = 0; 1118 int ret = 0;
1116 void *loc_cpu_entry; 1119 void *loc_cpu_entry;
1117 struct arpt_entry *iter; 1120 struct arpt_entry *iter;
1121 unsigned int addend;
1118#ifdef CONFIG_COMPAT 1122#ifdef CONFIG_COMPAT
1119 struct compat_xt_counters_info compat_tmp; 1123 struct compat_xt_counters_info compat_tmp;
1120 1124
@@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user,
1171 /* Choose the copy that is on our node */ 1175 /* Choose the copy that is on our node */
1172 curcpu = smp_processor_id(); 1176 curcpu = smp_processor_id();
1173 loc_cpu_entry = private->entries[curcpu]; 1177 loc_cpu_entry = private->entries[curcpu];
1174 xt_info_wrlock(curcpu); 1178 addend = xt_write_recseq_begin();
1175 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1179 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1176 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1180 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1177 ++i; 1181 ++i;
1178 } 1182 }
1179 xt_info_wrunlock(curcpu); 1183 xt_write_recseq_end(addend);
1180 unlock_up_free: 1184 unlock_up_free:
1181 local_bh_enable(); 1185 local_bh_enable();
1182 xt_table_unlock(t); 1186 xt_table_unlock(t);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 704915028009..764743843503 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info)
68} 68}
69EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); 69EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
70 70
71/*
72 We keep a set of rules for each CPU, so we can avoid write-locking
73 them in the softirq when updating the counters and therefore
74 only need to read-lock in the softirq; doing a write_lock_bh() in user
75 context stops packets coming through and allows user context to read
76 the counters or update the rules.
77
78 Hence the start of any table is given by get_table() below. */
79
80/* Returns whether matches rule or not. */ 71/* Returns whether matches rule or not. */
81/* Performance critical - called for every packet */ 72/* Performance critical - called for every packet */
82static inline bool 73static inline bool
@@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb,
311 unsigned int *stackptr, origptr, cpu; 302 unsigned int *stackptr, origptr, cpu;
312 const struct xt_table_info *private; 303 const struct xt_table_info *private;
313 struct xt_action_param acpar; 304 struct xt_action_param acpar;
305 unsigned int addend;
314 306
315 /* Initialization */ 307 /* Initialization */
316 ip = ip_hdr(skb); 308 ip = ip_hdr(skb);
@@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb,
331 acpar.hooknum = hook; 323 acpar.hooknum = hook;
332 324
333 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 325 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
334 xt_info_rdlock_bh(); 326 local_bh_disable();
327 addend = xt_write_recseq_begin();
335 private = table->private; 328 private = table->private;
336 cpu = smp_processor_id(); 329 cpu = smp_processor_id();
337 table_base = private->entries[cpu]; 330 table_base = private->entries[cpu];
@@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb,
430 pr_debug("Exiting %s; resetting sp from %u to %u\n", 423 pr_debug("Exiting %s; resetting sp from %u to %u\n",
431 __func__, *stackptr, origptr); 424 __func__, *stackptr, origptr);
432 *stackptr = origptr; 425 *stackptr = origptr;
433 xt_info_rdunlock_bh(); 426 xt_write_recseq_end(addend);
427 local_bh_enable();
428
434#ifdef DEBUG_ALLOW_ALL 429#ifdef DEBUG_ALLOW_ALL
435 return NF_ACCEPT; 430 return NF_ACCEPT;
436#else 431#else
@@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t,
886 unsigned int i; 881 unsigned int i;
887 882
888 for_each_possible_cpu(cpu) { 883 for_each_possible_cpu(cpu) {
889 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; 884 seqcount_t *s = &per_cpu(xt_recseq, cpu);
890 885
891 i = 0; 886 i = 0;
892 xt_entry_foreach(iter, t->entries[cpu], t->size) { 887 xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t,
894 unsigned int start; 889 unsigned int start;
895 890
896 do { 891 do {
897 start = read_seqbegin(lock); 892 start = read_seqcount_begin(s);
898 bcnt = iter->counters.bcnt; 893 bcnt = iter->counters.bcnt;
899 pcnt = iter->counters.pcnt; 894 pcnt = iter->counters.pcnt;
900 } while (read_seqretry(lock, start)); 895 } while (read_seqcount_retry(s, start));
901 896
902 ADD_COUNTER(counters[i], bcnt, pcnt); 897 ADD_COUNTER(counters[i], bcnt, pcnt);
903 ++i; /* macro does multi eval of i */ 898 ++i; /* macro does multi eval of i */
@@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user,
1312 int ret = 0; 1307 int ret = 0;
1313 void *loc_cpu_entry; 1308 void *loc_cpu_entry;
1314 struct ipt_entry *iter; 1309 struct ipt_entry *iter;
1310 unsigned int addend;
1315#ifdef CONFIG_COMPAT 1311#ifdef CONFIG_COMPAT
1316 struct compat_xt_counters_info compat_tmp; 1312 struct compat_xt_counters_info compat_tmp;
1317 1313
@@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user,
1368 /* Choose the copy that is on our node */ 1364 /* Choose the copy that is on our node */
1369 curcpu = smp_processor_id(); 1365 curcpu = smp_processor_id();
1370 loc_cpu_entry = private->entries[curcpu]; 1366 loc_cpu_entry = private->entries[curcpu];
1371 xt_info_wrlock(curcpu); 1367 addend = xt_write_recseq_begin();
1372 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1368 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1373 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1369 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1374 ++i; 1370 ++i;
1375 } 1371 }
1376 xt_info_wrunlock(curcpu); 1372 xt_write_recseq_end(addend);
1377 unlock_up_free: 1373 unlock_up_free:
1378 local_bh_enable(); 1374 local_bh_enable();
1379 xt_table_unlock(t); 1375 xt_table_unlock(t);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 31427fb57aa8..99cfa28b6d38 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,7 +153,7 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
153} 153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); 154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155 155
156static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data, 156static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
157 int datalen, __sum16 *check, int oldlen) 157 int datalen, __sum16 *check, int oldlen)
158{ 158{
159 struct rtable *rt = skb_rtable(skb); 159 struct rtable *rt = skb_rtable(skb);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
new file mode 100644
index 000000000000..1f3bb11490c9
--- /dev/null
+++ b/net/ipv4/ping.c
@@ -0,0 +1,935 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * "Ping" sockets
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * Based on ipv4/udp.c code.
14 *
15 * Authors: Vasiliy Kulikov / Openwall (for Linux 2.6),
16 * Pavel Kankovsky (for Linux 2.4.32)
17 *
18 * Pavel gave all rights to bugs to Vasiliy,
19 * none of the bugs are Pavel's now.
20 *
21 */
22
23#include <asm/system.h>
24#include <linux/uaccess.h>
25#include <linux/types.h>
26#include <linux/fcntl.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/in.h>
30#include <linux/errno.h>
31#include <linux/timer.h>
32#include <linux/mm.h>
33#include <linux/inet.h>
34#include <linux/netdevice.h>
35#include <net/snmp.h>
36#include <net/ip.h>
37#include <net/ipv6.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <linux/skbuff.h>
41#include <linux/proc_fs.h>
42#include <net/sock.h>
43#include <net/ping.h>
44#include <net/icmp.h>
45#include <net/udp.h>
46#include <net/route.h>
47#include <net/inet_common.h>
48#include <net/checksum.h>
49
50
51static struct ping_table ping_table;
52
53static u16 ping_port_rover;
54
55static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask)
56{
57 int res = (num + net_hash_mix(net)) & mask;
58 pr_debug("hash(%d) = %d\n", num, res);
59 return res;
60}
61
62static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
63 struct net *net, unsigned num)
64{
65 return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
66}
67
68static int ping_v4_get_port(struct sock *sk, unsigned short ident)
69{
70 struct hlist_nulls_node *node;
71 struct hlist_nulls_head *hlist;
72 struct inet_sock *isk, *isk2;
73 struct sock *sk2 = NULL;
74
75 isk = inet_sk(sk);
76 write_lock_bh(&ping_table.lock);
77 if (ident == 0) {
78 u32 i;
79 u16 result = ping_port_rover + 1;
80
81 for (i = 0; i < (1L << 16); i++, result++) {
82 if (!result)
83 result++; /* avoid zero */
84 hlist = ping_hashslot(&ping_table, sock_net(sk),
85 result);
86 ping_portaddr_for_each_entry(sk2, node, hlist) {
87 isk2 = inet_sk(sk2);
88
89 if (isk2->inet_num == result)
90 goto next_port;
91 }
92
93 /* found */
94 ping_port_rover = ident = result;
95 break;
96next_port:
97 ;
98 }
99 if (i >= (1L << 16))
100 goto fail;
101 } else {
102 hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
103 ping_portaddr_for_each_entry(sk2, node, hlist) {
104 isk2 = inet_sk(sk2);
105
106 if ((isk2->inet_num == ident) &&
107 (sk2 != sk) &&
108 (!sk2->sk_reuse || !sk->sk_reuse))
109 goto fail;
110 }
111 }
112
113 pr_debug("found port/ident = %d\n", ident);
114 isk->inet_num = ident;
115 if (sk_unhashed(sk)) {
116 pr_debug("was not hashed\n");
117 sock_hold(sk);
118 hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
119 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
120 }
121 write_unlock_bh(&ping_table.lock);
122 return 0;
123
124fail:
125 write_unlock_bh(&ping_table.lock);
126 return 1;
127}
128
129static void ping_v4_hash(struct sock *sk)
130{
131 pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
132 BUG(); /* "Please do not press this button again." */
133}
134
135static void ping_v4_unhash(struct sock *sk)
136{
137 struct inet_sock *isk = inet_sk(sk);
138 pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
139 if (sk_hashed(sk)) {
140 struct hlist_nulls_head *hslot;
141
142 hslot = ping_hashslot(&ping_table, sock_net(sk), isk->inet_num);
143 write_lock_bh(&ping_table.lock);
144 hlist_nulls_del(&sk->sk_nulls_node);
145 sock_put(sk);
146 isk->inet_num = isk->inet_sport = 0;
147 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
148 write_unlock_bh(&ping_table.lock);
149 }
150}
151
152static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr,
153 u16 ident, int dif)
154{
155 struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
156 struct sock *sk = NULL;
157 struct inet_sock *isk;
158 struct hlist_nulls_node *hnode;
159
160 pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n",
161 (int)ident, (unsigned long)daddr, dif);
162 read_lock_bh(&ping_table.lock);
163
164 ping_portaddr_for_each_entry(sk, hnode, hslot) {
165 isk = inet_sk(sk);
166
167 pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk,
168 (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr,
169 sk->sk_bound_dev_if);
170
171 pr_debug("iterate\n");
172 if (isk->inet_num != ident)
173 continue;
174 if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
175 continue;
176 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
177 continue;
178
179 sock_hold(sk);
180 goto exit;
181 }
182
183 sk = NULL;
184exit:
185 read_unlock_bh(&ping_table.lock);
186
187 return sk;
188}
189
190static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
191 gid_t *high)
192{
193 gid_t *data = net->ipv4.sysctl_ping_group_range;
194 unsigned seq;
195 do {
196 seq = read_seqbegin(&sysctl_local_ports.lock);
197
198 *low = data[0];
199 *high = data[1];
200 } while (read_seqretry(&sysctl_local_ports.lock, seq));
201}
202
203
204static int ping_init_sock(struct sock *sk)
205{
206 struct net *net = sock_net(sk);
207 gid_t group = current_egid();
208 gid_t range[2];
209 struct group_info *group_info = get_current_groups();
210 int i, j, count = group_info->ngroups;
211
212 inet_get_ping_group_range_net(net, range, range+1);
213 if (range[0] <= group && group <= range[1])
214 return 0;
215
216 for (i = 0; i < group_info->nblocks; i++) {
217 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
218
219 for (j = 0; j < cp_count; j++) {
220 group = group_info->blocks[i][j];
221 if (range[0] <= group && group <= range[1])
222 return 0;
223 }
224
225 count -= cp_count;
226 }
227
228 return -EACCES;
229}
230
231static void ping_close(struct sock *sk, long timeout)
232{
233 pr_debug("ping_close(sk=%p,sk->num=%u)\n",
234 inet_sk(sk), inet_sk(sk)->inet_num);
235 pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
236
237 sk_common_release(sk);
238}
239
240/*
241 * We need our own bind because there are no privileged id's == local ports.
242 * Moreover, we don't allow binding to multi- and broadcast addresses.
243 */
244
245static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
246{
247 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
248 struct inet_sock *isk = inet_sk(sk);
249 unsigned short snum;
250 int chk_addr_ret;
251 int err;
252
253 if (addr_len < sizeof(struct sockaddr_in))
254 return -EINVAL;
255
256 pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
257 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
258
259 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
260 if (addr->sin_addr.s_addr == INADDR_ANY)
261 chk_addr_ret = RTN_LOCAL;
262
263 if ((sysctl_ip_nonlocal_bind == 0 &&
264 isk->freebind == 0 && isk->transparent == 0 &&
265 chk_addr_ret != RTN_LOCAL) ||
266 chk_addr_ret == RTN_MULTICAST ||
267 chk_addr_ret == RTN_BROADCAST)
268 return -EADDRNOTAVAIL;
269
270 lock_sock(sk);
271
272 err = -EINVAL;
273 if (isk->inet_num != 0)
274 goto out;
275
276 err = -EADDRINUSE;
277 isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
278 snum = ntohs(addr->sin_port);
279 if (ping_v4_get_port(sk, snum) != 0) {
280 isk->inet_saddr = isk->inet_rcv_saddr = 0;
281 goto out;
282 }
283
284 pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n",
285 (int)isk->inet_num,
286 (unsigned long) isk->inet_rcv_saddr,
287 (int)sk->sk_bound_dev_if);
288
289 err = 0;
290 if (isk->inet_rcv_saddr)
291 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
292 if (snum)
293 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
294 isk->inet_sport = htons(isk->inet_num);
295 isk->inet_daddr = 0;
296 isk->inet_dport = 0;
297 sk_dst_reset(sk);
298out:
299 release_sock(sk);
300 pr_debug("ping_v4_bind -> %d\n", err);
301 return err;
302}
303
/*
 * Tell whether an ICMP type/code pair is handled by ping sockets.
 * Only an echo request with code 0 qualifies; returns 1 for it, else 0.
 */

static inline int ping_supported(int type, int code)
{
	return (type == ICMP_ECHO && code == 0) ? 1 : 0;
}
314
315/*
316 * This routine is called by the ICMP module when it gets some
317 * sort of error condition.
318 */
319
320static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
321
322void ping_err(struct sk_buff *skb, u32 info)
323{
324 struct iphdr *iph = (struct iphdr *)skb->data;
325 struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
326 struct inet_sock *inet_sock;
327 int type = icmph->type;
328 int code = icmph->code;
329 struct net *net = dev_net(skb->dev);
330 struct sock *sk;
331 int harderr;
332 int err;
333
334 /* We assume the packet has already been checked by icmp_unreach */
335
336 if (!ping_supported(icmph->type, icmph->code))
337 return;
338
339 pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
340 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
341
342 sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
343 ntohs(icmph->un.echo.id), skb->dev->ifindex);
344 if (sk == NULL) {
345 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
346 pr_debug("no socket, dropping\n");
347 return; /* No socket for error */
348 }
349 pr_debug("err on socket %p\n", sk);
350
351 err = 0;
352 harderr = 0;
353 inet_sock = inet_sk(sk);
354
355 switch (type) {
356 default:
357 case ICMP_TIME_EXCEEDED:
358 err = EHOSTUNREACH;
359 break;
360 case ICMP_SOURCE_QUENCH:
361 /* This is not a real error but ping wants to see it.
362 * Report it with some fake errno. */
363 err = EREMOTEIO;
364 break;
365 case ICMP_PARAMETERPROB:
366 err = EPROTO;
367 harderr = 1;
368 break;
369 case ICMP_DEST_UNREACH:
370 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
371 if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
372 err = EMSGSIZE;
373 harderr = 1;
374 break;
375 }
376 goto out;
377 }
378 err = EHOSTUNREACH;
379 if (code <= NR_ICMP_UNREACH) {
380 harderr = icmp_err_convert[code].fatal;
381 err = icmp_err_convert[code].errno;
382 }
383 break;
384 case ICMP_REDIRECT:
385 /* See ICMP_SOURCE_QUENCH */
386 err = EREMOTEIO;
387 break;
388 }
389
390 /*
391 * RFC1122: OK. Passes ICMP errors back to application, as per
392 * 4.1.3.3.
393 */
394 if (!inet_sock->recverr) {
395 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
396 goto out;
397 } else {
398 ip_icmp_error(sk, skb, err, 0 /* no remote port */,
399 info, (u8 *)icmph);
400 }
401 sk->sk_err = err;
402 sk->sk_error_report(sk);
403out:
404 sock_put(sk);
405}
406
407/*
408 * Copy and checksum an ICMP Echo packet from user space into a buffer.
409 */
410
411struct pingfakehdr {
412 struct icmphdr icmph;
413 struct iovec *iov;
414 u32 wcheck;
415};
416
417static int ping_getfrag(void *from, char * to,
418 int offset, int fraglen, int odd, struct sk_buff *skb)
419{
420 struct pingfakehdr *pfh = (struct pingfakehdr *)from;
421
422 if (offset == 0) {
423 if (fraglen < sizeof(struct icmphdr))
424 BUG();
425 if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr),
426 pfh->iov, 0, fraglen - sizeof(struct icmphdr),
427 &pfh->wcheck))
428 return -EFAULT;
429
430 return 0;
431 }
432 if (offset < sizeof(struct icmphdr))
433 BUG();
434 if (csum_partial_copy_fromiovecend
435 (to, pfh->iov, offset - sizeof(struct icmphdr),
436 fraglen, &pfh->wcheck))
437 return -EFAULT;
438 return 0;
439}
440
441static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
442 struct flowi4 *fl4)
443{
444 struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
445
446 pfh->wcheck = csum_partial((char *)&pfh->icmph,
447 sizeof(struct icmphdr), pfh->wcheck);
448 pfh->icmph.checksum = csum_fold(pfh->wcheck);
449 memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
450 skb->ip_summed = CHECKSUM_NONE;
451 return ip_push_pending_frames(sk, fl4);
452}
453
454static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
455 size_t len)
456{
457 struct net *net = sock_net(sk);
458 struct flowi4 fl4;
459 struct inet_sock *inet = inet_sk(sk);
460 struct ipcm_cookie ipc;
461 struct icmphdr user_icmph;
462 struct pingfakehdr pfh;
463 struct rtable *rt = NULL;
464 struct ip_options_data opt_copy;
465 int free = 0;
466 u32 saddr, daddr, faddr;
467 u8 tos;
468 int err;
469
470 pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
471
472
473 if (len > 0xFFFF)
474 return -EMSGSIZE;
475
476 /*
477 * Check the flags.
478 */
479
480 /* Mirror BSD error message compatibility */
481 if (msg->msg_flags & MSG_OOB)
482 return -EOPNOTSUPP;
483
484 /*
485 * Fetch the ICMP header provided by the userland.
486 * iovec is modified!
487 */
488
489 if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
490 sizeof(struct icmphdr)))
491 return -EFAULT;
492 if (!ping_supported(user_icmph.type, user_icmph.code))
493 return -EINVAL;
494
495 /*
496 * Get and verify the address.
497 */
498
499 if (msg->msg_name) {
500 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
501 if (msg->msg_namelen < sizeof(*usin))
502 return -EINVAL;
503 if (usin->sin_family != AF_INET)
504 return -EINVAL;
505 daddr = usin->sin_addr.s_addr;
506 /* no remote port */
507 } else {
508 if (sk->sk_state != TCP_ESTABLISHED)
509 return -EDESTADDRREQ;
510 daddr = inet->inet_daddr;
511 /* no remote port */
512 }
513
514 ipc.addr = inet->inet_saddr;
515 ipc.opt = NULL;
516 ipc.oif = sk->sk_bound_dev_if;
517 ipc.tx_flags = 0;
518 err = sock_tx_timestamp(sk, &ipc.tx_flags);
519 if (err)
520 return err;
521
522 if (msg->msg_controllen) {
523 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
524 if (err)
525 return err;
526 if (ipc.opt)
527 free = 1;
528 }
529 if (!ipc.opt) {
530 struct ip_options_rcu *inet_opt;
531
532 rcu_read_lock();
533 inet_opt = rcu_dereference(inet->inet_opt);
534 if (inet_opt) {
535 memcpy(&opt_copy, inet_opt,
536 sizeof(*inet_opt) + inet_opt->opt.optlen);
537 ipc.opt = &opt_copy.opt;
538 }
539 rcu_read_unlock();
540 }
541
542 saddr = ipc.addr;
543 ipc.addr = faddr = daddr;
544
545 if (ipc.opt && ipc.opt->opt.srr) {
546 if (!daddr)
547 return -EINVAL;
548 faddr = ipc.opt->opt.faddr;
549 }
550 tos = RT_TOS(inet->tos);
551 if (sock_flag(sk, SOCK_LOCALROUTE) ||
552 (msg->msg_flags & MSG_DONTROUTE) ||
553 (ipc.opt && ipc.opt->opt.is_strictroute)) {
554 tos |= RTO_ONLINK;
555 }
556
557 if (ipv4_is_multicast(daddr)) {
558 if (!ipc.oif)
559 ipc.oif = inet->mc_index;
560 if (!saddr)
561 saddr = inet->mc_addr;
562 }
563
564 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
565 RT_SCOPE_UNIVERSE, sk->sk_protocol,
566 inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
567
568 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
569 rt = ip_route_output_flow(net, &fl4, sk);
570 if (IS_ERR(rt)) {
571 err = PTR_ERR(rt);
572 rt = NULL;
573 if (err == -ENETUNREACH)
574 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
575 goto out;
576 }
577
578 err = -EACCES;
579 if ((rt->rt_flags & RTCF_BROADCAST) &&
580 !sock_flag(sk, SOCK_BROADCAST))
581 goto out;
582
583 if (msg->msg_flags & MSG_CONFIRM)
584 goto do_confirm;
585back_from_confirm:
586
587 if (!ipc.addr)
588 ipc.addr = fl4.daddr;
589
590 lock_sock(sk);
591
592 pfh.icmph.type = user_icmph.type; /* already checked */
593 pfh.icmph.code = user_icmph.code; /* ditto */
594 pfh.icmph.checksum = 0;
595 pfh.icmph.un.echo.id = inet->inet_sport;
596 pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
597 pfh.iov = msg->msg_iov;
598 pfh.wcheck = 0;
599
600 err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
601 0, &ipc, &rt, msg->msg_flags);
602 if (err)
603 ip_flush_pending_frames(sk);
604 else
605 err = ping_push_pending_frames(sk, &pfh, &fl4);
606 release_sock(sk);
607
608out:
609 ip_rt_put(rt);
610 if (free)
611 kfree(ipc.opt);
612 if (!err) {
613 icmp_out_count(sock_net(sk), user_icmph.type);
614 return len;
615 }
616 return err;
617
618do_confirm:
619 dst_confirm(&rt->dst);
620 if (!(msg->msg_flags & MSG_PROBE) || len)
621 goto back_from_confirm;
622 err = 0;
623 goto out;
624}
625
626static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
627 size_t len, int noblock, int flags, int *addr_len)
628{
629 struct inet_sock *isk = inet_sk(sk);
630 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
631 struct sk_buff *skb;
632 int copied, err;
633
634 pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
635
636 if (flags & MSG_OOB)
637 goto out;
638
639 if (addr_len)
640 *addr_len = sizeof(*sin);
641
642 if (flags & MSG_ERRQUEUE)
643 return ip_recv_error(sk, msg, len);
644
645 skb = skb_recv_datagram(sk, flags, noblock, &err);
646 if (!skb)
647 goto out;
648
649 copied = skb->len;
650 if (copied > len) {
651 msg->msg_flags |= MSG_TRUNC;
652 copied = len;
653 }
654
655 /* Don't bother checking the checksum */
656 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
657 if (err)
658 goto done;
659
660 sock_recv_timestamp(msg, sk, skb);
661
662 /* Copy the address. */
663 if (sin) {
664 sin->sin_family = AF_INET;
665 sin->sin_port = 0 /* skb->h.uh->source */;
666 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
667 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
668 }
669 if (isk->cmsg_flags)
670 ip_cmsg_recv(msg, skb);
671 err = copied;
672
673done:
674 skb_free_datagram(sk, skb);
675out:
676 pr_debug("ping_recvmsg -> %d\n", err);
677 return err;
678}
679
680static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
681{
682 pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
683 inet_sk(sk), inet_sk(sk)->inet_num, skb);
684 if (sock_queue_rcv_skb(sk, skb) < 0) {
685 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS);
686 kfree_skb(skb);
687 pr_debug("ping_queue_rcv_skb -> failed\n");
688 return -1;
689 }
690 return 0;
691}
692
693
694/*
695 * All we need to do is get the socket.
696 */
697
698void ping_rcv(struct sk_buff *skb)
699{
700 struct sock *sk;
701 struct net *net = dev_net(skb->dev);
702 struct iphdr *iph = ip_hdr(skb);
703 struct icmphdr *icmph = icmp_hdr(skb);
704 u32 saddr = iph->saddr;
705 u32 daddr = iph->daddr;
706
707 /* We assume the packet has already been checked by icmp_rcv */
708
709 pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
710 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
711
712 /* Push ICMP header back */
713 skb_push(skb, skb->data - (u8 *)icmph);
714
715 sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
716 skb->dev->ifindex);
717 if (sk != NULL) {
718 pr_debug("rcv on socket %p\n", sk);
719 ping_queue_rcv_skb(sk, skb_get(skb));
720 sock_put(sk);
721 return;
722 }
723 pr_debug("no socket, dropping\n");
724
725 /* We're called from icmp_rcv(). kfree_skb() is done there. */
726}
727
728struct proto ping_prot = {
729 .name = "PING",
730 .owner = THIS_MODULE,
731 .init = ping_init_sock,
732 .close = ping_close,
733 .connect = ip4_datagram_connect,
734 .disconnect = udp_disconnect,
735 .setsockopt = ip_setsockopt,
736 .getsockopt = ip_getsockopt,
737 .sendmsg = ping_sendmsg,
738 .recvmsg = ping_recvmsg,
739 .bind = ping_bind,
740 .backlog_rcv = ping_queue_rcv_skb,
741 .hash = ping_v4_hash,
742 .unhash = ping_v4_unhash,
743 .get_port = ping_v4_get_port,
744 .obj_size = sizeof(struct inet_sock),
745};
746EXPORT_SYMBOL(ping_prot);
747
748#ifdef CONFIG_PROC_FS
749
750static struct sock *ping_get_first(struct seq_file *seq, int start)
751{
752 struct sock *sk;
753 struct ping_iter_state *state = seq->private;
754 struct net *net = seq_file_net(seq);
755
756 for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
757 ++state->bucket) {
758 struct hlist_nulls_node *node;
759 struct hlist_nulls_head *hslot;
760
761 hslot = &ping_table.hash[state->bucket];
762
763 if (hlist_nulls_empty(hslot))
764 continue;
765
766 sk_nulls_for_each(sk, node, hslot) {
767 if (net_eq(sock_net(sk), net))
768 goto found;
769 }
770 }
771 sk = NULL;
772found:
773 return sk;
774}
775
776static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
777{
778 struct ping_iter_state *state = seq->private;
779 struct net *net = seq_file_net(seq);
780
781 do {
782 sk = sk_nulls_next(sk);
783 } while (sk && (!net_eq(sock_net(sk), net)));
784
785 if (!sk)
786 return ping_get_first(seq, state->bucket + 1);
787 return sk;
788}
789
790static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
791{
792 struct sock *sk = ping_get_first(seq, 0);
793
794 if (sk)
795 while (pos && (sk = ping_get_next(seq, sk)) != NULL)
796 --pos;
797 return pos ? NULL : sk;
798}
799
800static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
801{
802 struct ping_iter_state *state = seq->private;
803 state->bucket = 0;
804
805 read_lock_bh(&ping_table.lock);
806
807 return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
808}
809
810static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
811{
812 struct sock *sk;
813
814 if (v == SEQ_START_TOKEN)
815 sk = ping_get_idx(seq, 0);
816 else
817 sk = ping_get_next(seq, v);
818
819 ++*pos;
820 return sk;
821}
822
823static void ping_seq_stop(struct seq_file *seq, void *v)
824{
825 read_unlock_bh(&ping_table.lock);
826}
827
828static void ping_format_sock(struct sock *sp, struct seq_file *f,
829 int bucket, int *len)
830{
831 struct inet_sock *inet = inet_sk(sp);
832 __be32 dest = inet->inet_daddr;
833 __be32 src = inet->inet_rcv_saddr;
834 __u16 destp = ntohs(inet->inet_dport);
835 __u16 srcp = ntohs(inet->inet_sport);
836
837 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
838 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
839 bucket, src, srcp, dest, destp, sp->sk_state,
840 sk_wmem_alloc_get(sp),
841 sk_rmem_alloc_get(sp),
842 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
843 atomic_read(&sp->sk_refcnt), sp,
844 atomic_read(&sp->sk_drops), len);
845}
846
847static int ping_seq_show(struct seq_file *seq, void *v)
848{
849 if (v == SEQ_START_TOKEN)
850 seq_printf(seq, "%-127s\n",
851 " sl local_address rem_address st tx_queue "
852 "rx_queue tr tm->when retrnsmt uid timeout "
853 "inode ref pointer drops");
854 else {
855 struct ping_iter_state *state = seq->private;
856 int len;
857
858 ping_format_sock(v, seq, state->bucket, &len);
859 seq_printf(seq, "%*s\n", 127 - len, "");
860 }
861 return 0;
862}
863
864static const struct seq_operations ping_seq_ops = {
865 .show = ping_seq_show,
866 .start = ping_seq_start,
867 .next = ping_seq_next,
868 .stop = ping_seq_stop,
869};
870
871static int ping_seq_open(struct inode *inode, struct file *file)
872{
873 return seq_open_net(inode, file, &ping_seq_ops,
874 sizeof(struct ping_iter_state));
875}
876
877static const struct file_operations ping_seq_fops = {
878 .open = ping_seq_open,
879 .read = seq_read,
880 .llseek = seq_lseek,
881 .release = seq_release_net,
882};
883
884static int ping_proc_register(struct net *net)
885{
886 struct proc_dir_entry *p;
887 int rc = 0;
888
889 p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops);
890 if (!p)
891 rc = -ENOMEM;
892 return rc;
893}
894
/* Remove the "icmp" proc entry of @net. */
static void ping_proc_unregister(struct net *net)
{
	proc_net_remove(net, "icmp");
}
899
900
901static int __net_init ping_proc_init_net(struct net *net)
902{
903 return ping_proc_register(net);
904}
905
906static void __net_exit ping_proc_exit_net(struct net *net)
907{
908 ping_proc_unregister(net);
909}
910
911static struct pernet_operations ping_net_ops = {
912 .init = ping_proc_init_net,
913 .exit = ping_proc_exit_net,
914};
915
916int __init ping_proc_init(void)
917{
918 return register_pernet_subsys(&ping_net_ops);
919}
920
921void ping_proc_exit(void)
922{
923 unregister_pernet_subsys(&ping_net_ops);
924}
925
926#endif
927
928void __init ping_init(void)
929{
930 int i;
931
932 for (i = 0; i < PING_HTABLE_SIZE; i++)
933 INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
934 rwlock_init(&ping_table.lock);
935}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bceaec42c37d..11e1780455f2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -154,7 +154,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
154 * RFC 1122: SHOULD pass TOS value up to the transport layer. 154 * RFC 1122: SHOULD pass TOS value up to the transport layer.
155 * -> It does. And not only TOS, but all IP header. 155 * -> It does. And not only TOS, but all IP header.
156 */ 156 */
157static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) 157static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
158{ 158{
159 struct sock *sk; 159 struct sock *sk;
160 struct hlist_head *head; 160 struct hlist_head *head;
@@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
247 } 247 }
248 248
249 if (inet->recverr) { 249 if (inet->recverr) {
250 struct iphdr *iph = (struct iphdr *)skb->data; 250 const struct iphdr *iph = (const struct iphdr *)skb->data;
251 u8 *payload = skb->data + (iph->ihl << 2); 251 u8 *payload = skb->data + (iph->ihl << 2);
252 252
253 if (inet->hdrincl) 253 if (inet->hdrincl)
@@ -265,7 +265,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
265{ 265{
266 int hash; 266 int hash;
267 struct sock *raw_sk; 267 struct sock *raw_sk;
268 struct iphdr *iph; 268 const struct iphdr *iph;
269 struct net *net; 269 struct net *net;
270 270
271 hash = protocol & (RAW_HTABLE_SIZE - 1); 271 hash = protocol & (RAW_HTABLE_SIZE - 1);
@@ -273,7 +273,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
273 read_lock(&raw_v4_hashinfo.lock); 273 read_lock(&raw_v4_hashinfo.lock);
274 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); 274 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
275 if (raw_sk != NULL) { 275 if (raw_sk != NULL) {
276 iph = (struct iphdr *)skb->data; 276 iph = (const struct iphdr *)skb->data;
277 net = dev_net(skb->dev); 277 net = dev_net(skb->dev);
278 278
279 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, 279 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
@@ -281,7 +281,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
281 skb->dev->ifindex)) != NULL) { 281 skb->dev->ifindex)) != NULL) {
282 raw_err(raw_sk, skb, info); 282 raw_err(raw_sk, skb, info);
283 raw_sk = sk_next(raw_sk); 283 raw_sk = sk_next(raw_sk);
284 iph = (struct iphdr *)skb->data; 284 iph = (const struct iphdr *)skb->data;
285 } 285 }
286 } 286 }
287 read_unlock(&raw_v4_hashinfo.lock); 287 read_unlock(&raw_v4_hashinfo.lock);
@@ -314,9 +314,10 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
314 return 0; 314 return 0;
315} 315}
316 316
317static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, 317static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
318 struct rtable **rtp, 318 void *from, size_t length,
319 unsigned int flags) 319 struct rtable **rtp,
320 unsigned int flags)
320{ 321{
321 struct inet_sock *inet = inet_sk(sk); 322 struct inet_sock *inet = inet_sk(sk);
322 struct net *net = sock_net(sk); 323 struct net *net = sock_net(sk);
@@ -327,7 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
327 struct rtable *rt = *rtp; 328 struct rtable *rt = *rtp;
328 329
329 if (length > rt->dst.dev->mtu) { 330 if (length > rt->dst.dev->mtu) {
330 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 331 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
331 rt->dst.dev->mtu); 332 rt->dst.dev->mtu);
332 return -EMSGSIZE; 333 return -EMSGSIZE;
333 } 334 }
@@ -372,7 +373,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
372 373
373 if (iphlen >= sizeof(*iph)) { 374 if (iphlen >= sizeof(*iph)) {
374 if (!iph->saddr) 375 if (!iph->saddr)
375 iph->saddr = rt->rt_src; 376 iph->saddr = fl4->saddr;
376 iph->check = 0; 377 iph->check = 0;
377 iph->tot_len = htons(length); 378 iph->tot_len = htons(length);
378 if (!iph->id) 379 if (!iph->id)
@@ -455,11 +456,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
455 struct inet_sock *inet = inet_sk(sk); 456 struct inet_sock *inet = inet_sk(sk);
456 struct ipcm_cookie ipc; 457 struct ipcm_cookie ipc;
457 struct rtable *rt = NULL; 458 struct rtable *rt = NULL;
459 struct flowi4 fl4;
458 int free = 0; 460 int free = 0;
459 __be32 daddr; 461 __be32 daddr;
460 __be32 saddr; 462 __be32 saddr;
461 u8 tos; 463 u8 tos;
462 int err; 464 int err;
465 struct ip_options_data opt_copy;
463 466
464 err = -EMSGSIZE; 467 err = -EMSGSIZE;
465 if (len > 0xFFFF) 468 if (len > 0xFFFF)
@@ -520,8 +523,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
520 saddr = ipc.addr; 523 saddr = ipc.addr;
521 ipc.addr = daddr; 524 ipc.addr = daddr;
522 525
523 if (!ipc.opt) 526 if (!ipc.opt) {
524 ipc.opt = inet->opt; 527 struct ip_options_rcu *inet_opt;
528
529 rcu_read_lock();
530 inet_opt = rcu_dereference(inet->inet_opt);
531 if (inet_opt) {
532 memcpy(&opt_copy, inet_opt,
533 sizeof(*inet_opt) + inet_opt->opt.optlen);
534 ipc.opt = &opt_copy.opt;
535 }
536 rcu_read_unlock();
537 }
525 538
526 if (ipc.opt) { 539 if (ipc.opt) {
527 err = -EINVAL; 540 err = -EINVAL;
@@ -530,10 +543,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
530 */ 543 */
531 if (inet->hdrincl) 544 if (inet->hdrincl)
532 goto done; 545 goto done;
533 if (ipc.opt->srr) { 546 if (ipc.opt->opt.srr) {
534 if (!daddr) 547 if (!daddr)
535 goto done; 548 goto done;
536 daddr = ipc.opt->faddr; 549 daddr = ipc.opt->opt.faddr;
537 } 550 }
538 } 551 }
539 tos = RT_CONN_FLAGS(sk); 552 tos = RT_CONN_FLAGS(sk);
@@ -547,31 +560,23 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
547 saddr = inet->mc_addr; 560 saddr = inet->mc_addr;
548 } 561 }
549 562
550 { 563 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
551 struct flowi4 fl4 = { 564 RT_SCOPE_UNIVERSE,
552 .flowi4_oif = ipc.oif, 565 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
553 .flowi4_mark = sk->sk_mark, 566 FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
554 .daddr = daddr,
555 .saddr = saddr,
556 .flowi4_tos = tos,
557 .flowi4_proto = (inet->hdrincl ?
558 IPPROTO_RAW :
559 sk->sk_protocol),
560 .flowi4_flags = FLOWI_FLAG_CAN_SLEEP,
561 };
562 if (!inet->hdrincl) {
563 err = raw_probe_proto_opt(&fl4, msg);
564 if (err)
565 goto done;
566 }
567 567
568 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 568 if (!inet->hdrincl) {
569 rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 569 err = raw_probe_proto_opt(&fl4, msg);
570 if (IS_ERR(rt)) { 570 if (err)
571 err = PTR_ERR(rt);
572 rt = NULL;
573 goto done; 571 goto done;
574 } 572 }
573
574 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
575 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
576 if (IS_ERR(rt)) {
577 err = PTR_ERR(rt);
578 rt = NULL;
579 goto done;
575 } 580 }
576 581
577 err = -EACCES; 582 err = -EACCES;
@@ -583,19 +588,20 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
583back_from_confirm: 588back_from_confirm:
584 589
585 if (inet->hdrincl) 590 if (inet->hdrincl)
586 err = raw_send_hdrinc(sk, msg->msg_iov, len, 591 err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len,
587 &rt, msg->msg_flags); 592 &rt, msg->msg_flags);
588 593
589 else { 594 else {
590 if (!ipc.addr) 595 if (!ipc.addr)
591 ipc.addr = rt->rt_dst; 596 ipc.addr = fl4.daddr;
592 lock_sock(sk); 597 lock_sock(sk);
593 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, 598 err = ip_append_data(sk, &fl4, ip_generic_getfrag,
594 &ipc, &rt, msg->msg_flags); 599 msg->msg_iov, len, 0,
600 &ipc, &rt, msg->msg_flags);
595 if (err) 601 if (err)
596 ip_flush_pending_frames(sk); 602 ip_flush_pending_frames(sk);
597 else if (!(msg->msg_flags & MSG_MORE)) { 603 else if (!(msg->msg_flags & MSG_MORE)) {
598 err = ip_push_pending_frames(sk); 604 err = ip_push_pending_frames(sk, &fl4);
599 if (err == -ENOBUFS && !inet->recverr) 605 if (err == -ENOBUFS && !inet->recverr)
600 err = 0; 606 err = 0;
601 } 607 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 99e6e4bb1c72..b24d58e6bbcd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -156,7 +156,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
156 u32 *p = NULL; 156 u32 *p = NULL;
157 157
158 if (!rt->peer) 158 if (!rt->peer)
159 rt_bind_peer(rt, 1); 159 rt_bind_peer(rt, rt->rt_dst, 1);
160 160
161 peer = rt->peer; 161 peer = rt->peer;
162 if (peer) { 162 if (peer) {
@@ -424,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
424 dst_metric(&r->dst, RTAX_WINDOW), 424 dst_metric(&r->dst, RTAX_WINDOW),
425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + 425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
426 dst_metric(&r->dst, RTAX_RTTVAR)), 426 dst_metric(&r->dst, RTAX_RTTVAR)),
427 r->rt_tos, 427 r->rt_key_tos,
428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, 428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
429 r->dst.hh ? (r->dst.hh->hh_output == 429 r->dst.hh ? (r->dst.hh->hh_output ==
430 dev_queue_xmit) : 0, 430 dev_queue_xmit) : 0,
@@ -724,7 +724,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
724 return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | 724 return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
725 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | 725 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
726 (rt1->rt_mark ^ rt2->rt_mark) | 726 (rt1->rt_mark ^ rt2->rt_mark) |
727 (rt1->rt_tos ^ rt2->rt_tos) | 727 (rt1->rt_key_tos ^ rt2->rt_key_tos) |
728 (rt1->rt_oif ^ rt2->rt_oif) | 728 (rt1->rt_oif ^ rt2->rt_oif) |
729 (rt1->rt_iif ^ rt2->rt_iif)) == 0; 729 (rt1->rt_iif ^ rt2->rt_iif)) == 0;
730} 730}
@@ -968,10 +968,6 @@ static int rt_garbage_collect(struct dst_ops *ops)
968 break; 968 break;
969 969
970 expire >>= 1; 970 expire >>= 1;
971#if RT_CACHE_DEBUG >= 2
972 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
973 dst_entries_get_fast(&ipv4_dst_ops), goal, i);
974#endif
975 971
976 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) 972 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
977 goto out; 973 goto out;
@@ -992,10 +988,6 @@ work_done:
992 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || 988 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
993 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) 989 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
994 expire = ip_rt_gc_timeout; 990 expire = ip_rt_gc_timeout;
995#if RT_CACHE_DEBUG >= 2
996 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
997 dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
998#endif
999out: return 0; 991out: return 0;
1000} 992}
1001 993
@@ -1179,16 +1171,6 @@ restart:
1179 1171
1180 rt->dst.rt_next = rt_hash_table[hash].chain; 1172 rt->dst.rt_next = rt_hash_table[hash].chain;
1181 1173
1182#if RT_CACHE_DEBUG >= 2
1183 if (rt->dst.rt_next) {
1184 struct rtable *trt;
1185 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1186 hash, &rt->rt_dst);
1187 for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1188 printk(" . %pI4", &trt->rt_dst);
1189 printk("\n");
1190 }
1191#endif
1192 /* 1174 /*
1193 * Since lookup is lockfree, we must make sure 1175 * Since lookup is lockfree, we must make sure
1194 * previous writes to rt are committed to memory 1176 * previous writes to rt are committed to memory
@@ -1211,11 +1193,11 @@ static u32 rt_peer_genid(void)
1211 return atomic_read(&__rt_peer_genid); 1193 return atomic_read(&__rt_peer_genid);
1212} 1194}
1213 1195
1214void rt_bind_peer(struct rtable *rt, int create) 1196void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
1215{ 1197{
1216 struct inet_peer *peer; 1198 struct inet_peer *peer;
1217 1199
1218 peer = inet_getpeer_v4(rt->rt_dst, create); 1200 peer = inet_getpeer_v4(daddr, create);
1219 1201
1220 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) 1202 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1221 inet_putpeer(peer); 1203 inet_putpeer(peer);
@@ -1249,7 +1231,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1249 1231
1250 if (rt) { 1232 if (rt) {
1251 if (rt->peer == NULL) 1233 if (rt->peer == NULL)
1252 rt_bind_peer(rt, 1); 1234 rt_bind_peer(rt, rt->rt_dst, 1);
1253 1235
1254 /* If peer is attached to destination, it is never detached, 1236 /* If peer is attached to destination, it is never detached,
1255 so that we need not to grab a lock to dereference it. 1237 so that we need not to grab a lock to dereference it.
@@ -1347,10 +1329,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1347 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, 1329 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1348 rt->rt_oif, 1330 rt->rt_oif,
1349 rt_genid(dev_net(dst->dev))); 1331 rt_genid(dev_net(dst->dev)));
1350#if RT_CACHE_DEBUG >= 1
1351 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
1352 &rt->rt_dst, rt->rt_tos);
1353#endif
1354 rt_del(hash, rt); 1332 rt_del(hash, rt);
1355 ret = NULL; 1333 ret = NULL;
1356 } else if (rt->peer && 1334 } else if (rt->peer &&
@@ -1399,7 +1377,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1399 rcu_read_unlock(); 1377 rcu_read_unlock();
1400 1378
1401 if (!rt->peer) 1379 if (!rt->peer)
1402 rt_bind_peer(rt, 1); 1380 rt_bind_peer(rt, rt->rt_dst, 1);
1403 peer = rt->peer; 1381 peer = rt->peer;
1404 if (!peer) { 1382 if (!peer) {
1405 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1383 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
@@ -1435,7 +1413,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1435 peer->rate_tokens == ip_rt_redirect_number && 1413 peer->rate_tokens == ip_rt_redirect_number &&
1436 net_ratelimit()) 1414 net_ratelimit())
1437 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1415 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1438 &rt->rt_src, rt->rt_iif, 1416 &ip_hdr(skb)->saddr, rt->rt_iif,
1439 &rt->rt_dst, &rt->rt_gateway); 1417 &rt->rt_dst, &rt->rt_gateway);
1440#endif 1418#endif
1441 } 1419 }
@@ -1467,7 +1445,7 @@ static int ip_error(struct sk_buff *skb)
1467 } 1445 }
1468 1446
1469 if (!rt->peer) 1447 if (!rt->peer)
1470 rt_bind_peer(rt, 1); 1448 rt_bind_peer(rt, rt->rt_dst, 1);
1471 peer = rt->peer; 1449 peer = rt->peer;
1472 1450
1473 send = true; 1451 send = true;
@@ -1507,7 +1485,7 @@ static inline unsigned short guess_mtu(unsigned short old_mtu)
1507 return 68; 1485 return 68;
1508} 1486}
1509 1487
1510unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, 1488unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
1511 unsigned short new_mtu, 1489 unsigned short new_mtu,
1512 struct net_device *dev) 1490 struct net_device *dev)
1513{ 1491{
@@ -1574,7 +1552,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1574 dst_confirm(dst); 1552 dst_confirm(dst);
1575 1553
1576 if (!rt->peer) 1554 if (!rt->peer)
1577 rt_bind_peer(rt, 1); 1555 rt_bind_peer(rt, rt->rt_dst, 1);
1578 peer = rt->peer; 1556 peer = rt->peer;
1579 if (peer) { 1557 if (peer) {
1580 if (mtu < ip_rt_min_pmtu) 1558 if (mtu < ip_rt_min_pmtu)
@@ -1631,7 +1609,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1631 struct inet_peer *peer; 1609 struct inet_peer *peer;
1632 1610
1633 if (!rt->peer) 1611 if (!rt->peer)
1634 rt_bind_peer(rt, 0); 1612 rt_bind_peer(rt, rt->rt_dst, 0);
1635 1613
1636 peer = rt->peer; 1614 peer = rt->peer;
1637 if (peer && peer->pmtu_expires) 1615 if (peer && peer->pmtu_expires)
@@ -1699,22 +1677,26 @@ static int ip_rt_bug(struct sk_buff *skb)
1699 in IP options! 1677 in IP options!
1700 */ 1678 */
1701 1679
1702void ip_rt_get_source(u8 *addr, struct rtable *rt) 1680void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
1703{ 1681{
1704 __be32 src; 1682 __be32 src;
1705 struct fib_result res;
1706 1683
1707 if (rt_is_output_route(rt)) 1684 if (rt_is_output_route(rt))
1708 src = rt->rt_src; 1685 src = ip_hdr(skb)->saddr;
1709 else { 1686 else {
1710 struct flowi4 fl4 = { 1687 struct fib_result res;
1711 .daddr = rt->rt_key_dst, 1688 struct flowi4 fl4;
1712 .saddr = rt->rt_key_src, 1689 struct iphdr *iph;
1713 .flowi4_tos = rt->rt_tos, 1690
1714 .flowi4_oif = rt->rt_oif, 1691 iph = ip_hdr(skb);
1715 .flowi4_iif = rt->rt_iif, 1692
1716 .flowi4_mark = rt->rt_mark, 1693 memset(&fl4, 0, sizeof(fl4));
1717 }; 1694 fl4.daddr = iph->daddr;
1695 fl4.saddr = iph->saddr;
1696 fl4.flowi4_tos = iph->tos;
1697 fl4.flowi4_oif = rt->dst.dev->ifindex;
1698 fl4.flowi4_iif = skb->dev->ifindex;
1699 fl4.flowi4_mark = skb->mark;
1718 1700
1719 rcu_read_lock(); 1701 rcu_read_lock();
1720 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) 1702 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
@@ -1767,7 +1749,7 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
1767 return mtu; 1749 return mtu;
1768} 1750}
1769 1751
1770static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, 1752static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1771 struct fib_info *fi) 1753 struct fib_info *fi)
1772{ 1754{
1773 struct inet_peer *peer; 1755 struct inet_peer *peer;
@@ -1776,7 +1758,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
1776 /* If a peer entry exists for this destination, we must hook 1758 /* If a peer entry exists for this destination, we must hook
1777 * it up in order to get at cached metrics. 1759 * it up in order to get at cached metrics.
1778 */ 1760 */
1779 if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) 1761 if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
1780 create = 1; 1762 create = 1;
1781 1763
1782 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); 1764 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
@@ -1803,7 +1785,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
1803 } 1785 }
1804} 1786}
1805 1787
1806static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, 1788static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
1807 const struct fib_result *res, 1789 const struct fib_result *res,
1808 struct fib_info *fi, u16 type, u32 itag) 1790 struct fib_info *fi, u16 type, u32 itag)
1809{ 1791{
@@ -1813,7 +1795,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
1813 if (FIB_RES_GW(*res) && 1795 if (FIB_RES_GW(*res) &&
1814 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1796 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1815 rt->rt_gateway = FIB_RES_GW(*res); 1797 rt->rt_gateway = FIB_RES_GW(*res);
1816 rt_init_metrics(rt, oldflp4, fi); 1798 rt_init_metrics(rt, fl4, fi);
1817#ifdef CONFIG_IP_ROUTE_CLASSID 1799#ifdef CONFIG_IP_ROUTE_CLASSID
1818 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1800 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1819#endif 1801#endif
@@ -1830,20 +1812,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
1830#endif 1812#endif
1831 set_class_tag(rt, itag); 1813 set_class_tag(rt, itag);
1832#endif 1814#endif
1833 rt->rt_type = type;
1834} 1815}
1835 1816
1836static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) 1817static struct rtable *rt_dst_alloc(struct net_device *dev,
1818 bool nopolicy, bool noxfrm)
1837{ 1819{
1838 struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); 1820 return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
1839 if (rt) { 1821 DST_HOST |
1840 rt->dst.obsolete = -1; 1822 (nopolicy ? DST_NOPOLICY : 0) |
1841 1823 (noxfrm ? DST_NOXFRM : 0));
1842 rt->dst.flags = DST_HOST |
1843 (nopolicy ? DST_NOPOLICY : 0) |
1844 (noxfrm ? DST_NOXFRM : 0);
1845 }
1846 return rt;
1847} 1824}
1848 1825
1849/* called in rcu_read_lock() section */ 1826/* called in rcu_read_lock() section */
@@ -1871,36 +1848,38 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1871 goto e_inval; 1848 goto e_inval;
1872 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1849 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1873 } else { 1850 } else {
1874 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 1851 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
1875 &itag, 0); 1852 &itag);
1876 if (err < 0) 1853 if (err < 0)
1877 goto e_err; 1854 goto e_err;
1878 } 1855 }
1879 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); 1856 rth = rt_dst_alloc(init_net.loopback_dev,
1857 IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
1880 if (!rth) 1858 if (!rth)
1881 goto e_nobufs; 1859 goto e_nobufs;
1882 1860
1861#ifdef CONFIG_IP_ROUTE_CLASSID
1862 rth->dst.tclassid = itag;
1863#endif
1883 rth->dst.output = ip_rt_bug; 1864 rth->dst.output = ip_rt_bug;
1884 1865
1885 rth->rt_key_dst = daddr; 1866 rth->rt_key_dst = daddr;
1886 rth->rt_dst = daddr;
1887 rth->rt_tos = tos;
1888 rth->rt_mark = skb->mark;
1889 rth->rt_key_src = saddr; 1867 rth->rt_key_src = saddr;
1868 rth->rt_genid = rt_genid(dev_net(dev));
1869 rth->rt_flags = RTCF_MULTICAST;
1870 rth->rt_type = RTN_MULTICAST;
1871 rth->rt_key_tos = tos;
1872 rth->rt_dst = daddr;
1890 rth->rt_src = saddr; 1873 rth->rt_src = saddr;
1891#ifdef CONFIG_IP_ROUTE_CLASSID
1892 rth->dst.tclassid = itag;
1893#endif
1894 rth->rt_route_iif = dev->ifindex; 1874 rth->rt_route_iif = dev->ifindex;
1895 rth->rt_iif = dev->ifindex; 1875 rth->rt_iif = dev->ifindex;
1896 rth->dst.dev = init_net.loopback_dev;
1897 dev_hold(rth->dst.dev);
1898 rth->rt_oif = 0; 1876 rth->rt_oif = 0;
1877 rth->rt_mark = skb->mark;
1899 rth->rt_gateway = daddr; 1878 rth->rt_gateway = daddr;
1900 rth->rt_spec_dst= spec_dst; 1879 rth->rt_spec_dst= spec_dst;
1901 rth->rt_genid = rt_genid(dev_net(dev)); 1880 rth->rt_peer_genid = 0;
1902 rth->rt_flags = RTCF_MULTICAST; 1881 rth->peer = NULL;
1903 rth->rt_type = RTN_MULTICAST; 1882 rth->fi = NULL;
1904 if (our) { 1883 if (our) {
1905 rth->dst.input= ip_local_deliver; 1884 rth->dst.input= ip_local_deliver;
1906 rth->rt_flags |= RTCF_LOCAL; 1885 rth->rt_flags |= RTCF_LOCAL;
@@ -1981,8 +1960,8 @@ static int __mkroute_input(struct sk_buff *skb,
1981 } 1960 }
1982 1961
1983 1962
1984 err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), 1963 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
1985 in_dev->dev, &spec_dst, &itag, skb->mark); 1964 in_dev->dev, &spec_dst, &itag);
1986 if (err < 0) { 1965 if (err < 0) {
1987 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1966 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1988 saddr); 1967 saddr);
@@ -2013,7 +1992,8 @@ static int __mkroute_input(struct sk_buff *skb,
2013 } 1992 }
2014 } 1993 }
2015 1994
2016 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), 1995 rth = rt_dst_alloc(out_dev->dev,
1996 IN_DEV_CONF_GET(in_dev, NOPOLICY),
2017 IN_DEV_CONF_GET(out_dev, NOXFRM)); 1997 IN_DEV_CONF_GET(out_dev, NOXFRM));
2018 if (!rth) { 1998 if (!rth) {
2019 err = -ENOBUFS; 1999 err = -ENOBUFS;
@@ -2021,27 +2001,28 @@ static int __mkroute_input(struct sk_buff *skb,
2021 } 2001 }
2022 2002
2023 rth->rt_key_dst = daddr; 2003 rth->rt_key_dst = daddr;
2024 rth->rt_dst = daddr;
2025 rth->rt_tos = tos;
2026 rth->rt_mark = skb->mark;
2027 rth->rt_key_src = saddr; 2004 rth->rt_key_src = saddr;
2005 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2006 rth->rt_flags = flags;
2007 rth->rt_type = res->type;
2008 rth->rt_key_tos = tos;
2009 rth->rt_dst = daddr;
2028 rth->rt_src = saddr; 2010 rth->rt_src = saddr;
2029 rth->rt_gateway = daddr;
2030 rth->rt_route_iif = in_dev->dev->ifindex; 2011 rth->rt_route_iif = in_dev->dev->ifindex;
2031 rth->rt_iif = in_dev->dev->ifindex; 2012 rth->rt_iif = in_dev->dev->ifindex;
2032 rth->dst.dev = (out_dev)->dev;
2033 dev_hold(rth->dst.dev);
2034 rth->rt_oif = 0; 2013 rth->rt_oif = 0;
2014 rth->rt_mark = skb->mark;
2015 rth->rt_gateway = daddr;
2035 rth->rt_spec_dst= spec_dst; 2016 rth->rt_spec_dst= spec_dst;
2017 rth->rt_peer_genid = 0;
2018 rth->peer = NULL;
2019 rth->fi = NULL;
2036 2020
2037 rth->dst.input = ip_forward; 2021 rth->dst.input = ip_forward;
2038 rth->dst.output = ip_output; 2022 rth->dst.output = ip_output;
2039 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2040 2023
2041 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); 2024 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
2042 2025
2043 rth->rt_flags = flags;
2044
2045 *result = rth; 2026 *result = rth;
2046 err = 0; 2027 err = 0;
2047 cleanup: 2028 cleanup:
@@ -2150,9 +2131,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2150 goto brd_input; 2131 goto brd_input;
2151 2132
2152 if (res.type == RTN_LOCAL) { 2133 if (res.type == RTN_LOCAL) {
2153 err = fib_validate_source(saddr, daddr, tos, 2134 err = fib_validate_source(skb, saddr, daddr, tos,
2154 net->loopback_dev->ifindex, 2135 net->loopback_dev->ifindex,
2155 dev, &spec_dst, &itag, skb->mark); 2136 dev, &spec_dst, &itag);
2156 if (err < 0) 2137 if (err < 0)
2157 goto martian_source_keep_err; 2138 goto martian_source_keep_err;
2158 if (err) 2139 if (err)
@@ -2176,8 +2157,8 @@ brd_input:
2176 if (ipv4_is_zeronet(saddr)) 2157 if (ipv4_is_zeronet(saddr))
2177 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 2158 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
2178 else { 2159 else {
2179 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2160 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
2180 &itag, skb->mark); 2161 &itag);
2181 if (err < 0) 2162 if (err < 0)
2182 goto martian_source_keep_err; 2163 goto martian_source_keep_err;
2183 if (err) 2164 if (err)
@@ -2188,36 +2169,42 @@ brd_input:
2188 RT_CACHE_STAT_INC(in_brd); 2169 RT_CACHE_STAT_INC(in_brd);
2189 2170
2190local_input: 2171local_input:
2191 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); 2172 rth = rt_dst_alloc(net->loopback_dev,
2173 IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
2192 if (!rth) 2174 if (!rth)
2193 goto e_nobufs; 2175 goto e_nobufs;
2194 2176
2177 rth->dst.input= ip_local_deliver;
2195 rth->dst.output= ip_rt_bug; 2178 rth->dst.output= ip_rt_bug;
2196 rth->rt_genid = rt_genid(net); 2179#ifdef CONFIG_IP_ROUTE_CLASSID
2180 rth->dst.tclassid = itag;
2181#endif
2197 2182
2198 rth->rt_key_dst = daddr; 2183 rth->rt_key_dst = daddr;
2199 rth->rt_dst = daddr;
2200 rth->rt_tos = tos;
2201 rth->rt_mark = skb->mark;
2202 rth->rt_key_src = saddr; 2184 rth->rt_key_src = saddr;
2185 rth->rt_genid = rt_genid(net);
2186 rth->rt_flags = flags|RTCF_LOCAL;
2187 rth->rt_type = res.type;
2188 rth->rt_key_tos = tos;
2189 rth->rt_dst = daddr;
2203 rth->rt_src = saddr; 2190 rth->rt_src = saddr;
2204#ifdef CONFIG_IP_ROUTE_CLASSID 2191#ifdef CONFIG_IP_ROUTE_CLASSID
2205 rth->dst.tclassid = itag; 2192 rth->dst.tclassid = itag;
2206#endif 2193#endif
2207 rth->rt_route_iif = dev->ifindex; 2194 rth->rt_route_iif = dev->ifindex;
2208 rth->rt_iif = dev->ifindex; 2195 rth->rt_iif = dev->ifindex;
2209 rth->dst.dev = net->loopback_dev; 2196 rth->rt_oif = 0;
2210 dev_hold(rth->dst.dev); 2197 rth->rt_mark = skb->mark;
2211 rth->rt_gateway = daddr; 2198 rth->rt_gateway = daddr;
2212 rth->rt_spec_dst= spec_dst; 2199 rth->rt_spec_dst= spec_dst;
2213 rth->dst.input= ip_local_deliver; 2200 rth->rt_peer_genid = 0;
2214 rth->rt_flags = flags|RTCF_LOCAL; 2201 rth->peer = NULL;
2202 rth->fi = NULL;
2215 if (res.type == RTN_UNREACHABLE) { 2203 if (res.type == RTN_UNREACHABLE) {
2216 rth->dst.input= ip_error; 2204 rth->dst.input= ip_error;
2217 rth->dst.error= -err; 2205 rth->dst.error= -err;
2218 rth->rt_flags &= ~RTCF_LOCAL; 2206 rth->rt_flags &= ~RTCF_LOCAL;
2219 } 2207 }
2220 rth->rt_type = res.type;
2221 hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); 2208 hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
2222 rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); 2209 rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
2223 err = 0; 2210 err = 0;
@@ -2288,7 +2275,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2288 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | 2275 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
2289 (rth->rt_iif ^ iif) | 2276 (rth->rt_iif ^ iif) |
2290 rth->rt_oif | 2277 rth->rt_oif |
2291 (rth->rt_tos ^ tos)) == 0 && 2278 (rth->rt_key_tos ^ tos)) == 0 &&
2292 rth->rt_mark == skb->mark && 2279 rth->rt_mark == skb->mark &&
2293 net_eq(dev_net(rth->dst.dev), net) && 2280 net_eq(dev_net(rth->dst.dev), net) &&
2294 !rt_is_expired(rth)) { 2281 !rt_is_expired(rth)) {
@@ -2349,12 +2336,12 @@ EXPORT_SYMBOL(ip_route_input_common);
2349/* called with rcu_read_lock() */ 2336/* called with rcu_read_lock() */
2350static struct rtable *__mkroute_output(const struct fib_result *res, 2337static struct rtable *__mkroute_output(const struct fib_result *res,
2351 const struct flowi4 *fl4, 2338 const struct flowi4 *fl4,
2352 const struct flowi4 *oldflp4, 2339 __be32 orig_daddr, __be32 orig_saddr,
2353 struct net_device *dev_out, 2340 int orig_oif, struct net_device *dev_out,
2354 unsigned int flags) 2341 unsigned int flags)
2355{ 2342{
2356 struct fib_info *fi = res->fi; 2343 struct fib_info *fi = res->fi;
2357 u32 tos = RT_FL_TOS(oldflp4); 2344 u32 tos = RT_FL_TOS(fl4);
2358 struct in_device *in_dev; 2345 struct in_device *in_dev;
2359 u16 type = res->type; 2346 u16 type = res->type;
2360 struct rtable *rth; 2347 struct rtable *rth;
@@ -2381,8 +2368,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2381 fi = NULL; 2368 fi = NULL;
2382 } else if (type == RTN_MULTICAST) { 2369 } else if (type == RTN_MULTICAST) {
2383 flags |= RTCF_MULTICAST | RTCF_LOCAL; 2370 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2384 if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, 2371 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2385 oldflp4->flowi4_proto)) 2372 fl4->flowi4_proto))
2386 flags &= ~RTCF_LOCAL; 2373 flags &= ~RTCF_LOCAL;
2387 /* If multicast route do not exist use 2374 /* If multicast route do not exist use
2388 * default one, but do not gateway in this case. 2375 * default one, but do not gateway in this case.
@@ -2392,29 +2379,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2392 fi = NULL; 2379 fi = NULL;
2393 } 2380 }
2394 2381
2395 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), 2382 rth = rt_dst_alloc(dev_out,
2383 IN_DEV_CONF_GET(in_dev, NOPOLICY),
2396 IN_DEV_CONF_GET(in_dev, NOXFRM)); 2384 IN_DEV_CONF_GET(in_dev, NOXFRM));
2397 if (!rth) 2385 if (!rth)
2398 return ERR_PTR(-ENOBUFS); 2386 return ERR_PTR(-ENOBUFS);
2399 2387
2400 rth->rt_key_dst = oldflp4->daddr; 2388 rth->dst.output = ip_output;
2401 rth->rt_tos = tos; 2389
2402 rth->rt_key_src = oldflp4->saddr; 2390 rth->rt_key_dst = orig_daddr;
2403 rth->rt_oif = oldflp4->flowi4_oif; 2391 rth->rt_key_src = orig_saddr;
2404 rth->rt_mark = oldflp4->flowi4_mark; 2392 rth->rt_genid = rt_genid(dev_net(dev_out));
2393 rth->rt_flags = flags;
2394 rth->rt_type = type;
2395 rth->rt_key_tos = tos;
2405 rth->rt_dst = fl4->daddr; 2396 rth->rt_dst = fl4->daddr;
2406 rth->rt_src = fl4->saddr; 2397 rth->rt_src = fl4->saddr;
2407 rth->rt_route_iif = 0; 2398 rth->rt_route_iif = 0;
2408 rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; 2399 rth->rt_iif = orig_oif ? : dev_out->ifindex;
2409 /* get references to the devices that are to be hold by the routing 2400 rth->rt_oif = orig_oif;
2410 cache entry */ 2401 rth->rt_mark = fl4->flowi4_mark;
2411 rth->dst.dev = dev_out;
2412 dev_hold(dev_out);
2413 rth->rt_gateway = fl4->daddr; 2402 rth->rt_gateway = fl4->daddr;
2414 rth->rt_spec_dst= fl4->saddr; 2403 rth->rt_spec_dst= fl4->saddr;
2415 2404 rth->rt_peer_genid = 0;
2416 rth->dst.output=ip_output; 2405 rth->peer = NULL;
2417 rth->rt_genid = rt_genid(dev_net(dev_out)); 2406 rth->fi = NULL;
2418 2407
2419 RT_CACHE_STAT_INC(out_slow_tot); 2408 RT_CACHE_STAT_INC(out_slow_tot);
2420 2409
@@ -2432,7 +2421,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2432#ifdef CONFIG_IP_MROUTE 2421#ifdef CONFIG_IP_MROUTE
2433 if (type == RTN_MULTICAST) { 2422 if (type == RTN_MULTICAST) {
2434 if (IN_DEV_MFORWARD(in_dev) && 2423 if (IN_DEV_MFORWARD(in_dev) &&
2435 !ipv4_is_local_multicast(oldflp4->daddr)) { 2424 !ipv4_is_local_multicast(fl4->daddr)) {
2436 rth->dst.input = ip_mr_input; 2425 rth->dst.input = ip_mr_input;
2437 rth->dst.output = ip_mc_output; 2426 rth->dst.output = ip_mc_output;
2438 } 2427 }
@@ -2440,9 +2429,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2440#endif 2429#endif
2441 } 2430 }
2442 2431
2443 rt_set_nexthop(rth, oldflp4, res, fi, type, 0); 2432 rt_set_nexthop(rth, fl4, res, fi, type, 0);
2444 2433
2445 rth->rt_flags = flags;
2446 return rth; 2434 return rth;
2447} 2435}
2448 2436
@@ -2451,36 +2439,37 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2451 * called with rcu_read_lock(); 2439 * called with rcu_read_lock();
2452 */ 2440 */
2453 2441
2454static struct rtable *ip_route_output_slow(struct net *net, 2442static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2455 const struct flowi4 *oldflp4)
2456{ 2443{
2457 u32 tos = RT_FL_TOS(oldflp4);
2458 struct flowi4 fl4;
2459 struct fib_result res;
2460 unsigned int flags = 0;
2461 struct net_device *dev_out = NULL; 2444 struct net_device *dev_out = NULL;
2445 u32 tos = RT_FL_TOS(fl4);
2446 unsigned int flags = 0;
2447 struct fib_result res;
2462 struct rtable *rth; 2448 struct rtable *rth;
2449 __be32 orig_daddr;
2450 __be32 orig_saddr;
2451 int orig_oif;
2463 2452
2464 res.fi = NULL; 2453 res.fi = NULL;
2465#ifdef CONFIG_IP_MULTIPLE_TABLES 2454#ifdef CONFIG_IP_MULTIPLE_TABLES
2466 res.r = NULL; 2455 res.r = NULL;
2467#endif 2456#endif
2468 2457
2469 fl4.flowi4_oif = oldflp4->flowi4_oif; 2458 orig_daddr = fl4->daddr;
2470 fl4.flowi4_iif = net->loopback_dev->ifindex; 2459 orig_saddr = fl4->saddr;
2471 fl4.flowi4_mark = oldflp4->flowi4_mark; 2460 orig_oif = fl4->flowi4_oif;
2472 fl4.daddr = oldflp4->daddr; 2461
2473 fl4.saddr = oldflp4->saddr; 2462 fl4->flowi4_iif = net->loopback_dev->ifindex;
2474 fl4.flowi4_tos = tos & IPTOS_RT_MASK; 2463 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2475 fl4.flowi4_scope = ((tos & RTO_ONLINK) ? 2464 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2476 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); 2465 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
2477 2466
2478 rcu_read_lock(); 2467 rcu_read_lock();
2479 if (oldflp4->saddr) { 2468 if (fl4->saddr) {
2480 rth = ERR_PTR(-EINVAL); 2469 rth = ERR_PTR(-EINVAL);
2481 if (ipv4_is_multicast(oldflp4->saddr) || 2470 if (ipv4_is_multicast(fl4->saddr) ||
2482 ipv4_is_lbcast(oldflp4->saddr) || 2471 ipv4_is_lbcast(fl4->saddr) ||
2483 ipv4_is_zeronet(oldflp4->saddr)) 2472 ipv4_is_zeronet(fl4->saddr))
2484 goto out; 2473 goto out;
2485 2474
2486 /* I removed check for oif == dev_out->oif here. 2475 /* I removed check for oif == dev_out->oif here.
@@ -2491,11 +2480,11 @@ static struct rtable *ip_route_output_slow(struct net *net,
2491 of another iface. --ANK 2480 of another iface. --ANK
2492 */ 2481 */
2493 2482
2494 if (oldflp4->flowi4_oif == 0 && 2483 if (fl4->flowi4_oif == 0 &&
2495 (ipv4_is_multicast(oldflp4->daddr) || 2484 (ipv4_is_multicast(fl4->daddr) ||
2496 ipv4_is_lbcast(oldflp4->daddr))) { 2485 ipv4_is_lbcast(fl4->daddr))) {
2497 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2486 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2498 dev_out = __ip_dev_find(net, oldflp4->saddr, false); 2487 dev_out = __ip_dev_find(net, fl4->saddr, false);
2499 if (dev_out == NULL) 2488 if (dev_out == NULL)
2500 goto out; 2489 goto out;
2501 2490
@@ -2514,20 +2503,20 @@ static struct rtable *ip_route_output_slow(struct net *net,
2514 Luckily, this hack is good workaround. 2503 Luckily, this hack is good workaround.
2515 */ 2504 */
2516 2505
2517 fl4.flowi4_oif = dev_out->ifindex; 2506 fl4->flowi4_oif = dev_out->ifindex;
2518 goto make_route; 2507 goto make_route;
2519 } 2508 }
2520 2509
2521 if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { 2510 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2522 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2511 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2523 if (!__ip_dev_find(net, oldflp4->saddr, false)) 2512 if (!__ip_dev_find(net, fl4->saddr, false))
2524 goto out; 2513 goto out;
2525 } 2514 }
2526 } 2515 }
2527 2516
2528 2517
2529 if (oldflp4->flowi4_oif) { 2518 if (fl4->flowi4_oif) {
2530 dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); 2519 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
2531 rth = ERR_PTR(-ENODEV); 2520 rth = ERR_PTR(-ENODEV);
2532 if (dev_out == NULL) 2521 if (dev_out == NULL)
2533 goto out; 2522 goto out;
@@ -2537,37 +2526,37 @@ static struct rtable *ip_route_output_slow(struct net *net,
2537 rth = ERR_PTR(-ENETUNREACH); 2526 rth = ERR_PTR(-ENETUNREACH);
2538 goto out; 2527 goto out;
2539 } 2528 }
2540 if (ipv4_is_local_multicast(oldflp4->daddr) || 2529 if (ipv4_is_local_multicast(fl4->daddr) ||
2541 ipv4_is_lbcast(oldflp4->daddr)) { 2530 ipv4_is_lbcast(fl4->daddr)) {
2542 if (!fl4.saddr) 2531 if (!fl4->saddr)
2543 fl4.saddr = inet_select_addr(dev_out, 0, 2532 fl4->saddr = inet_select_addr(dev_out, 0,
2544 RT_SCOPE_LINK); 2533 RT_SCOPE_LINK);
2545 goto make_route; 2534 goto make_route;
2546 } 2535 }
2547 if (!fl4.saddr) { 2536 if (!fl4->saddr) {
2548 if (ipv4_is_multicast(oldflp4->daddr)) 2537 if (ipv4_is_multicast(fl4->daddr))
2549 fl4.saddr = inet_select_addr(dev_out, 0, 2538 fl4->saddr = inet_select_addr(dev_out, 0,
2550 fl4.flowi4_scope); 2539 fl4->flowi4_scope);
2551 else if (!oldflp4->daddr) 2540 else if (!fl4->daddr)
2552 fl4.saddr = inet_select_addr(dev_out, 0, 2541 fl4->saddr = inet_select_addr(dev_out, 0,
2553 RT_SCOPE_HOST); 2542 RT_SCOPE_HOST);
2554 } 2543 }
2555 } 2544 }
2556 2545
2557 if (!fl4.daddr) { 2546 if (!fl4->daddr) {
2558 fl4.daddr = fl4.saddr; 2547 fl4->daddr = fl4->saddr;
2559 if (!fl4.daddr) 2548 if (!fl4->daddr)
2560 fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); 2549 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
2561 dev_out = net->loopback_dev; 2550 dev_out = net->loopback_dev;
2562 fl4.flowi4_oif = net->loopback_dev->ifindex; 2551 fl4->flowi4_oif = net->loopback_dev->ifindex;
2563 res.type = RTN_LOCAL; 2552 res.type = RTN_LOCAL;
2564 flags |= RTCF_LOCAL; 2553 flags |= RTCF_LOCAL;
2565 goto make_route; 2554 goto make_route;
2566 } 2555 }
2567 2556
2568 if (fib_lookup(net, &fl4, &res)) { 2557 if (fib_lookup(net, fl4, &res)) {
2569 res.fi = NULL; 2558 res.fi = NULL;
2570 if (oldflp4->flowi4_oif) { 2559 if (fl4->flowi4_oif) {
2571 /* Apparently, routing tables are wrong. Assume, 2560 /* Apparently, routing tables are wrong. Assume,
2572 that the destination is on link. 2561 that the destination is on link.
2573 2562
@@ -2586,9 +2575,9 @@ static struct rtable *ip_route_output_slow(struct net *net,
2586 likely IPv6, but we do not. 2575 likely IPv6, but we do not.
2587 */ 2576 */
2588 2577
2589 if (fl4.saddr == 0) 2578 if (fl4->saddr == 0)
2590 fl4.saddr = inet_select_addr(dev_out, 0, 2579 fl4->saddr = inet_select_addr(dev_out, 0,
2591 RT_SCOPE_LINK); 2580 RT_SCOPE_LINK);
2592 res.type = RTN_UNICAST; 2581 res.type = RTN_UNICAST;
2593 goto make_route; 2582 goto make_route;
2594 } 2583 }
@@ -2597,42 +2586,45 @@ static struct rtable *ip_route_output_slow(struct net *net,
2597 } 2586 }
2598 2587
2599 if (res.type == RTN_LOCAL) { 2588 if (res.type == RTN_LOCAL) {
2600 if (!fl4.saddr) { 2589 if (!fl4->saddr) {
2601 if (res.fi->fib_prefsrc) 2590 if (res.fi->fib_prefsrc)
2602 fl4.saddr = res.fi->fib_prefsrc; 2591 fl4->saddr = res.fi->fib_prefsrc;
2603 else 2592 else
2604 fl4.saddr = fl4.daddr; 2593 fl4->saddr = fl4->daddr;
2605 } 2594 }
2606 dev_out = net->loopback_dev; 2595 dev_out = net->loopback_dev;
2607 fl4.flowi4_oif = dev_out->ifindex; 2596 fl4->flowi4_oif = dev_out->ifindex;
2608 res.fi = NULL; 2597 res.fi = NULL;
2609 flags |= RTCF_LOCAL; 2598 flags |= RTCF_LOCAL;
2610 goto make_route; 2599 goto make_route;
2611 } 2600 }
2612 2601
2613#ifdef CONFIG_IP_ROUTE_MULTIPATH 2602#ifdef CONFIG_IP_ROUTE_MULTIPATH
2614 if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) 2603 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
2615 fib_select_multipath(&res); 2604 fib_select_multipath(&res);
2616 else 2605 else
2617#endif 2606#endif
2618 if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) 2607 if (!res.prefixlen &&
2608 res.table->tb_num_default > 1 &&
2609 res.type == RTN_UNICAST && !fl4->flowi4_oif)
2619 fib_select_default(&res); 2610 fib_select_default(&res);
2620 2611
2621 if (!fl4.saddr) 2612 if (!fl4->saddr)
2622 fl4.saddr = FIB_RES_PREFSRC(net, res); 2613 fl4->saddr = FIB_RES_PREFSRC(net, res);
2623 2614
2624 dev_out = FIB_RES_DEV(res); 2615 dev_out = FIB_RES_DEV(res);
2625 fl4.flowi4_oif = dev_out->ifindex; 2616 fl4->flowi4_oif = dev_out->ifindex;
2626 2617
2627 2618
2628make_route: 2619make_route:
2629 rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); 2620 rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
2621 dev_out, flags);
2630 if (!IS_ERR(rth)) { 2622 if (!IS_ERR(rth)) {
2631 unsigned int hash; 2623 unsigned int hash;
2632 2624
2633 hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, 2625 hash = rt_hash(orig_daddr, orig_saddr, orig_oif,
2634 rt_genid(dev_net(dev_out))); 2626 rt_genid(dev_net(dev_out)));
2635 rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); 2627 rth = rt_intern_hash(hash, rth, NULL, orig_oif);
2636 } 2628 }
2637 2629
2638out: 2630out:
@@ -2640,7 +2632,7 @@ out:
2640 return rth; 2632 return rth;
2641} 2633}
2642 2634
2643struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) 2635struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
2644{ 2636{
2645 struct rtable *rth; 2637 struct rtable *rth;
2646 unsigned int hash; 2638 unsigned int hash;
@@ -2658,13 +2650,17 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4)
2658 rt_is_output_route(rth) && 2650 rt_is_output_route(rth) &&
2659 rth->rt_oif == flp4->flowi4_oif && 2651 rth->rt_oif == flp4->flowi4_oif &&
2660 rth->rt_mark == flp4->flowi4_mark && 2652 rth->rt_mark == flp4->flowi4_mark &&
2661 !((rth->rt_tos ^ flp4->flowi4_tos) & 2653 !((rth->rt_key_tos ^ flp4->flowi4_tos) &
2662 (IPTOS_RT_MASK | RTO_ONLINK)) && 2654 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2663 net_eq(dev_net(rth->dst.dev), net) && 2655 net_eq(dev_net(rth->dst.dev), net) &&
2664 !rt_is_expired(rth)) { 2656 !rt_is_expired(rth)) {
2665 dst_use(&rth->dst, jiffies); 2657 dst_use(&rth->dst, jiffies);
2666 RT_CACHE_STAT_INC(out_hit); 2658 RT_CACHE_STAT_INC(out_hit);
2667 rcu_read_unlock_bh(); 2659 rcu_read_unlock_bh();
2660 if (!flp4->saddr)
2661 flp4->saddr = rth->rt_src;
2662 if (!flp4->daddr)
2663 flp4->daddr = rth->rt_dst;
2668 return rth; 2664 return rth;
2669 } 2665 }
2670 RT_CACHE_STAT_INC(out_hlist_search); 2666 RT_CACHE_STAT_INC(out_hlist_search);
@@ -2709,7 +2705,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2709 2705
2710struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2706struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2711{ 2707{
2712 struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); 2708 struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
2713 struct rtable *ort = (struct rtable *) dst_orig; 2709 struct rtable *ort = (struct rtable *) dst_orig;
2714 2710
2715 if (rt) { 2711 if (rt) {
@@ -2726,7 +2722,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2726 2722
2727 rt->rt_key_dst = ort->rt_key_dst; 2723 rt->rt_key_dst = ort->rt_key_dst;
2728 rt->rt_key_src = ort->rt_key_src; 2724 rt->rt_key_src = ort->rt_key_src;
2729 rt->rt_tos = ort->rt_tos; 2725 rt->rt_key_tos = ort->rt_key_tos;
2730 rt->rt_route_iif = ort->rt_route_iif; 2726 rt->rt_route_iif = ort->rt_route_iif;
2731 rt->rt_iif = ort->rt_iif; 2727 rt->rt_iif = ort->rt_iif;
2732 rt->rt_oif = ort->rt_oif; 2728 rt->rt_oif = ort->rt_oif;
@@ -2762,15 +2758,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
2762 if (IS_ERR(rt)) 2758 if (IS_ERR(rt))
2763 return rt; 2759 return rt;
2764 2760
2765 if (flp4->flowi4_proto) { 2761 if (flp4->flowi4_proto)
2766 if (!flp4->saddr)
2767 flp4->saddr = rt->rt_src;
2768 if (!flp4->daddr)
2769 flp4->daddr = rt->rt_dst;
2770 rt = (struct rtable *) xfrm_lookup(net, &rt->dst, 2762 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2771 flowi4_to_flowi(flp4), 2763 flowi4_to_flowi(flp4),
2772 sk, 0); 2764 sk, 0);
2773 }
2774 2765
2775 return rt; 2766 return rt;
2776} 2767}
@@ -2794,7 +2785,7 @@ static int rt_fill_info(struct net *net,
2794 r->rtm_family = AF_INET; 2785 r->rtm_family = AF_INET;
2795 r->rtm_dst_len = 32; 2786 r->rtm_dst_len = 32;
2796 r->rtm_src_len = 0; 2787 r->rtm_src_len = 0;
2797 r->rtm_tos = rt->rt_tos; 2788 r->rtm_tos = rt->rt_key_tos;
2798 r->rtm_table = RT_TABLE_MAIN; 2789 r->rtm_table = RT_TABLE_MAIN;
2799 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 2790 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2800 r->rtm_type = rt->rt_type; 2791 r->rtm_type = rt->rt_type;
@@ -2848,7 +2839,9 @@ static int rt_fill_info(struct net *net,
2848 2839
2849 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && 2840 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2850 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { 2841 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2851 int err = ipmr_get_route(net, skb, r, nowait); 2842 int err = ipmr_get_route(net, skb,
2843 rt->rt_src, rt->rt_dst,
2844 r, nowait);
2852 if (err <= 0) { 2845 if (err <= 0) {
2853 if (!nowait) { 2846 if (!nowait) {
2854 if (err == 0) 2847 if (err == 0)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8b44c6d2a79b..26461492a847 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 321 * the ACK carries the same options again (see RFC1122 4.2.3.8)
322 */ 322 */
323 if (opt && opt->optlen) { 323 if (opt && opt->optlen) {
324 int opt_size = sizeof(struct ip_options) + opt->optlen; 324 int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;
325 325
326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC); 326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
327 if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) { 327 if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
328 kfree(ireq->opt); 328 kfree(ireq->opt);
329 ireq->opt = NULL; 329 ireq->opt = NULL;
330 } 330 }
@@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
345 * no easy way to do this. 345 * no easy way to do this.
346 */ 346 */
347 { 347 {
348 struct flowi4 fl4 = { 348 struct flowi4 fl4;
349 .flowi4_mark = sk->sk_mark, 349
350 .daddr = ((opt && opt->srr) ? 350 flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
351 opt->faddr : ireq->rmt_addr), 351 RT_SCOPE_UNIVERSE, IPPROTO_TCP,
352 .saddr = ireq->loc_addr, 352 inet_sk_flowi_flags(sk),
353 .flowi4_tos = RT_CONN_FLAGS(sk), 353 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
354 .flowi4_proto = IPPROTO_TCP, 354 ireq->loc_addr, th->source, th->dest);
355 .flowi4_flags = inet_sk_flowi_flags(sk),
356 .fl4_sport = th->dest,
357 .fl4_dport = th->source,
358 };
359 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 355 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
360 rt = ip_route_output_key(sock_net(sk), &fl4); 356 rt = ip_route_output_key(sock_net(sk), &fl4);
361 if (IS_ERR(rt)) { 357 if (IS_ERR(rt)) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 321e6e84dbcc..57d0752e239a 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,6 +13,7 @@
13#include <linux/seqlock.h> 13#include <linux/seqlock.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/nsproxy.h>
16#include <net/snmp.h> 17#include <net/snmp.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/ip.h> 19#include <net/ip.h>
@@ -21,6 +22,7 @@
21#include <net/udp.h> 22#include <net/udp.h>
22#include <net/cipso_ipv4.h> 23#include <net/cipso_ipv4.h>
23#include <net/inet_frag.h> 24#include <net/inet_frag.h>
25#include <net/ping.h>
24 26
25static int zero; 27static int zero;
26static int tcp_retr1_max = 255; 28static int tcp_retr1_max = 255;
@@ -30,6 +32,8 @@ static int tcp_adv_win_scale_min = -31;
30static int tcp_adv_win_scale_max = 31; 32static int tcp_adv_win_scale_max = 31;
31static int ip_ttl_min = 1; 33static int ip_ttl_min = 1;
32static int ip_ttl_max = 255; 34static int ip_ttl_max = 255;
35static int ip_ping_group_range_min[] = { 0, 0 };
36static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
33 37
34/* Update system visible IP port range */ 38/* Update system visible IP port range */
35static void set_local_port_range(int range[2]) 39static void set_local_port_range(int range[2])
@@ -68,6 +72,53 @@ static int ipv4_local_port_range(ctl_table *table, int write,
68 return ret; 72 return ret;
69} 73}
70 74
75
76void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
77{
78 gid_t *data = table->data;
79 unsigned seq;
80 do {
81 seq = read_seqbegin(&sysctl_local_ports.lock);
82
83 *low = data[0];
84 *high = data[1];
85 } while (read_seqretry(&sysctl_local_ports.lock, seq));
86}
87
88/* Update system visible ping group range */
89static void set_ping_group_range(struct ctl_table *table, int range[2])
90{
91 gid_t *data = table->data;
92 write_seqlock(&sysctl_local_ports.lock);
93 data[0] = range[0];
94 data[1] = range[1];
95 write_sequnlock(&sysctl_local_ports.lock);
96}
97
98/* Validate changes from /proc interface. */
99static int ipv4_ping_group_range(ctl_table *table, int write,
100 void __user *buffer,
101 size_t *lenp, loff_t *ppos)
102{
103 int ret;
104 gid_t range[2];
105 ctl_table tmp = {
106 .data = &range,
107 .maxlen = sizeof(range),
108 .mode = table->mode,
109 .extra1 = &ip_ping_group_range_min,
110 .extra2 = &ip_ping_group_range_max,
111 };
112
113 inet_get_ping_group_range_table(table, range, range + 1);
114 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
115
116 if (write && ret == 0)
117 set_ping_group_range(table, range);
118
119 return ret;
120}
121
71static int proc_tcp_congestion_control(ctl_table *ctl, int write, 122static int proc_tcp_congestion_control(ctl_table *ctl, int write,
72 void __user *buffer, size_t *lenp, loff_t *ppos) 123 void __user *buffer, size_t *lenp, loff_t *ppos)
73{ 124{
@@ -677,6 +728,13 @@ static struct ctl_table ipv4_net_table[] = {
677 .mode = 0644, 728 .mode = 0644,
678 .proc_handler = proc_dointvec 729 .proc_handler = proc_dointvec
679 }, 730 },
731 {
732 .procname = "ping_group_range",
733 .data = &init_net.ipv4.sysctl_ping_group_range,
734 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
735 .mode = 0644,
736 .proc_handler = ipv4_ping_group_range,
737 },
680 { } 738 { }
681}; 739};
682 740
@@ -711,8 +769,18 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
711 &net->ipv4.sysctl_icmp_ratemask; 769 &net->ipv4.sysctl_icmp_ratemask;
712 table[6].data = 770 table[6].data =
713 &net->ipv4.sysctl_rt_cache_rebuild_count; 771 &net->ipv4.sysctl_rt_cache_rebuild_count;
772 table[7].data =
773 &net->ipv4.sysctl_ping_group_range;
774
714 } 775 }
715 776
777 /*
778 * Sane defaults - nobody may create ping sockets.
779 * Boot scripts should set this to distro-specific group.
780 */
781 net->ipv4.sysctl_ping_group_range[0] = 1;
782 net->ipv4.sysctl_ping_group_range[1] = 0;
783
716 net->ipv4.sysctl_rt_cache_rebuild_count = 4; 784 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
717 785
718 net->ipv4.ipv4_hdr = register_net_sysctl_table(net, 786 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b22d45010545..054a59d21eb0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -999,7 +999,8 @@ new_segment:
999 /* We have some space in skb head. Superb! */ 999 /* We have some space in skb head. Superb! */
1000 if (copy > skb_tailroom(skb)) 1000 if (copy > skb_tailroom(skb))
1001 copy = skb_tailroom(skb); 1001 copy = skb_tailroom(skb);
1002 if ((err = skb_add_data(skb, from, copy)) != 0) 1002 err = skb_add_data_nocache(sk, skb, from, copy);
1003 if (err)
1003 goto do_fault; 1004 goto do_fault;
1004 } else { 1005 } else {
1005 int merge = 0; 1006 int merge = 0;
@@ -1042,8 +1043,8 @@ new_segment:
1042 1043
1043 /* Time to copy data. We are close to 1044 /* Time to copy data. We are close to
1044 * the end! */ 1045 * the end! */
1045 err = skb_copy_to_page(sk, from, skb, page, 1046 err = skb_copy_to_page_nocache(sk, from, skb,
1046 off, copy); 1047 page, off, copy);
1047 if (err) { 1048 if (err) {
1048 /* If this page was new, give it to the 1049 /* If this page was new, give it to the
1049 * socket so it does not get leaked. 1050 * socket so it does not get leaked.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f7e6c2c2d2bb..3c8d9b6f1ea4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146/* This will initiate an outgoing connection. */ 146/* This will initiate an outgoing connection. */
147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148{ 148{
149 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_sock *inet = inet_sk(sk);
150 struct tcp_sock *tp = tcp_sk(sk); 151 struct tcp_sock *tp = tcp_sk(sk);
151 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
152 __be16 orig_sport, orig_dport; 152 __be16 orig_sport, orig_dport;
153 struct rtable *rt;
154 __be32 daddr, nexthop; 153 __be32 daddr, nexthop;
154 struct flowi4 *fl4;
155 struct rtable *rt;
155 int err; 156 int err;
157 struct ip_options_rcu *inet_opt;
156 158
157 if (addr_len < sizeof(struct sockaddr_in)) 159 if (addr_len < sizeof(struct sockaddr_in))
158 return -EINVAL; 160 return -EINVAL;
@@ -161,15 +163,18 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
161 return -EAFNOSUPPORT; 163 return -EAFNOSUPPORT;
162 164
163 nexthop = daddr = usin->sin_addr.s_addr; 165 nexthop = daddr = usin->sin_addr.s_addr;
164 if (inet->opt && inet->opt->srr) { 166 inet_opt = rcu_dereference_protected(inet->inet_opt,
167 sock_owned_by_user(sk));
168 if (inet_opt && inet_opt->opt.srr) {
165 if (!daddr) 169 if (!daddr)
166 return -EINVAL; 170 return -EINVAL;
167 nexthop = inet->opt->faddr; 171 nexthop = inet_opt->opt.faddr;
168 } 172 }
169 173
170 orig_sport = inet->inet_sport; 174 orig_sport = inet->inet_sport;
171 orig_dport = usin->sin_port; 175 orig_dport = usin->sin_port;
172 rt = ip_route_connect(nexthop, inet->inet_saddr, 176 fl4 = &inet->cork.fl.u.ip4;
177 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
173 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 178 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 IPPROTO_TCP, 179 IPPROTO_TCP,
175 orig_sport, orig_dport, sk, true); 180 orig_sport, orig_dport, sk, true);
@@ -185,11 +190,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
185 return -ENETUNREACH; 190 return -ENETUNREACH;
186 } 191 }
187 192
188 if (!inet->opt || !inet->opt->srr) 193 if (!inet_opt || !inet_opt->opt.srr)
189 daddr = rt->rt_dst; 194 daddr = fl4->daddr;
190 195
191 if (!inet->inet_saddr) 196 if (!inet->inet_saddr)
192 inet->inet_saddr = rt->rt_src; 197 inet->inet_saddr = fl4->saddr;
193 inet->inet_rcv_saddr = inet->inet_saddr; 198 inet->inet_rcv_saddr = inet->inet_saddr;
194 199
195 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 200 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -200,8 +205,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
200 } 205 }
201 206
202 if (tcp_death_row.sysctl_tw_recycle && 207 if (tcp_death_row.sysctl_tw_recycle &&
203 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { 208 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
204 struct inet_peer *peer = rt_get_peer(rt); 209 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
205 /* 210 /*
206 * VJ's idea. We save last timestamp seen from 211 * VJ's idea. We save last timestamp seen from
207 * the destination in peer table, when entering state 212 * the destination in peer table, when entering state
@@ -221,8 +226,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
221 inet->inet_daddr = daddr; 226 inet->inet_daddr = daddr;
222 227
223 inet_csk(sk)->icsk_ext_hdr_len = 0; 228 inet_csk(sk)->icsk_ext_hdr_len = 0;
224 if (inet->opt) 229 if (inet_opt)
225 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 230 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
226 231
227 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 232 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
228 233
@@ -236,8 +241,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
236 if (err) 241 if (err)
237 goto failure; 242 goto failure;
238 243
239 rt = ip_route_newports(rt, IPPROTO_TCP, 244 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
240 orig_sport, orig_dport,
241 inet->inet_sport, inet->inet_dport, sk); 245 inet->inet_sport, inet->inet_dport, sk);
242 if (IS_ERR(rt)) { 246 if (IS_ERR(rt)) {
243 err = PTR_ERR(rt); 247 err = PTR_ERR(rt);
@@ -279,7 +283,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
279/* 283/*
280 * This routine does path mtu discovery as defined in RFC1191. 284 * This routine does path mtu discovery as defined in RFC1191.
281 */ 285 */
282static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) 286static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
283{ 287{
284 struct dst_entry *dst; 288 struct dst_entry *dst;
285 struct inet_sock *inet = inet_sk(sk); 289 struct inet_sock *inet = inet_sk(sk);
@@ -341,7 +345,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
341 345
342void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 346void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
343{ 347{
344 struct iphdr *iph = (struct iphdr *)icmp_skb->data; 348 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
345 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 349 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
346 struct inet_connection_sock *icsk; 350 struct inet_connection_sock *icsk;
347 struct tcp_sock *tp; 351 struct tcp_sock *tp;
@@ -647,7 +651,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
647 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 651 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
648 652
649 net = dev_net(skb_dst(skb)->dev); 653 net = dev_net(skb_dst(skb)->dev);
650 ip_send_reply(net->ipv4.tcp_sock, skb, 654 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
651 &arg, arg.iov[0].iov_len); 655 &arg, arg.iov[0].iov_len);
652 656
653 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 657 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -722,7 +726,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
722 if (oif) 726 if (oif)
723 arg.bound_dev_if = oif; 727 arg.bound_dev_if = oif;
724 728
725 ip_send_reply(net->ipv4.tcp_sock, skb, 729 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
726 &arg, arg.iov[0].iov_len); 730 &arg, arg.iov[0].iov_len);
727 731
728 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 732 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -765,11 +769,12 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
765 struct request_values *rvp) 769 struct request_values *rvp)
766{ 770{
767 const struct inet_request_sock *ireq = inet_rsk(req); 771 const struct inet_request_sock *ireq = inet_rsk(req);
772 struct flowi4 fl4;
768 int err = -1; 773 int err = -1;
769 struct sk_buff * skb; 774 struct sk_buff * skb;
770 775
771 /* First, grab a route. */ 776 /* First, grab a route. */
772 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 777 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
773 return -1; 778 return -1;
774 779
775 skb = tcp_make_synack(sk, dst, req, rvp); 780 skb = tcp_make_synack(sk, dst, req, rvp);
@@ -820,17 +825,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
820/* 825/*
821 * Save and compile IPv4 options into the request_sock if needed. 826 * Save and compile IPv4 options into the request_sock if needed.
822 */ 827 */
823static struct ip_options *tcp_v4_save_options(struct sock *sk, 828static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
824 struct sk_buff *skb) 829 struct sk_buff *skb)
825{ 830{
826 struct ip_options *opt = &(IPCB(skb)->opt); 831 const struct ip_options *opt = &(IPCB(skb)->opt);
827 struct ip_options *dopt = NULL; 832 struct ip_options_rcu *dopt = NULL;
828 833
829 if (opt && opt->optlen) { 834 if (opt && opt->optlen) {
830 int opt_size = optlength(opt); 835 int opt_size = sizeof(*dopt) + opt->optlen;
836
831 dopt = kmalloc(opt_size, GFP_ATOMIC); 837 dopt = kmalloc(opt_size, GFP_ATOMIC);
832 if (dopt) { 838 if (dopt) {
833 if (ip_options_echo(dopt, skb)) { 839 if (ip_options_echo(&dopt->opt, skb)) {
834 kfree(dopt); 840 kfree(dopt);
835 dopt = NULL; 841 dopt = NULL;
836 } 842 }
@@ -1333,6 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1333 req->cookie_ts = tmp_opt.tstamp_ok; 1339 req->cookie_ts = tmp_opt.tstamp_ok;
1334 } else if (!isn) { 1340 } else if (!isn) {
1335 struct inet_peer *peer = NULL; 1341 struct inet_peer *peer = NULL;
1342 struct flowi4 fl4;
1336 1343
1337 /* VJ's idea. We save last timestamp seen 1344 /* VJ's idea. We save last timestamp seen
1338 * from the destination in peer table, when entering 1345 * from the destination in peer table, when entering
@@ -1345,9 +1352,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1345 */ 1352 */
1346 if (tmp_opt.saw_tstamp && 1353 if (tmp_opt.saw_tstamp &&
1347 tcp_death_row.sysctl_tw_recycle && 1354 tcp_death_row.sysctl_tw_recycle &&
1348 (dst = inet_csk_route_req(sk, req)) != NULL && 1355 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1349 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1356 fl4.daddr == saddr &&
1350 peer->daddr.addr.a4 == saddr) { 1357 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1351 inet_peer_refcheck(peer); 1358 inet_peer_refcheck(peer);
1352 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1359 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1353 (s32)(peer->tcp_ts - req->ts_recent) > 1360 (s32)(peer->tcp_ts - req->ts_recent) >
@@ -1411,19 +1418,16 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1411#ifdef CONFIG_TCP_MD5SIG 1418#ifdef CONFIG_TCP_MD5SIG
1412 struct tcp_md5sig_key *key; 1419 struct tcp_md5sig_key *key;
1413#endif 1420#endif
1421 struct ip_options_rcu *inet_opt;
1414 1422
1415 if (sk_acceptq_is_full(sk)) 1423 if (sk_acceptq_is_full(sk))
1416 goto exit_overflow; 1424 goto exit_overflow;
1417 1425
1418 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1419 goto exit;
1420
1421 newsk = tcp_create_openreq_child(sk, req, skb); 1426 newsk = tcp_create_openreq_child(sk, req, skb);
1422 if (!newsk) 1427 if (!newsk)
1423 goto exit_nonewsk; 1428 goto exit_nonewsk;
1424 1429
1425 newsk->sk_gso_type = SKB_GSO_TCPV4; 1430 newsk->sk_gso_type = SKB_GSO_TCPV4;
1426 sk_setup_caps(newsk, dst);
1427 1431
1428 newtp = tcp_sk(newsk); 1432 newtp = tcp_sk(newsk);
1429 newinet = inet_sk(newsk); 1433 newinet = inet_sk(newsk);
@@ -1431,15 +1435,21 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1431 newinet->inet_daddr = ireq->rmt_addr; 1435 newinet->inet_daddr = ireq->rmt_addr;
1432 newinet->inet_rcv_saddr = ireq->loc_addr; 1436 newinet->inet_rcv_saddr = ireq->loc_addr;
1433 newinet->inet_saddr = ireq->loc_addr; 1437 newinet->inet_saddr = ireq->loc_addr;
1434 newinet->opt = ireq->opt; 1438 inet_opt = ireq->opt;
1439 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1435 ireq->opt = NULL; 1440 ireq->opt = NULL;
1436 newinet->mc_index = inet_iif(skb); 1441 newinet->mc_index = inet_iif(skb);
1437 newinet->mc_ttl = ip_hdr(skb)->ttl; 1442 newinet->mc_ttl = ip_hdr(skb)->ttl;
1438 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1443 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1439 if (newinet->opt) 1444 if (inet_opt)
1440 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1445 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1441 newinet->inet_id = newtp->write_seq ^ jiffies; 1446 newinet->inet_id = newtp->write_seq ^ jiffies;
1442 1447
1448 if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1449 goto put_and_exit;
1450
1451 sk_setup_caps(newsk, dst);
1452
1443 tcp_mtup_init(newsk); 1453 tcp_mtup_init(newsk);
1444 tcp_sync_mss(newsk, dst_mtu(dst)); 1454 tcp_sync_mss(newsk, dst_mtu(dst));
1445 newtp->advmss = dst_metric_advmss(dst); 1455 newtp->advmss = dst_metric_advmss(dst);
@@ -1467,10 +1477,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1467 } 1477 }
1468#endif 1478#endif
1469 1479
1470 if (__inet_inherit_port(sk, newsk) < 0) { 1480 if (__inet_inherit_port(sk, newsk) < 0)
1471 sock_put(newsk); 1481 goto put_and_exit;
1472 goto exit;
1473 }
1474 __inet_hash_nolisten(newsk, NULL); 1482 __inet_hash_nolisten(newsk, NULL);
1475 1483
1476 return newsk; 1484 return newsk;
@@ -1482,6 +1490,9 @@ exit_nonewsk:
1482exit: 1490exit:
1483 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1491 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1484 return NULL; 1492 return NULL;
1493put_and_exit:
1494 sock_put(newsk);
1495 goto exit;
1485} 1496}
1486EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1497EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1487 1498
@@ -1764,12 +1775,13 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1764 struct inet_sock *inet = inet_sk(sk); 1775 struct inet_sock *inet = inet_sk(sk);
1765 struct inet_peer *peer; 1776 struct inet_peer *peer;
1766 1777
1767 if (!rt || rt->rt_dst != inet->inet_daddr) { 1778 if (!rt ||
1779 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1768 peer = inet_getpeer_v4(inet->inet_daddr, 1); 1780 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1769 *release_it = true; 1781 *release_it = true;
1770 } else { 1782 } else {
1771 if (!rt->peer) 1783 if (!rt->peer)
1772 rt_bind_peer(rt, 1); 1784 rt_bind_peer(rt, inet->inet_daddr, 1);
1773 peer = rt->peer; 1785 peer = rt->peer;
1774 *release_it = false; 1786 *release_it = false;
1775 } 1787 }
@@ -2527,7 +2539,7 @@ void tcp4_proc_exit(void)
2527 2539
2528struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2540struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2529{ 2541{
2530 struct iphdr *iph = skb_gro_network_header(skb); 2542 const struct iphdr *iph = skb_gro_network_header(skb);
2531 2543
2532 switch (skb->ip_summed) { 2544 switch (skb->ip_summed) {
2533 case CHECKSUM_COMPLETE: 2545 case CHECKSUM_COMPLETE:
@@ -2548,7 +2560,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2548 2560
2549int tcp4_gro_complete(struct sk_buff *skb) 2561int tcp4_gro_complete(struct sk_buff *skb)
2550{ 2562{
2551 struct iphdr *iph = ip_hdr(skb); 2563 const struct iphdr *iph = ip_hdr(skb);
2552 struct tcphdr *th = tcp_hdr(skb); 2564 struct tcphdr *th = tcp_hdr(skb);
2553 2565
2554 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 2566 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 17388c7f49c4..882e0b0964d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -899,7 +899,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
899 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 899 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
900 tcp_skb_pcount(skb)); 900 tcp_skb_pcount(skb));
901 901
902 err = icsk->icsk_af_ops->queue_xmit(skb); 902 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
903 if (likely(err <= 0)) 903 if (likely(err <= 0))
904 return err; 904 return err;
905 905
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f87a8eb76f3b..599374f65c76 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -578,7 +578,7 @@ found:
578void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 578void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
579{ 579{
580 struct inet_sock *inet; 580 struct inet_sock *inet;
581 struct iphdr *iph = (struct iphdr *)skb->data; 581 const struct iphdr *iph = (const struct iphdr *)skb->data;
582 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); 582 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
583 const int type = icmp_hdr(skb)->type; 583 const int type = icmp_hdr(skb)->type;
584 const int code = icmp_hdr(skb)->code; 584 const int code = icmp_hdr(skb)->code;
@@ -706,12 +706,11 @@ static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
706 } 706 }
707} 707}
708 708
709static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) 709static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
710{ 710{
711 struct sock *sk = skb->sk; 711 struct sock *sk = skb->sk;
712 struct inet_sock *inet = inet_sk(sk); 712 struct inet_sock *inet = inet_sk(sk);
713 struct udphdr *uh; 713 struct udphdr *uh;
714 struct rtable *rt = (struct rtable *)skb_dst(skb);
715 int err = 0; 714 int err = 0;
716 int is_udplite = IS_UDPLITE(sk); 715 int is_udplite = IS_UDPLITE(sk);
717 int offset = skb_transport_offset(skb); 716 int offset = skb_transport_offset(skb);
@@ -723,7 +722,7 @@ static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport)
723 */ 722 */
724 uh = udp_hdr(skb); 723 uh = udp_hdr(skb);
725 uh->source = inet->inet_sport; 724 uh->source = inet->inet_sport;
726 uh->dest = dport; 725 uh->dest = fl4->fl4_dport;
727 uh->len = htons(len); 726 uh->len = htons(len);
728 uh->check = 0; 727 uh->check = 0;
729 728
@@ -737,14 +736,14 @@ static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport)
737 736
738 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 737 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
739 738
740 udp4_hwcsum(skb, rt->rt_src, daddr); 739 udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
741 goto send; 740 goto send;
742 741
743 } else 742 } else
744 csum = udp_csum(skb); 743 csum = udp_csum(skb);
745 744
746 /* add protocol-dependent pseudo-header */ 745 /* add protocol-dependent pseudo-header */
747 uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len, 746 uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
748 sk->sk_protocol, csum); 747 sk->sk_protocol, csum);
749 if (uh->check == 0) 748 if (uh->check == 0)
750 uh->check = CSUM_MANGLED_0; 749 uh->check = CSUM_MANGLED_0;
@@ -774,11 +773,11 @@ static int udp_push_pending_frames(struct sock *sk)
774 struct sk_buff *skb; 773 struct sk_buff *skb;
775 int err = 0; 774 int err = 0;
776 775
777 skb = ip_finish_skb(sk); 776 skb = ip_finish_skb(sk, fl4);
778 if (!skb) 777 if (!skb)
779 goto out; 778 goto out;
780 779
781 err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); 780 err = udp_send_skb(skb, fl4);
782 781
783out: 782out:
784 up->len = 0; 783 up->len = 0;
@@ -791,6 +790,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
791{ 790{
792 struct inet_sock *inet = inet_sk(sk); 791 struct inet_sock *inet = inet_sk(sk);
793 struct udp_sock *up = udp_sk(sk); 792 struct udp_sock *up = udp_sk(sk);
793 struct flowi4 fl4_stack;
794 struct flowi4 *fl4; 794 struct flowi4 *fl4;
795 int ulen = len; 795 int ulen = len;
796 struct ipcm_cookie ipc; 796 struct ipcm_cookie ipc;
@@ -804,6 +804,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
804 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 804 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
805 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 805 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
806 struct sk_buff *skb; 806 struct sk_buff *skb;
807 struct ip_options_data opt_copy;
807 808
808 if (len > 0xFFFF) 809 if (len > 0xFFFF)
809 return -EMSGSIZE; 810 return -EMSGSIZE;
@@ -820,6 +821,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
820 821
821 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 822 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
822 823
824 fl4 = &inet->cork.fl.u.ip4;
823 if (up->pending) { 825 if (up->pending) {
824 /* 826 /*
825 * There are pending frames. 827 * There are pending frames.
@@ -877,22 +879,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
877 free = 1; 879 free = 1;
878 connected = 0; 880 connected = 0;
879 } 881 }
880 if (!ipc.opt) 882 if (!ipc.opt) {
881 ipc.opt = inet->opt; 883 struct ip_options_rcu *inet_opt;
884
885 rcu_read_lock();
886 inet_opt = rcu_dereference(inet->inet_opt);
887 if (inet_opt) {
888 memcpy(&opt_copy, inet_opt,
889 sizeof(*inet_opt) + inet_opt->opt.optlen);
890 ipc.opt = &opt_copy.opt;
891 }
892 rcu_read_unlock();
893 }
882 894
883 saddr = ipc.addr; 895 saddr = ipc.addr;
884 ipc.addr = faddr = daddr; 896 ipc.addr = faddr = daddr;
885 897
886 if (ipc.opt && ipc.opt->srr) { 898 if (ipc.opt && ipc.opt->opt.srr) {
887 if (!daddr) 899 if (!daddr)
888 return -EINVAL; 900 return -EINVAL;
889 faddr = ipc.opt->faddr; 901 faddr = ipc.opt->opt.faddr;
890 connected = 0; 902 connected = 0;
891 } 903 }
892 tos = RT_TOS(inet->tos); 904 tos = RT_TOS(inet->tos);
893 if (sock_flag(sk, SOCK_LOCALROUTE) || 905 if (sock_flag(sk, SOCK_LOCALROUTE) ||
894 (msg->msg_flags & MSG_DONTROUTE) || 906 (msg->msg_flags & MSG_DONTROUTE) ||
895 (ipc.opt && ipc.opt->is_strictroute)) { 907 (ipc.opt && ipc.opt->opt.is_strictroute)) {
896 tos |= RTO_ONLINK; 908 tos |= RTO_ONLINK;
897 connected = 0; 909 connected = 0;
898 } 910 }
@@ -909,22 +921,16 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
909 rt = (struct rtable *)sk_dst_check(sk, 0); 921 rt = (struct rtable *)sk_dst_check(sk, 0);
910 922
911 if (rt == NULL) { 923 if (rt == NULL) {
912 struct flowi4 fl4 = {
913 .flowi4_oif = ipc.oif,
914 .flowi4_mark = sk->sk_mark,
915 .daddr = faddr,
916 .saddr = saddr,
917 .flowi4_tos = tos,
918 .flowi4_proto = sk->sk_protocol,
919 .flowi4_flags = (inet_sk_flowi_flags(sk) |
920 FLOWI_FLAG_CAN_SLEEP),
921 .fl4_sport = inet->inet_sport,
922 .fl4_dport = dport,
923 };
924 struct net *net = sock_net(sk); 924 struct net *net = sock_net(sk);
925 925
926 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 926 fl4 = &fl4_stack;
927 rt = ip_route_output_flow(net, &fl4, sk); 927 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
928 RT_SCOPE_UNIVERSE, sk->sk_protocol,
929 inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
930 faddr, saddr, dport, inet->inet_sport);
931
932 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
933 rt = ip_route_output_flow(net, fl4, sk);
928 if (IS_ERR(rt)) { 934 if (IS_ERR(rt)) {
929 err = PTR_ERR(rt); 935 err = PTR_ERR(rt);
930 rt = NULL; 936 rt = NULL;
@@ -945,18 +951,18 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
945 goto do_confirm; 951 goto do_confirm;
946back_from_confirm: 952back_from_confirm:
947 953
948 saddr = rt->rt_src; 954 saddr = fl4->saddr;
949 if (!ipc.addr) 955 if (!ipc.addr)
950 daddr = ipc.addr = rt->rt_dst; 956 daddr = ipc.addr = fl4->daddr;
951 957
952 /* Lockless fast path for the non-corking case. */ 958 /* Lockless fast path for the non-corking case. */
953 if (!corkreq) { 959 if (!corkreq) {
954 skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen, 960 skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
955 sizeof(struct udphdr), &ipc, &rt, 961 sizeof(struct udphdr), &ipc, &rt,
956 msg->msg_flags); 962 msg->msg_flags);
957 err = PTR_ERR(skb); 963 err = PTR_ERR(skb);
958 if (skb && !IS_ERR(skb)) 964 if (skb && !IS_ERR(skb))
959 err = udp_send_skb(skb, daddr, dport); 965 err = udp_send_skb(skb, fl4);
960 goto out; 966 goto out;
961 } 967 }
962 968
@@ -982,9 +988,9 @@ back_from_confirm:
982 988
983do_append_data: 989do_append_data:
984 up->len += ulen; 990 up->len += ulen;
985 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, 991 err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
986 sizeof(struct udphdr), &ipc, &rt, 992 sizeof(struct udphdr), &ipc, &rt,
987 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 993 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
988 if (err) 994 if (err)
989 udp_flush_pending_frames(sk); 995 udp_flush_pending_frames(sk);
990 else if (!corkreq) 996 else if (!corkreq)
@@ -1024,6 +1030,7 @@ EXPORT_SYMBOL(udp_sendmsg);
1024int udp_sendpage(struct sock *sk, struct page *page, int offset, 1030int udp_sendpage(struct sock *sk, struct page *page, int offset,
1025 size_t size, int flags) 1031 size_t size, int flags)
1026{ 1032{
1033 struct inet_sock *inet = inet_sk(sk);
1027 struct udp_sock *up = udp_sk(sk); 1034 struct udp_sock *up = udp_sk(sk);
1028 int ret; 1035 int ret;
1029 1036
@@ -1048,7 +1055,8 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
1048 return -EINVAL; 1055 return -EINVAL;
1049 } 1056 }
1050 1057
1051 ret = ip_append_page(sk, page, offset, size, flags); 1058 ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
1059 page, offset, size, flags);
1052 if (ret == -EOPNOTSUPP) { 1060 if (ret == -EOPNOTSUPP) {
1053 release_sock(sk); 1061 release_sock(sk);
1054 return sock_no_sendpage(sk->sk_socket, page, offset, 1062 return sock_no_sendpage(sk->sk_socket, page, offset,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d20a05e970d8..981e43eaf704 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -18,38 +18,46 @@
18 18
19static struct xfrm_policy_afinfo xfrm4_policy_afinfo; 19static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
20 20
21static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, 21static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
22 const xfrm_address_t *saddr, 22 int tos,
23 const xfrm_address_t *daddr) 23 const xfrm_address_t *saddr,
24 const xfrm_address_t *daddr)
24{ 25{
25 struct flowi4 fl4 = {
26 .daddr = daddr->a4,
27 .flowi4_tos = tos,
28 };
29 struct rtable *rt; 26 struct rtable *rt;
30 27
28 memset(fl4, 0, sizeof(*fl4));
29 fl4->daddr = daddr->a4;
30 fl4->flowi4_tos = tos;
31 if (saddr) 31 if (saddr)
32 fl4.saddr = saddr->a4; 32 fl4->saddr = saddr->a4;
33 33
34 rt = __ip_route_output_key(net, &fl4); 34 rt = __ip_route_output_key(net, fl4);
35 if (!IS_ERR(rt)) 35 if (!IS_ERR(rt))
36 return &rt->dst; 36 return &rt->dst;
37 37
38 return ERR_CAST(rt); 38 return ERR_CAST(rt);
39} 39}
40 40
41static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
42 const xfrm_address_t *saddr,
43 const xfrm_address_t *daddr)
44{
45 struct flowi4 fl4;
46
47 return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);
48}
49
41static int xfrm4_get_saddr(struct net *net, 50static int xfrm4_get_saddr(struct net *net,
42 xfrm_address_t *saddr, xfrm_address_t *daddr) 51 xfrm_address_t *saddr, xfrm_address_t *daddr)
43{ 52{
44 struct dst_entry *dst; 53 struct dst_entry *dst;
45 struct rtable *rt; 54 struct flowi4 fl4;
46 55
47 dst = xfrm4_dst_lookup(net, 0, NULL, daddr); 56 dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);
48 if (IS_ERR(dst)) 57 if (IS_ERR(dst))
49 return -EHOSTUNREACH; 58 return -EHOSTUNREACH;
50 59
51 rt = (struct rtable *)dst; 60 saddr->a4 = fl4.saddr;
52 saddr->a4 = rt->rt_src;
53 dst_release(dst); 61 dst_release(dst);
54 return 0; 62 return 0;
55} 63}
@@ -73,7 +81,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
73 81
74 rt->rt_key_dst = fl4->daddr; 82 rt->rt_key_dst = fl4->daddr;
75 rt->rt_key_src = fl4->saddr; 83 rt->rt_key_src = fl4->saddr;
76 rt->rt_tos = fl4->flowi4_tos; 84 rt->rt_key_tos = fl4->flowi4_tos;
77 rt->rt_route_iif = fl4->flowi4_iif; 85 rt->rt_route_iif = fl4->flowi4_iif;
78 rt->rt_iif = fl4->flowi4_iif; 86 rt->rt_iif = fl4->flowi4_iif;
79 rt->rt_oif = fl4->flowi4_oif; 87 rt->rt_oif = fl4->flowi4_oif;
@@ -102,7 +110,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
102static void 110static void
103_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) 111_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
104{ 112{
105 struct iphdr *iph = ip_hdr(skb); 113 const struct iphdr *iph = ip_hdr(skb);
106 u8 *xprth = skb_network_header(skb) + iph->ihl * 4; 114 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
107 struct flowi4 *fl4 = &fl->u.ip4; 115 struct flowi4 *fl4 = &fl->u.ip4;
108 116
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 805d63ef4340..d9ac0a0058b5 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -55,7 +55,7 @@ xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
55 55
56int xfrm4_extract_header(struct sk_buff *skb) 56int xfrm4_extract_header(struct sk_buff *skb)
57{ 57{
58 struct iphdr *iph = ip_hdr(skb); 58 const struct iphdr *iph = ip_hdr(skb);
59 59
60 XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); 60 XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
61 XFRM_MODE_SKB_CB(skb)->id = iph->id; 61 XFRM_MODE_SKB_CB(skb)->id = iph->id;