aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c53
-rw-r--r--net/ipv4/ah4.c7
-rw-r--r--net/ipv4/cipso_ipv4.c113
-rw-r--r--net/ipv4/datagram.c22
-rw-r--r--net/ipv4/devinet.c4
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/fib_frontend.c16
-rw-r--r--net/ipv4/fib_semantics.c12
-rw-r--r--net/ipv4/fib_trie.c118
-rw-r--r--net/ipv4/icmp.c133
-rw-r--r--net/ipv4/igmp.c64
-rw-r--r--net/ipv4/inet_connection_sock.c61
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_lro.c4
-rw-r--r--net/ipv4/inetpeer.c42
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_fragment.c58
-rw-r--r--net/ipv4/ip_gre.c70
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_options.c57
-rw-r--r--net/ipv4/ip_output.c158
-rw-r--r--net/ipv4/ip_sockglue.c37
-rw-r--r--net/ipv4/ipcomp.c4
-rw-r--r--net/ipv4/ipconfig.c35
-rw-r--r--net/ipv4/ipip.c36
-rw-r--r--net/ipv4/ipmr.c39
-rw-r--r--net/ipv4/netfilter/arp_tables.c18
-rw-r--r--net/ipv4/netfilter/ip_tables.c28
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c2
-rw-r--r--net/ipv4/ping.c932
-rw-r--r--net/ipv4/raw.c94
-rw-r--r--net/ipv4/route.c386
-rw-r--r--net/ipv4/syncookies.c22
-rw-r--r--net/ipv4/sysctl_net_ipv4.c68
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_ipv4.c104
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c80
-rw-r--r--net/ipv4/xfrm4_policy.c38
-rw-r--r--net/ipv4/xfrm4_state.c2
41 files changed, 2044 insertions, 899 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 0dc772d0d125..f2dc69cffb57 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
11 datagram.o raw.o udp.o udplite.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 fib_frontend.o fib_semantics.o fib_trie.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o 14 inet_fragment.o ping.o
15 15
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
17obj-$(CONFIG_PROC_FS) += proc.o 17obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 807d83c02ef6..cc1463156cd0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -105,6 +105,7 @@
105#include <net/tcp.h> 105#include <net/tcp.h>
106#include <net/udp.h> 106#include <net/udp.h>
107#include <net/udplite.h> 107#include <net/udplite.h>
108#include <net/ping.h>
108#include <linux/skbuff.h> 109#include <linux/skbuff.h>
109#include <net/sock.h> 110#include <net/sock.h>
110#include <net/raw.h> 111#include <net/raw.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
153 WARN_ON(sk->sk_wmem_queued); 154 WARN_ON(sk->sk_wmem_queued);
154 WARN_ON(sk->sk_forward_alloc); 155 WARN_ON(sk->sk_forward_alloc);
155 156
156 kfree(inet->opt); 157 kfree(rcu_dereference_protected(inet->inet_opt, 1));
157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); 158 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
158 sk_refcnt_debug_dec(sk); 159 sk_refcnt_debug_dec(sk);
159} 160}
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] =
1008 .flags = INET_PROTOSW_PERMANENT, 1009 .flags = INET_PROTOSW_PERMANENT,
1009 }, 1010 },
1010 1011
1012 {
1013 .type = SOCK_DGRAM,
1014 .protocol = IPPROTO_ICMP,
1015 .prot = &ping_prot,
1016 .ops = &inet_dgram_ops,
1017 .no_check = UDP_CSUM_DEFAULT,
1018 .flags = INET_PROTOSW_REUSE,
1019 },
1011 1020
1012 { 1021 {
1013 .type = SOCK_RAW, 1022 .type = SOCK_RAW,
@@ -1103,14 +1112,19 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1103 struct inet_sock *inet = inet_sk(sk); 1112 struct inet_sock *inet = inet_sk(sk);
1104 __be32 old_saddr = inet->inet_saddr; 1113 __be32 old_saddr = inet->inet_saddr;
1105 __be32 daddr = inet->inet_daddr; 1114 __be32 daddr = inet->inet_daddr;
1115 struct flowi4 *fl4;
1106 struct rtable *rt; 1116 struct rtable *rt;
1107 __be32 new_saddr; 1117 __be32 new_saddr;
1118 struct ip_options_rcu *inet_opt;
1108 1119
1109 if (inet->opt && inet->opt->srr) 1120 inet_opt = rcu_dereference_protected(inet->inet_opt,
1110 daddr = inet->opt->faddr; 1121 sock_owned_by_user(sk));
1122 if (inet_opt && inet_opt->opt.srr)
1123 daddr = inet_opt->opt.faddr;
1111 1124
1112 /* Query new route. */ 1125 /* Query new route. */
1113 rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), 1126 fl4 = &inet->cork.fl.u.ip4;
1127 rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
1114 sk->sk_bound_dev_if, sk->sk_protocol, 1128 sk->sk_bound_dev_if, sk->sk_protocol,
1115 inet->inet_sport, inet->inet_dport, sk, false); 1129 inet->inet_sport, inet->inet_dport, sk, false);
1116 if (IS_ERR(rt)) 1130 if (IS_ERR(rt))
@@ -1118,7 +1132,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1118 1132
1119 sk_setup_caps(sk, &rt->dst); 1133 sk_setup_caps(sk, &rt->dst);
1120 1134
1121 new_saddr = rt->rt_src; 1135 new_saddr = fl4->saddr;
1122 1136
1123 if (new_saddr == old_saddr) 1137 if (new_saddr == old_saddr)
1124 return 0; 1138 return 0;
@@ -1147,6 +1161,8 @@ int inet_sk_rebuild_header(struct sock *sk)
1147 struct inet_sock *inet = inet_sk(sk); 1161 struct inet_sock *inet = inet_sk(sk);
1148 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); 1162 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1149 __be32 daddr; 1163 __be32 daddr;
1164 struct ip_options_rcu *inet_opt;
1165 struct flowi4 *fl4;
1150 int err; 1166 int err;
1151 1167
1152 /* Route is OK, nothing to do. */ 1168 /* Route is OK, nothing to do. */
@@ -1154,10 +1170,14 @@ int inet_sk_rebuild_header(struct sock *sk)
1154 return 0; 1170 return 0;
1155 1171
1156 /* Reroute. */ 1172 /* Reroute. */
1173 rcu_read_lock();
1174 inet_opt = rcu_dereference(inet->inet_opt);
1157 daddr = inet->inet_daddr; 1175 daddr = inet->inet_daddr;
1158 if (inet->opt && inet->opt->srr) 1176 if (inet_opt && inet_opt->opt.srr)
1159 daddr = inet->opt->faddr; 1177 daddr = inet_opt->opt.faddr;
1160 rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, 1178 rcu_read_unlock();
1179 fl4 = &inet->cork.fl.u.ip4;
1180 rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
1161 inet->inet_dport, inet->inet_sport, 1181 inet->inet_dport, inet->inet_sport,
1162 sk->sk_protocol, RT_CONN_FLAGS(sk), 1182 sk->sk_protocol, RT_CONN_FLAGS(sk),
1163 sk->sk_bound_dev_if); 1183 sk->sk_bound_dev_if);
@@ -1186,7 +1206,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
1186 1206
1187static int inet_gso_send_check(struct sk_buff *skb) 1207static int inet_gso_send_check(struct sk_buff *skb)
1188{ 1208{
1189 struct iphdr *iph; 1209 const struct iphdr *iph;
1190 const struct net_protocol *ops; 1210 const struct net_protocol *ops;
1191 int proto; 1211 int proto;
1192 int ihl; 1212 int ihl;
@@ -1293,7 +1313,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1293 const struct net_protocol *ops; 1313 const struct net_protocol *ops;
1294 struct sk_buff **pp = NULL; 1314 struct sk_buff **pp = NULL;
1295 struct sk_buff *p; 1315 struct sk_buff *p;
1296 struct iphdr *iph; 1316 const struct iphdr *iph;
1297 unsigned int hlen; 1317 unsigned int hlen;
1298 unsigned int off; 1318 unsigned int off;
1299 unsigned int id; 1319 unsigned int id;
@@ -1516,6 +1536,7 @@ static const struct net_protocol udp_protocol = {
1516 1536
1517static const struct net_protocol icmp_protocol = { 1537static const struct net_protocol icmp_protocol = {
1518 .handler = icmp_rcv, 1538 .handler = icmp_rcv,
1539 .err_handler = ping_err,
1519 .no_policy = 1, 1540 .no_policy = 1,
1520 .netns_ok = 1, 1541 .netns_ok = 1,
1521}; 1542};
@@ -1631,6 +1652,10 @@ static int __init inet_init(void)
1631 if (rc) 1652 if (rc)
1632 goto out_unregister_udp_proto; 1653 goto out_unregister_udp_proto;
1633 1654
1655 rc = proto_register(&ping_prot, 1);
1656 if (rc)
1657 goto out_unregister_raw_proto;
1658
1634 /* 1659 /*
1635 * Tell SOCKET that we are alive... 1660 * Tell SOCKET that we are alive...
1636 */ 1661 */
@@ -1686,6 +1711,8 @@ static int __init inet_init(void)
1686 /* Add UDP-Lite (RFC 3828) */ 1711 /* Add UDP-Lite (RFC 3828) */
1687 udplite4_register(); 1712 udplite4_register();
1688 1713
1714 ping_init();
1715
1689 /* 1716 /*
1690 * Set the ICMP layer up 1717 * Set the ICMP layer up
1691 */ 1718 */
@@ -1716,6 +1743,8 @@ static int __init inet_init(void)
1716 rc = 0; 1743 rc = 0;
1717out: 1744out:
1718 return rc; 1745 return rc;
1746out_unregister_raw_proto:
1747 proto_unregister(&raw_prot);
1719out_unregister_udp_proto: 1748out_unregister_udp_proto:
1720 proto_unregister(&udp_prot); 1749 proto_unregister(&udp_prot);
1721out_unregister_tcp_proto: 1750out_unregister_tcp_proto:
@@ -1740,11 +1769,15 @@ static int __init ipv4_proc_init(void)
1740 goto out_tcp; 1769 goto out_tcp;
1741 if (udp4_proc_init()) 1770 if (udp4_proc_init())
1742 goto out_udp; 1771 goto out_udp;
1772 if (ping_proc_init())
1773 goto out_ping;
1743 if (ip_misc_proc_init()) 1774 if (ip_misc_proc_init())
1744 goto out_misc; 1775 goto out_misc;
1745out: 1776out:
1746 return rc; 1777 return rc;
1747out_misc: 1778out_misc:
1779 ping_proc_exit();
1780out_ping:
1748 udp4_proc_exit(); 1781 udp4_proc_exit();
1749out_udp: 1782out_udp:
1750 tcp4_proc_exit(); 1783 tcp4_proc_exit();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4286fd3cc0e2..c1f4154552fc 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -73,7 +73,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
73 * into IP header for icv calculation. Options are already checked 73 * into IP header for icv calculation. Options are already checked
74 * for validity, so paranoia is not required. */ 74 * for validity, so paranoia is not required. */
75 75
76static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) 76static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
77{ 77{
78 unsigned char * optptr = (unsigned char*)(iph+1); 78 unsigned char * optptr = (unsigned char*)(iph+1);
79 int l = iph->ihl*4 - sizeof(struct iphdr); 79 int l = iph->ihl*4 - sizeof(struct iphdr);
@@ -396,7 +396,7 @@ out:
396static void ah4_err(struct sk_buff *skb, u32 info) 396static void ah4_err(struct sk_buff *skb, u32 info)
397{ 397{
398 struct net *net = dev_net(skb->dev); 398 struct net *net = dev_net(skb->dev);
399 struct iphdr *iph = (struct iphdr *)skb->data; 399 const struct iphdr *iph = (const struct iphdr *)skb->data;
400 struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); 400 struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
401 struct xfrm_state *x; 401 struct xfrm_state *x;
402 402
@@ -404,7 +404,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
404 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 404 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
405 return; 405 return;
406 406
407 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); 407 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
408 ah->spi, IPPROTO_AH, AF_INET);
408 if (!x) 409 if (!x)
409 return; 410 return;
410 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", 411 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a0af7ea87870..2b3c23c287cd 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
1857 return CIPSO_V4_HDR_LEN + ret_val; 1857 return CIPSO_V4_HDR_LEN + ret_val;
1858} 1858}
1859 1859
1860static void opt_kfree_rcu(struct rcu_head *head)
1861{
1862 kfree(container_of(head, struct ip_options_rcu, rcu));
1863}
1864
1860/** 1865/**
1861 * cipso_v4_sock_setattr - Add a CIPSO option to a socket 1866 * cipso_v4_sock_setattr - Add a CIPSO option to a socket
1862 * @sk: the socket 1867 * @sk: the socket
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
1879 unsigned char *buf = NULL; 1884 unsigned char *buf = NULL;
1880 u32 buf_len; 1885 u32 buf_len;
1881 u32 opt_len; 1886 u32 opt_len;
1882 struct ip_options *opt = NULL; 1887 struct ip_options_rcu *old, *opt = NULL;
1883 struct inet_sock *sk_inet; 1888 struct inet_sock *sk_inet;
1884 struct inet_connection_sock *sk_conn; 1889 struct inet_connection_sock *sk_conn;
1885 1890
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk,
1915 ret_val = -ENOMEM; 1920 ret_val = -ENOMEM;
1916 goto socket_setattr_failure; 1921 goto socket_setattr_failure;
1917 } 1922 }
1918 memcpy(opt->__data, buf, buf_len); 1923 memcpy(opt->opt.__data, buf, buf_len);
1919 opt->optlen = opt_len; 1924 opt->opt.optlen = opt_len;
1920 opt->cipso = sizeof(struct iphdr); 1925 opt->opt.cipso = sizeof(struct iphdr);
1921 kfree(buf); 1926 kfree(buf);
1922 buf = NULL; 1927 buf = NULL;
1923 1928
1924 sk_inet = inet_sk(sk); 1929 sk_inet = inet_sk(sk);
1930
1931 old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk));
1925 if (sk_inet->is_icsk) { 1932 if (sk_inet->is_icsk) {
1926 sk_conn = inet_csk(sk); 1933 sk_conn = inet_csk(sk);
1927 if (sk_inet->opt) 1934 if (old)
1928 sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; 1935 sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
1929 sk_conn->icsk_ext_hdr_len += opt->optlen; 1936 sk_conn->icsk_ext_hdr_len += opt->opt.optlen;
1930 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); 1937 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
1931 } 1938 }
1932 opt = xchg(&sk_inet->opt, opt); 1939 rcu_assign_pointer(sk_inet->inet_opt, opt);
1933 kfree(opt); 1940 if (old)
1941 call_rcu(&old->rcu, opt_kfree_rcu);
1934 1942
1935 return 0; 1943 return 0;
1936 1944
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
1960 unsigned char *buf = NULL; 1968 unsigned char *buf = NULL;
1961 u32 buf_len; 1969 u32 buf_len;
1962 u32 opt_len; 1970 u32 opt_len;
1963 struct ip_options *opt = NULL; 1971 struct ip_options_rcu *opt = NULL;
1964 struct inet_request_sock *req_inet; 1972 struct inet_request_sock *req_inet;
1965 1973
1966 /* We allocate the maximum CIPSO option size here so we are probably 1974 /* We allocate the maximum CIPSO option size here so we are probably
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req,
1988 ret_val = -ENOMEM; 1996 ret_val = -ENOMEM;
1989 goto req_setattr_failure; 1997 goto req_setattr_failure;
1990 } 1998 }
1991 memcpy(opt->__data, buf, buf_len); 1999 memcpy(opt->opt.__data, buf, buf_len);
1992 opt->optlen = opt_len; 2000 opt->opt.optlen = opt_len;
1993 opt->cipso = sizeof(struct iphdr); 2001 opt->opt.cipso = sizeof(struct iphdr);
1994 kfree(buf); 2002 kfree(buf);
1995 buf = NULL; 2003 buf = NULL;
1996 2004
1997 req_inet = inet_rsk(req); 2005 req_inet = inet_rsk(req);
1998 opt = xchg(&req_inet->opt, opt); 2006 opt = xchg(&req_inet->opt, opt);
1999 kfree(opt); 2007 if (opt)
2008 call_rcu(&opt->rcu, opt_kfree_rcu);
2000 2009
2001 return 0; 2010 return 0;
2002 2011
@@ -2016,34 +2025,34 @@ req_setattr_failure:
2016 * values on failure. 2025 * values on failure.
2017 * 2026 *
2018 */ 2027 */
2019static int cipso_v4_delopt(struct ip_options **opt_ptr) 2028static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
2020{ 2029{
2021 int hdr_delta = 0; 2030 int hdr_delta = 0;
2022 struct ip_options *opt = *opt_ptr; 2031 struct ip_options_rcu *opt = *opt_ptr;
2023 2032
2024 if (opt->srr || opt->rr || opt->ts || opt->router_alert) { 2033 if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
2025 u8 cipso_len; 2034 u8 cipso_len;
2026 u8 cipso_off; 2035 u8 cipso_off;
2027 unsigned char *cipso_ptr; 2036 unsigned char *cipso_ptr;
2028 int iter; 2037 int iter;
2029 int optlen_new; 2038 int optlen_new;
2030 2039
2031 cipso_off = opt->cipso - sizeof(struct iphdr); 2040 cipso_off = opt->opt.cipso - sizeof(struct iphdr);
2032 cipso_ptr = &opt->__data[cipso_off]; 2041 cipso_ptr = &opt->opt.__data[cipso_off];
2033 cipso_len = cipso_ptr[1]; 2042 cipso_len = cipso_ptr[1];
2034 2043
2035 if (opt->srr > opt->cipso) 2044 if (opt->opt.srr > opt->opt.cipso)
2036 opt->srr -= cipso_len; 2045 opt->opt.srr -= cipso_len;
2037 if (opt->rr > opt->cipso) 2046 if (opt->opt.rr > opt->opt.cipso)
2038 opt->rr -= cipso_len; 2047 opt->opt.rr -= cipso_len;
2039 if (opt->ts > opt->cipso) 2048 if (opt->opt.ts > opt->opt.cipso)
2040 opt->ts -= cipso_len; 2049 opt->opt.ts -= cipso_len;
2041 if (opt->router_alert > opt->cipso) 2050 if (opt->opt.router_alert > opt->opt.cipso)
2042 opt->router_alert -= cipso_len; 2051 opt->opt.router_alert -= cipso_len;
2043 opt->cipso = 0; 2052 opt->opt.cipso = 0;
2044 2053
2045 memmove(cipso_ptr, cipso_ptr + cipso_len, 2054 memmove(cipso_ptr, cipso_ptr + cipso_len,
2046 opt->optlen - cipso_off - cipso_len); 2055 opt->opt.optlen - cipso_off - cipso_len);
2047 2056
2048 /* determining the new total option length is tricky because of 2057 /* determining the new total option length is tricky because of
2049 * the padding necessary, the only thing i can think to do at 2058 * the padding necessary, the only thing i can think to do at
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2052 * from there we can determine the new total option length */ 2061 * from there we can determine the new total option length */
2053 iter = 0; 2062 iter = 0;
2054 optlen_new = 0; 2063 optlen_new = 0;
2055 while (iter < opt->optlen) 2064 while (iter < opt->opt.optlen)
2056 if (opt->__data[iter] != IPOPT_NOP) { 2065 if (opt->opt.__data[iter] != IPOPT_NOP) {
2057 iter += opt->__data[iter + 1]; 2066 iter += opt->opt.__data[iter + 1];
2058 optlen_new = iter; 2067 optlen_new = iter;
2059 } else 2068 } else
2060 iter++; 2069 iter++;
2061 hdr_delta = opt->optlen; 2070 hdr_delta = opt->opt.optlen;
2062 opt->optlen = (optlen_new + 3) & ~3; 2071 opt->opt.optlen = (optlen_new + 3) & ~3;
2063 hdr_delta -= opt->optlen; 2072 hdr_delta -= opt->opt.optlen;
2064 } else { 2073 } else {
2065 /* only the cipso option was present on the socket so we can 2074 /* only the cipso option was present on the socket so we can
2066 * remove the entire option struct */ 2075 * remove the entire option struct */
2067 *opt_ptr = NULL; 2076 *opt_ptr = NULL;
2068 hdr_delta = opt->optlen; 2077 hdr_delta = opt->opt.optlen;
2069 kfree(opt); 2078 call_rcu(&opt->rcu, opt_kfree_rcu);
2070 } 2079 }
2071 2080
2072 return hdr_delta; 2081 return hdr_delta;
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2083void cipso_v4_sock_delattr(struct sock *sk) 2092void cipso_v4_sock_delattr(struct sock *sk)
2084{ 2093{
2085 int hdr_delta; 2094 int hdr_delta;
2086 struct ip_options *opt; 2095 struct ip_options_rcu *opt;
2087 struct inet_sock *sk_inet; 2096 struct inet_sock *sk_inet;
2088 2097
2089 sk_inet = inet_sk(sk); 2098 sk_inet = inet_sk(sk);
2090 opt = sk_inet->opt; 2099 opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
2091 if (opt == NULL || opt->cipso == 0) 2100 if (opt == NULL || opt->opt.cipso == 0)
2092 return; 2101 return;
2093 2102
2094 hdr_delta = cipso_v4_delopt(&sk_inet->opt); 2103 hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
2095 if (sk_inet->is_icsk && hdr_delta > 0) { 2104 if (sk_inet->is_icsk && hdr_delta > 0) {
2096 struct inet_connection_sock *sk_conn = inet_csk(sk); 2105 struct inet_connection_sock *sk_conn = inet_csk(sk);
2097 sk_conn->icsk_ext_hdr_len -= hdr_delta; 2106 sk_conn->icsk_ext_hdr_len -= hdr_delta;
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk)
2109 */ 2118 */
2110void cipso_v4_req_delattr(struct request_sock *req) 2119void cipso_v4_req_delattr(struct request_sock *req)
2111{ 2120{
2112 struct ip_options *opt; 2121 struct ip_options_rcu *opt;
2113 struct inet_request_sock *req_inet; 2122 struct inet_request_sock *req_inet;
2114 2123
2115 req_inet = inet_rsk(req); 2124 req_inet = inet_rsk(req);
2116 opt = req_inet->opt; 2125 opt = req_inet->opt;
2117 if (opt == NULL || opt->cipso == 0) 2126 if (opt == NULL || opt->opt.cipso == 0)
2118 return; 2127 return;
2119 2128
2120 cipso_v4_delopt(&req_inet->opt); 2129 cipso_v4_delopt(&req_inet->opt);
@@ -2184,14 +2193,18 @@ getattr_return:
2184 */ 2193 */
2185int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) 2194int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
2186{ 2195{
2187 struct ip_options *opt; 2196 struct ip_options_rcu *opt;
2197 int res = -ENOMSG;
2188 2198
2189 opt = inet_sk(sk)->opt; 2199 rcu_read_lock();
2190 if (opt == NULL || opt->cipso == 0) 2200 opt = rcu_dereference(inet_sk(sk)->inet_opt);
2191 return -ENOMSG; 2201 if (opt && opt->opt.cipso)
2192 2202 res = cipso_v4_getattr(opt->opt.__data +
2193 return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), 2203 opt->opt.cipso -
2194 secattr); 2204 sizeof(struct iphdr),
2205 secattr);
2206 rcu_read_unlock();
2207 return res;
2195} 2208}
2196 2209
2197/** 2210/**
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 85bd24ca4f6d..424fafbc8cb0 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
24{ 24{
25 struct inet_sock *inet = inet_sk(sk); 25 struct inet_sock *inet = inet_sk(sk);
26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; 26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
27 struct flowi4 *fl4;
27 struct rtable *rt; 28 struct rtable *rt;
28 __be32 saddr; 29 __be32 saddr;
29 int oif; 30 int oif;
@@ -38,6 +39,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
38 39
39 sk_dst_reset(sk); 40 sk_dst_reset(sk);
40 41
42 lock_sock(sk);
43
41 oif = sk->sk_bound_dev_if; 44 oif = sk->sk_bound_dev_if;
42 saddr = inet->inet_saddr; 45 saddr = inet->inet_saddr;
43 if (ipv4_is_multicast(usin->sin_addr.s_addr)) { 46 if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -46,7 +49,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
46 if (!saddr) 49 if (!saddr)
47 saddr = inet->mc_addr; 50 saddr = inet->mc_addr;
48 } 51 }
49 rt = ip_route_connect(usin->sin_addr.s_addr, saddr, 52 fl4 = &inet->cork.fl.u.ip4;
53 rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
50 RT_CONN_FLAGS(sk), oif, 54 RT_CONN_FLAGS(sk), oif,
51 sk->sk_protocol, 55 sk->sk_protocol,
52 inet->inet_sport, usin->sin_port, sk, true); 56 inet->inet_sport, usin->sin_port, sk, true);
@@ -54,26 +58,30 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
54 err = PTR_ERR(rt); 58 err = PTR_ERR(rt);
55 if (err == -ENETUNREACH) 59 if (err == -ENETUNREACH)
56 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 60 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
57 return err; 61 goto out;
58 } 62 }
59 63
60 if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { 64 if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
61 ip_rt_put(rt); 65 ip_rt_put(rt);
62 return -EACCES; 66 err = -EACCES;
67 goto out;
63 } 68 }
64 if (!inet->inet_saddr) 69 if (!inet->inet_saddr)
65 inet->inet_saddr = rt->rt_src; /* Update source address */ 70 inet->inet_saddr = fl4->saddr; /* Update source address */
66 if (!inet->inet_rcv_saddr) { 71 if (!inet->inet_rcv_saddr) {
67 inet->inet_rcv_saddr = rt->rt_src; 72 inet->inet_rcv_saddr = fl4->saddr;
68 if (sk->sk_prot->rehash) 73 if (sk->sk_prot->rehash)
69 sk->sk_prot->rehash(sk); 74 sk->sk_prot->rehash(sk);
70 } 75 }
71 inet->inet_daddr = rt->rt_dst; 76 inet->inet_daddr = fl4->daddr;
72 inet->inet_dport = usin->sin_port; 77 inet->inet_dport = usin->sin_port;
73 sk->sk_state = TCP_ESTABLISHED; 78 sk->sk_state = TCP_ESTABLISHED;
74 inet->inet_id = jiffies; 79 inet->inet_id = jiffies;
75 80
76 sk_dst_set(sk, &rt->dst); 81 sk_dst_set(sk, &rt->dst);
77 return 0; 82 err = 0;
83out:
84 release_sock(sk);
85 return err;
78} 86}
79EXPORT_SYMBOL(ip4_datagram_connect); 87EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cd9ca0811cfa..0d4a184af16f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1369,7 +1369,7 @@ errout:
1369 1369
1370static size_t inet_get_link_af_size(const struct net_device *dev) 1370static size_t inet_get_link_af_size(const struct net_device *dev)
1371{ 1371{
1372 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1372 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1373 1373
1374 if (!in_dev) 1374 if (!in_dev)
1375 return 0; 1375 return 0;
@@ -1379,7 +1379,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
1379 1379
1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) 1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1381{ 1381{
1382 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1382 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383 struct nlattr *nla; 1383 struct nlattr *nla;
1384 int i; 1384 int i;
1385 1385
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 03f994bcf7de..a5b413416da3 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -276,7 +276,7 @@ error:
276 276
277static int esp_input_done2(struct sk_buff *skb, int err) 277static int esp_input_done2(struct sk_buff *skb, int err)
278{ 278{
279 struct iphdr *iph; 279 const struct iphdr *iph;
280 struct xfrm_state *x = xfrm_input_state(skb); 280 struct xfrm_state *x = xfrm_input_state(skb);
281 struct esp_data *esp = x->data; 281 struct esp_data *esp = x->data;
282 struct crypto_aead *aead = esp->aead; 282 struct crypto_aead *aead = esp->aead;
@@ -484,7 +484,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
484static void esp4_err(struct sk_buff *skb, u32 info) 484static void esp4_err(struct sk_buff *skb, u32 info)
485{ 485{
486 struct net *net = dev_net(skb->dev); 486 struct net *net = dev_net(skb->dev);
487 struct iphdr *iph = (struct iphdr *)skb->data; 487 const struct iphdr *iph = (const struct iphdr *)skb->data;
488 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); 488 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
489 struct xfrm_state *x; 489 struct xfrm_state *x;
490 490
@@ -492,7 +492,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
492 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 492 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
493 return; 493 return;
494 494
495 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); 495 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
496 esph->spi, IPPROTO_ESP, AF_INET);
496 if (!x) 497 if (!x)
497 return; 498 return;
498 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", 499 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 451088330bbb..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
44#include <net/arp.h> 44#include <net/arp.h>
45#include <net/ip_fib.h> 45#include <net/ip_fib.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/xfrm.h>
47 48
48#ifndef CONFIG_IP_MULTIPLE_TABLES 49#ifndef CONFIG_IP_MULTIPLE_TABLES
49 50
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type);
188 * - check, that packet arrived from expected physical interface. 189 * - check, that packet arrived from expected physical interface.
189 * called with rcu_read_lock() 190 * called with rcu_read_lock()
190 */ 191 */
191int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 192int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
192 struct net_device *dev, __be32 *spec_dst, 193 int oif, struct net_device *dev, __be32 *spec_dst,
193 u32 *itag, u32 mark) 194 u32 *itag)
194{ 195{
195 struct in_device *in_dev; 196 struct in_device *in_dev;
196 struct flowi4 fl4; 197 struct flowi4 fl4;
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
202 203
203 fl4.flowi4_oif = 0; 204 fl4.flowi4_oif = 0;
204 fl4.flowi4_iif = oif; 205 fl4.flowi4_iif = oif;
205 fl4.flowi4_mark = mark;
206 fl4.daddr = src; 206 fl4.daddr = src;
207 fl4.saddr = dst; 207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos; 208 fl4.flowi4_tos = tos;
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
212 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
213 if (in_dev) { 213 if (in_dev) {
214 no_addr = in_dev->ifa_list == NULL; 214 no_addr = in_dev->ifa_list == NULL;
215 rpf = IN_DEV_RPFILTER(in_dev); 215
216 /* Ignore rp_filter for packets protected by IPsec. */
217 rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
218
216 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 219 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
217 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 220 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
218 fl4.flowi4_mark = 0;
219 } 221 }
220 222
221 if (in_dev == NULL) 223 if (in_dev == NULL)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 641a5a2a9f9c..33e2c35b74b7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -141,18 +141,8 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
141 }, 141 },
142}; 142};
143 143
144
145/* Release a nexthop info record */ 144/* Release a nexthop info record */
146 145
147static void free_fib_info_rcu(struct rcu_head *head)
148{
149 struct fib_info *fi = container_of(head, struct fib_info, rcu);
150
151 if (fi->fib_metrics != (u32 *) dst_default_metrics)
152 kfree(fi->fib_metrics);
153 kfree(fi);
154}
155
156void free_fib_info(struct fib_info *fi) 146void free_fib_info(struct fib_info *fi)
157{ 147{
158 if (fi->fib_dead == 0) { 148 if (fi->fib_dead == 0) {
@@ -166,7 +156,7 @@ void free_fib_info(struct fib_info *fi)
166 } endfor_nexthops(fi); 156 } endfor_nexthops(fi);
167 fib_info_cnt--; 157 fib_info_cnt--;
168 release_net(fi->fib_net); 158 release_net(fi->fib_net);
169 call_rcu(&fi->rcu, free_fib_info_rcu); 159 kfree_rcu(fi, rcu);
170} 160}
171 161
172void fib_release_info(struct fib_info *fi) 162void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5fe9b8b41df3..58c25ea5a5c1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -72,6 +72,7 @@
72#include <linux/init.h> 72#include <linux/init.h>
73#include <linux/list.h> 73#include <linux/list.h>
74#include <linux/slab.h> 74#include <linux/slab.h>
75#include <linux/prefetch.h>
75#include <net/net_namespace.h> 76#include <net/net_namespace.h>
76#include <net/ip.h> 77#include <net/ip.h>
77#include <net/protocol.h> 78#include <net/protocol.h>
@@ -126,7 +127,7 @@ struct tnode {
126 struct work_struct work; 127 struct work_struct work;
127 struct tnode *tnode_free; 128 struct tnode *tnode_free;
128 }; 129 };
129 struct rt_trie_node *child[0]; 130 struct rt_trie_node __rcu *child[0];
130}; 131};
131 132
132#ifdef CONFIG_IP_FIB_TRIE_STATS 133#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,7 +152,7 @@ struct trie_stat {
151}; 152};
152 153
153struct trie { 154struct trie {
154 struct rt_trie_node *trie; 155 struct rt_trie_node __rcu *trie;
155#ifdef CONFIG_IP_FIB_TRIE_STATS 156#ifdef CONFIG_IP_FIB_TRIE_STATS
156 struct trie_use_stats stats; 157 struct trie_use_stats stats;
157#endif 158#endif
@@ -177,16 +178,29 @@ static const int sync_pages = 128;
177static struct kmem_cache *fn_alias_kmem __read_mostly; 178static struct kmem_cache *fn_alias_kmem __read_mostly;
178static struct kmem_cache *trie_leaf_kmem __read_mostly; 179static struct kmem_cache *trie_leaf_kmem __read_mostly;
179 180
180static inline struct tnode *node_parent(struct rt_trie_node *node) 181/*
182 * caller must hold RTNL
183 */
184static inline struct tnode *node_parent(const struct rt_trie_node *node)
181{ 185{
182 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 186 unsigned long parent;
187
188 parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
189
190 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
183} 191}
184 192
185static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) 193/*
194 * caller must hold RCU read lock or RTNL
195 */
196static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
186{ 197{
187 struct tnode *ret = node_parent(node); 198 unsigned long parent;
188 199
189 return rcu_dereference_rtnl(ret); 200 parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
201 lockdep_rtnl_is_held());
202
203 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
190} 204}
191 205
192/* Same as rcu_assign_pointer 206/* Same as rcu_assign_pointer
@@ -198,18 +212,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
198 node->parent = (unsigned long)ptr | NODE_TYPE(node); 212 node->parent = (unsigned long)ptr | NODE_TYPE(node);
199} 213}
200 214
201static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) 215/*
216 * caller must hold RTNL
217 */
218static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
202{ 219{
203 BUG_ON(i >= 1U << tn->bits); 220 BUG_ON(i >= 1U << tn->bits);
204 221
205 return tn->child[i]; 222 return rtnl_dereference(tn->child[i]);
206} 223}
207 224
208static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 225/*
226 * caller must hold RCU read lock or RTNL
227 */
228static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
209{ 229{
210 struct rt_trie_node *ret = tnode_get_child(tn, i); 230 BUG_ON(i >= 1U << tn->bits);
211 231
212 return rcu_dereference_rtnl(ret); 232 return rcu_dereference_rtnl(tn->child[i]);
213} 233}
214 234
215static inline int tnode_child_length(const struct tnode *tn) 235static inline int tnode_child_length(const struct tnode *tn)
@@ -350,14 +370,9 @@ static inline void free_leaf(struct leaf *l)
350 call_rcu_bh(&l->rcu, __leaf_free_rcu); 370 call_rcu_bh(&l->rcu, __leaf_free_rcu);
351} 371}
352 372
353static void __leaf_info_free_rcu(struct rcu_head *head)
354{
355 kfree(container_of(head, struct leaf_info, rcu));
356}
357
358static inline void free_leaf_info(struct leaf_info *leaf) 373static inline void free_leaf_info(struct leaf_info *leaf)
359{ 374{
360 call_rcu(&leaf->rcu, __leaf_info_free_rcu); 375 kfree_rcu(leaf, rcu);
361} 376}
362 377
363static struct tnode *tnode_alloc(size_t size) 378static struct tnode *tnode_alloc(size_t size)
@@ -487,7 +502,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
487static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, 502static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
488 int wasfull) 503 int wasfull)
489{ 504{
490 struct rt_trie_node *chi = tn->child[i]; 505 struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
491 int isfull; 506 int isfull;
492 507
493 BUG_ON(i >= 1<<tn->bits); 508 BUG_ON(i >= 1<<tn->bits);
@@ -665,7 +680,7 @@ one_child:
665 for (i = 0; i < tnode_child_length(tn); i++) { 680 for (i = 0; i < tnode_child_length(tn); i++) {
666 struct rt_trie_node *n; 681 struct rt_trie_node *n;
667 682
668 n = tn->child[i]; 683 n = rtnl_dereference(tn->child[i]);
669 if (!n) 684 if (!n)
670 continue; 685 continue;
671 686
@@ -679,6 +694,20 @@ one_child:
679 return (struct rt_trie_node *) tn; 694 return (struct rt_trie_node *) tn;
680} 695}
681 696
697
698static void tnode_clean_free(struct tnode *tn)
699{
700 int i;
701 struct tnode *tofree;
702
703 for (i = 0; i < tnode_child_length(tn); i++) {
704 tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
705 if (tofree)
706 tnode_free(tofree);
707 }
708 tnode_free(tn);
709}
710
682static struct tnode *inflate(struct trie *t, struct tnode *tn) 711static struct tnode *inflate(struct trie *t, struct tnode *tn)
683{ 712{
684 struct tnode *oldtnode = tn; 713 struct tnode *oldtnode = tn;
@@ -755,8 +784,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
755 inode = (struct tnode *) node; 784 inode = (struct tnode *) node;
756 785
757 if (inode->bits == 1) { 786 if (inode->bits == 1) {
758 put_child(t, tn, 2*i, inode->child[0]); 787 put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
759 put_child(t, tn, 2*i+1, inode->child[1]); 788 put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
760 789
761 tnode_free_safe(inode); 790 tnode_free_safe(inode);
762 continue; 791 continue;
@@ -797,8 +826,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
797 826
798 size = tnode_child_length(left); 827 size = tnode_child_length(left);
799 for (j = 0; j < size; j++) { 828 for (j = 0; j < size; j++) {
800 put_child(t, left, j, inode->child[j]); 829 put_child(t, left, j, rtnl_dereference(inode->child[j]));
801 put_child(t, right, j, inode->child[j + size]); 830 put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
802 } 831 }
803 put_child(t, tn, 2*i, resize(t, left)); 832 put_child(t, tn, 2*i, resize(t, left));
804 put_child(t, tn, 2*i+1, resize(t, right)); 833 put_child(t, tn, 2*i+1, resize(t, right));
@@ -808,18 +837,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
808 tnode_free_safe(oldtnode); 837 tnode_free_safe(oldtnode);
809 return tn; 838 return tn;
810nomem: 839nomem:
811 { 840 tnode_clean_free(tn);
812 int size = tnode_child_length(tn); 841 return ERR_PTR(-ENOMEM);
813 int j;
814
815 for (j = 0; j < size; j++)
816 if (tn->child[j])
817 tnode_free((struct tnode *)tn->child[j]);
818
819 tnode_free(tn);
820
821 return ERR_PTR(-ENOMEM);
822 }
823} 842}
824 843
825static struct tnode *halve(struct trie *t, struct tnode *tn) 844static struct tnode *halve(struct trie *t, struct tnode *tn)
@@ -890,18 +909,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
890 tnode_free_safe(oldtnode); 909 tnode_free_safe(oldtnode);
891 return tn; 910 return tn;
892nomem: 911nomem:
893 { 912 tnode_clean_free(tn);
894 int size = tnode_child_length(tn); 913 return ERR_PTR(-ENOMEM);
895 int j;
896
897 for (j = 0; j < size; j++)
898 if (tn->child[j])
899 tnode_free((struct tnode *)tn->child[j]);
900
901 tnode_free(tn);
902
903 return ERR_PTR(-ENOMEM);
904 }
905} 914}
906 915
907/* readside must use rcu_read_lock currently dump routines 916/* readside must use rcu_read_lock currently dump routines
@@ -1033,7 +1042,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1033 t_key cindex; 1042 t_key cindex;
1034 1043
1035 pos = 0; 1044 pos = 0;
1036 n = t->trie; 1045 n = rtnl_dereference(t->trie);
1037 1046
1038 /* If we point to NULL, stop. Either the tree is empty and we should 1047 /* If we point to NULL, stop. Either the tree is empty and we should
1039 * just put a new leaf in if, or we have reached an empty child slot, 1048 * just put a new leaf in if, or we have reached an empty child slot,
@@ -1319,6 +1328,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1319 } 1328 }
1320 } 1329 }
1321 1330
1331 if (!plen)
1332 tb->tb_num_default++;
1333
1322 list_add_tail_rcu(&new_fa->fa_list, 1334 list_add_tail_rcu(&new_fa->fa_list,
1323 (fa ? &fa->fa_list : fa_head)); 1335 (fa ? &fa->fa_list : fa_head));
1324 1336
@@ -1684,6 +1696,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1684 1696
1685 list_del_rcu(&fa->fa_list); 1697 list_del_rcu(&fa->fa_list);
1686 1698
1699 if (!plen)
1700 tb->tb_num_default--;
1701
1687 if (list_empty(fa_head)) { 1702 if (list_empty(fa_head)) {
1688 hlist_del_rcu(&li->hlist); 1703 hlist_del_rcu(&li->hlist);
1689 free_leaf_info(li); 1704 free_leaf_info(li);
@@ -1756,7 +1771,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
1756 continue; 1771 continue;
1757 1772
1758 if (IS_LEAF(c)) { 1773 if (IS_LEAF(c)) {
1759 prefetch(p->child[idx]); 1774 prefetch(rcu_dereference_rtnl(p->child[idx]));
1760 return (struct leaf *) c; 1775 return (struct leaf *) c;
1761 } 1776 }
1762 1777
@@ -1974,6 +1989,7 @@ struct fib_table *fib_trie_table(u32 id)
1974 1989
1975 tb->tb_id = id; 1990 tb->tb_id = id;
1976 tb->tb_default = -1; 1991 tb->tb_default = -1;
1992 tb->tb_num_default = 0;
1977 1993
1978 t = (struct trie *) tb->tb_data; 1994 t = (struct trie *) tb->tb_data;
1979 memset(t, 0, sizeof(*t)); 1995 memset(t, 0, sizeof(*t));
@@ -2269,7 +2285,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2269 2285
2270 /* walk rest of this hash chain */ 2286 /* walk rest of this hash chain */
2271 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 2287 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2272 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { 2288 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2273 tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 2289 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2274 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 2290 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2275 if (n) 2291 if (n)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5f8a71d3a2a..5395e45dcce6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -83,6 +83,7 @@
83#include <net/tcp.h> 83#include <net/tcp.h>
84#include <net/udp.h> 84#include <net/udp.h>
85#include <net/raw.h> 85#include <net/raw.h>
86#include <net/ping.h>
86#include <linux/skbuff.h> 87#include <linux/skbuff.h>
87#include <net/sock.h> 88#include <net/sock.h>
88#include <linux/errno.h> 89#include <linux/errno.h>
@@ -108,8 +109,7 @@ struct icmp_bxm {
108 __be32 times[3]; 109 __be32 times[3];
109 } data; 110 } data;
110 int head_len; 111 int head_len;
111 struct ip_options replyopts; 112 struct ip_options_data replyopts;
112 unsigned char optbuf[40];
113}; 113};
114 114
115/* An array of errno for error messages from dest unreach. */ 115/* An array of errno for error messages from dest unreach. */
@@ -234,7 +234,7 @@ static inline void icmp_xmit_unlock(struct sock *sk)
234 */ 234 */
235 235
236static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, 236static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
237 int type, int code) 237 struct flowi4 *fl4, int type, int code)
238{ 238{
239 struct dst_entry *dst = &rt->dst; 239 struct dst_entry *dst = &rt->dst;
240 bool rc = true; 240 bool rc = true;
@@ -253,7 +253,7 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
253 /* Limit if icmp type is enabled in ratemask. */ 253 /* Limit if icmp type is enabled in ratemask. */
254 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { 254 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
255 if (!rt->peer) 255 if (!rt->peer)
256 rt_bind_peer(rt, 1); 256 rt_bind_peer(rt, fl4->daddr, 1);
257 rc = inet_peer_xrlim_allow(rt->peer, 257 rc = inet_peer_xrlim_allow(rt->peer,
258 net->ipv4.sysctl_icmp_ratelimit); 258 net->ipv4.sysctl_icmp_ratelimit);
259 } 259 }
@@ -291,13 +291,14 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
291} 291}
292 292
293static void icmp_push_reply(struct icmp_bxm *icmp_param, 293static void icmp_push_reply(struct icmp_bxm *icmp_param,
294 struct flowi4 *fl4,
294 struct ipcm_cookie *ipc, struct rtable **rt) 295 struct ipcm_cookie *ipc, struct rtable **rt)
295{ 296{
296 struct sock *sk; 297 struct sock *sk;
297 struct sk_buff *skb; 298 struct sk_buff *skb;
298 299
299 sk = icmp_sk(dev_net((*rt)->dst.dev)); 300 sk = icmp_sk(dev_net((*rt)->dst.dev));
300 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 301 if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
301 icmp_param->data_len+icmp_param->head_len, 302 icmp_param->data_len+icmp_param->head_len,
302 icmp_param->head_len, 303 icmp_param->head_len,
303 ipc, rt, MSG_DONTWAIT) < 0) { 304 ipc, rt, MSG_DONTWAIT) < 0) {
@@ -316,7 +317,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
316 icmp_param->head_len, csum); 317 icmp_param->head_len, csum);
317 icmph->checksum = csum_fold(csum); 318 icmph->checksum = csum_fold(csum);
318 skb->ip_summed = CHECKSUM_NONE; 319 skb->ip_summed = CHECKSUM_NONE;
319 ip_push_pending_frames(sk); 320 ip_push_pending_frames(sk, fl4);
320 } 321 }
321} 322}
322 323
@@ -329,11 +330,12 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
329 struct ipcm_cookie ipc; 330 struct ipcm_cookie ipc;
330 struct rtable *rt = skb_rtable(skb); 331 struct rtable *rt = skb_rtable(skb);
331 struct net *net = dev_net(rt->dst.dev); 332 struct net *net = dev_net(rt->dst.dev);
333 struct flowi4 fl4;
332 struct sock *sk; 334 struct sock *sk;
333 struct inet_sock *inet; 335 struct inet_sock *inet;
334 __be32 daddr; 336 __be32 daddr;
335 337
336 if (ip_options_echo(&icmp_param->replyopts, skb)) 338 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
337 return; 339 return;
338 340
339 sk = icmp_xmit_lock(net); 341 sk = icmp_xmit_lock(net);
@@ -344,65 +346,60 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
344 icmp_param->data.icmph.checksum = 0; 346 icmp_param->data.icmph.checksum = 0;
345 347
346 inet->tos = ip_hdr(skb)->tos; 348 inet->tos = ip_hdr(skb)->tos;
347 daddr = ipc.addr = rt->rt_src; 349 daddr = ipc.addr = ip_hdr(skb)->saddr;
348 ipc.opt = NULL; 350 ipc.opt = NULL;
349 ipc.tx_flags = 0; 351 ipc.tx_flags = 0;
350 if (icmp_param->replyopts.optlen) { 352 if (icmp_param->replyopts.opt.opt.optlen) {
351 ipc.opt = &icmp_param->replyopts; 353 ipc.opt = &icmp_param->replyopts.opt;
352 if (ipc.opt->srr) 354 if (ipc.opt->opt.srr)
353 daddr = icmp_param->replyopts.faddr; 355 daddr = icmp_param->replyopts.opt.opt.faddr;
354 } 356 }
355 { 357 memset(&fl4, 0, sizeof(fl4));
356 struct flowi4 fl4 = { 358 fl4.daddr = daddr;
357 .daddr = daddr, 359 fl4.saddr = rt->rt_spec_dst;
358 .saddr = rt->rt_spec_dst, 360 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
359 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), 361 fl4.flowi4_proto = IPPROTO_ICMP;
360 .flowi4_proto = IPPROTO_ICMP, 362 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
361 }; 363 rt = ip_route_output_key(net, &fl4);
362 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 364 if (IS_ERR(rt))
363 rt = ip_route_output_key(net, &fl4); 365 goto out_unlock;
364 if (IS_ERR(rt)) 366 if (icmpv4_xrlim_allow(net, rt, &fl4, icmp_param->data.icmph.type,
365 goto out_unlock;
366 }
367 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
368 icmp_param->data.icmph.code)) 367 icmp_param->data.icmph.code))
369 icmp_push_reply(icmp_param, &ipc, &rt); 368 icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
370 ip_rt_put(rt); 369 ip_rt_put(rt);
371out_unlock: 370out_unlock:
372 icmp_xmit_unlock(sk); 371 icmp_xmit_unlock(sk);
373} 372}
374 373
375static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, 374static struct rtable *icmp_route_lookup(struct net *net,
376 struct iphdr *iph, 375 struct flowi4 *fl4,
376 struct sk_buff *skb_in,
377 const struct iphdr *iph,
377 __be32 saddr, u8 tos, 378 __be32 saddr, u8 tos,
378 int type, int code, 379 int type, int code,
379 struct icmp_bxm *param) 380 struct icmp_bxm *param)
380{ 381{
381 struct flowi4 fl4 = {
382 .daddr = (param->replyopts.srr ?
383 param->replyopts.faddr : iph->saddr),
384 .saddr = saddr,
385 .flowi4_tos = RT_TOS(tos),
386 .flowi4_proto = IPPROTO_ICMP,
387 .fl4_icmp_type = type,
388 .fl4_icmp_code = code,
389 };
390 struct rtable *rt, *rt2; 382 struct rtable *rt, *rt2;
391 int err; 383 int err;
392 384
393 security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); 385 memset(fl4, 0, sizeof(*fl4));
394 rt = __ip_route_output_key(net, &fl4); 386 fl4->daddr = (param->replyopts.opt.opt.srr ?
387 param->replyopts.opt.opt.faddr : iph->saddr);
388 fl4->saddr = saddr;
389 fl4->flowi4_tos = RT_TOS(tos);
390 fl4->flowi4_proto = IPPROTO_ICMP;
391 fl4->fl4_icmp_type = type;
392 fl4->fl4_icmp_code = code;
393 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
394 rt = __ip_route_output_key(net, fl4);
395 if (IS_ERR(rt)) 395 if (IS_ERR(rt))
396 return rt; 396 return rt;
397 397
398 /* No need to clone since we're just using its address. */ 398 /* No need to clone since we're just using its address. */
399 rt2 = rt; 399 rt2 = rt;
400 400
401 if (!fl4.saddr)
402 fl4.saddr = rt->rt_src;
403
404 rt = (struct rtable *) xfrm_lookup(net, &rt->dst, 401 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
405 flowi4_to_flowi(&fl4), NULL, 0); 402 flowi4_to_flowi(fl4), NULL, 0);
406 if (!IS_ERR(rt)) { 403 if (!IS_ERR(rt)) {
407 if (rt != rt2) 404 if (rt != rt2)
408 return rt; 405 return rt;
@@ -411,19 +408,19 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
411 } else 408 } else
412 return rt; 409 return rt;
413 410
414 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); 411 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET);
415 if (err) 412 if (err)
416 goto relookup_failed; 413 goto relookup_failed;
417 414
418 if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { 415 if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) {
419 rt2 = __ip_route_output_key(net, &fl4); 416 rt2 = __ip_route_output_key(net, fl4);
420 if (IS_ERR(rt2)) 417 if (IS_ERR(rt2))
421 err = PTR_ERR(rt2); 418 err = PTR_ERR(rt2);
422 } else { 419 } else {
423 struct flowi4 fl4_2 = {}; 420 struct flowi4 fl4_2 = {};
424 unsigned long orefdst; 421 unsigned long orefdst;
425 422
426 fl4_2.daddr = fl4.saddr; 423 fl4_2.daddr = fl4->saddr;
427 rt2 = ip_route_output_key(net, &fl4_2); 424 rt2 = ip_route_output_key(net, &fl4_2);
428 if (IS_ERR(rt2)) { 425 if (IS_ERR(rt2)) {
429 err = PTR_ERR(rt2); 426 err = PTR_ERR(rt2);
@@ -431,7 +428,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
431 } 428 }
432 /* Ugh! */ 429 /* Ugh! */
433 orefdst = skb_in->_skb_refdst; /* save old refdst */ 430 orefdst = skb_in->_skb_refdst; /* save old refdst */
434 err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, 431 err = ip_route_input(skb_in, fl4->daddr, fl4->saddr,
435 RT_TOS(tos), rt2->dst.dev); 432 RT_TOS(tos), rt2->dst.dev);
436 433
437 dst_release(&rt2->dst); 434 dst_release(&rt2->dst);
@@ -443,7 +440,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
443 goto relookup_failed; 440 goto relookup_failed;
444 441
445 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, 442 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
446 flowi4_to_flowi(&fl4), NULL, 443 flowi4_to_flowi(fl4), NULL,
447 XFRM_LOOKUP_ICMP); 444 XFRM_LOOKUP_ICMP);
448 if (!IS_ERR(rt2)) { 445 if (!IS_ERR(rt2)) {
449 dst_release(&rt->dst); 446 dst_release(&rt->dst);
@@ -482,6 +479,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
482 struct icmp_bxm icmp_param; 479 struct icmp_bxm icmp_param;
483 struct rtable *rt = skb_rtable(skb_in); 480 struct rtable *rt = skb_rtable(skb_in);
484 struct ipcm_cookie ipc; 481 struct ipcm_cookie ipc;
482 struct flowi4 fl4;
485 __be32 saddr; 483 __be32 saddr;
486 u8 tos; 484 u8 tos;
487 struct net *net; 485 struct net *net;
@@ -581,7 +579,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
581 IPTOS_PREC_INTERNETCONTROL) : 579 IPTOS_PREC_INTERNETCONTROL) :
582 iph->tos; 580 iph->tos;
583 581
584 if (ip_options_echo(&icmp_param.replyopts, skb_in)) 582 if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
585 goto out_unlock; 583 goto out_unlock;
586 584
587 585
@@ -597,15 +595,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
597 icmp_param.offset = skb_network_offset(skb_in); 595 icmp_param.offset = skb_network_offset(skb_in);
598 inet_sk(sk)->tos = tos; 596 inet_sk(sk)->tos = tos;
599 ipc.addr = iph->saddr; 597 ipc.addr = iph->saddr;
600 ipc.opt = &icmp_param.replyopts; 598 ipc.opt = &icmp_param.replyopts.opt;
601 ipc.tx_flags = 0; 599 ipc.tx_flags = 0;
602 600
603 rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, 601 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
604 type, code, &icmp_param); 602 type, code, &icmp_param);
605 if (IS_ERR(rt)) 603 if (IS_ERR(rt))
606 goto out_unlock; 604 goto out_unlock;
607 605
608 if (!icmpv4_xrlim_allow(net, rt, type, code)) 606 if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
609 goto ende; 607 goto ende;
610 608
611 /* RFC says return as much as we can without exceeding 576 bytes. */ 609 /* RFC says return as much as we can without exceeding 576 bytes. */
@@ -613,7 +611,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
613 room = dst_mtu(&rt->dst); 611 room = dst_mtu(&rt->dst);
614 if (room > 576) 612 if (room > 576)
615 room = 576; 613 room = 576;
616 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 614 room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
617 room -= sizeof(struct icmphdr); 615 room -= sizeof(struct icmphdr);
618 616
619 icmp_param.data_len = skb_in->len - icmp_param.offset; 617 icmp_param.data_len = skb_in->len - icmp_param.offset;
@@ -621,7 +619,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
621 icmp_param.data_len = room; 619 icmp_param.data_len = room;
622 icmp_param.head_len = sizeof(struct icmphdr); 620 icmp_param.head_len = sizeof(struct icmphdr);
623 621
624 icmp_push_reply(&icmp_param, &ipc, &rt); 622 icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
625ende: 623ende:
626 ip_rt_put(rt); 624 ip_rt_put(rt);
627out_unlock: 625out_unlock:
@@ -637,7 +635,7 @@ EXPORT_SYMBOL(icmp_send);
637 635
638static void icmp_unreach(struct sk_buff *skb) 636static void icmp_unreach(struct sk_buff *skb)
639{ 637{
640 struct iphdr *iph; 638 const struct iphdr *iph;
641 struct icmphdr *icmph; 639 struct icmphdr *icmph;
642 int hash, protocol; 640 int hash, protocol;
643 const struct net_protocol *ipprot; 641 const struct net_protocol *ipprot;
@@ -656,7 +654,7 @@ static void icmp_unreach(struct sk_buff *skb)
656 goto out_err; 654 goto out_err;
657 655
658 icmph = icmp_hdr(skb); 656 icmph = icmp_hdr(skb);
659 iph = (struct iphdr *)skb->data; 657 iph = (const struct iphdr *)skb->data;
660 658
661 if (iph->ihl < 5) /* Mangled header, drop. */ 659 if (iph->ihl < 5) /* Mangled header, drop. */
662 goto out_err; 660 goto out_err;
@@ -729,7 +727,7 @@ static void icmp_unreach(struct sk_buff *skb)
729 if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) 727 if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
730 goto out; 728 goto out;
731 729
732 iph = (struct iphdr *)skb->data; 730 iph = (const struct iphdr *)skb->data;
733 protocol = iph->protocol; 731 protocol = iph->protocol;
734 732
735 /* 733 /*
@@ -758,7 +756,7 @@ out_err:
758 756
759static void icmp_redirect(struct sk_buff *skb) 757static void icmp_redirect(struct sk_buff *skb)
760{ 758{
761 struct iphdr *iph; 759 const struct iphdr *iph;
762 760
763 if (skb->len < sizeof(struct iphdr)) 761 if (skb->len < sizeof(struct iphdr))
764 goto out_err; 762 goto out_err;
@@ -769,7 +767,7 @@ static void icmp_redirect(struct sk_buff *skb)
769 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 767 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
770 goto out; 768 goto out;
771 769
772 iph = (struct iphdr *)skb->data; 770 iph = (const struct iphdr *)skb->data;
773 771
774 switch (icmp_hdr(skb)->code & 7) { 772 switch (icmp_hdr(skb)->code & 7) {
775 case ICMP_REDIR_NET: 773 case ICMP_REDIR_NET:
@@ -784,6 +782,15 @@ static void icmp_redirect(struct sk_buff *skb)
784 iph->saddr, skb->dev); 782 iph->saddr, skb->dev);
785 break; 783 break;
786 } 784 }
785
786 /* Ping wants to see redirects.
787 * Let's pretend they are errors of sorts... */
788 if (iph->protocol == IPPROTO_ICMP &&
789 iph->ihl >= 5 &&
790 pskb_may_pull(skb, (iph->ihl<<2)+8)) {
791 ping_err(skb, icmp_hdr(skb)->un.gateway);
792 }
793
787out: 794out:
788 return; 795 return;
789out_err: 796out_err:
@@ -933,12 +940,12 @@ static void icmp_address_reply(struct sk_buff *skb)
933 BUG_ON(mp == NULL); 940 BUG_ON(mp == NULL);
934 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 941 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
935 if (*mp == ifa->ifa_mask && 942 if (*mp == ifa->ifa_mask &&
936 inet_ifa_match(rt->rt_src, ifa)) 943 inet_ifa_match(ip_hdr(skb)->saddr, ifa))
937 break; 944 break;
938 } 945 }
939 if (!ifa && net_ratelimit()) { 946 if (!ifa && net_ratelimit()) {
940 printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", 947 printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n",
941 mp, dev->name, &rt->rt_src); 948 mp, dev->name, &ip_hdr(skb)->saddr);
942 } 949 }
943 } 950 }
944} 951}
@@ -1044,7 +1051,7 @@ error:
1044 */ 1051 */
1045static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { 1052static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1046 [ICMP_ECHOREPLY] = { 1053 [ICMP_ECHOREPLY] = {
1047 .handler = icmp_discard, 1054 .handler = ping_rcv,
1048 }, 1055 },
1049 [1] = { 1056 [1] = {
1050 .handler = icmp_discard, 1057 .handler = icmp_discard,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1fd3d9ce8398..f1d27f6c9351 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,17 +149,11 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, 149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
150 int sfcount, __be32 *psfsrc, int delta); 150 int sfcount, __be32 *psfsrc, int delta);
151 151
152
153static void ip_mc_list_reclaim(struct rcu_head *head)
154{
155 kfree(container_of(head, struct ip_mc_list, rcu));
156}
157
158static void ip_ma_put(struct ip_mc_list *im) 152static void ip_ma_put(struct ip_mc_list *im)
159{ 153{
160 if (atomic_dec_and_test(&im->refcnt)) { 154 if (atomic_dec_and_test(&im->refcnt)) {
161 in_dev_put(im->interface); 155 in_dev_put(im->interface);
162 call_rcu(&im->rcu, ip_mc_list_reclaim); 156 kfree_rcu(im, rcu);
163 } 157 }
164} 158}
165 159
@@ -309,6 +303,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
309 struct iphdr *pip; 303 struct iphdr *pip;
310 struct igmpv3_report *pig; 304 struct igmpv3_report *pig;
311 struct net *net = dev_net(dev); 305 struct net *net = dev_net(dev);
306 struct flowi4 fl4;
312 307
313 while (1) { 308 while (1) {
314 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), 309 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
@@ -321,18 +316,13 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
321 } 316 }
322 igmp_skb_size(skb) = size; 317 igmp_skb_size(skb) = size;
323 318
324 rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, 319 rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
325 0, 0, 320 0, 0,
326 IPPROTO_IGMP, 0, dev->ifindex); 321 IPPROTO_IGMP, 0, dev->ifindex);
327 if (IS_ERR(rt)) { 322 if (IS_ERR(rt)) {
328 kfree_skb(skb); 323 kfree_skb(skb);
329 return NULL; 324 return NULL;
330 } 325 }
331 if (rt->rt_src == 0) {
332 kfree_skb(skb);
333 ip_rt_put(rt);
334 return NULL;
335 }
336 326
337 skb_dst_set(skb, &rt->dst); 327 skb_dst_set(skb, &rt->dst);
338 skb->dev = dev; 328 skb->dev = dev;
@@ -348,8 +338,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
348 pip->tos = 0xc0; 338 pip->tos = 0xc0;
349 pip->frag_off = htons(IP_DF); 339 pip->frag_off = htons(IP_DF);
350 pip->ttl = 1; 340 pip->ttl = 1;
351 pip->daddr = rt->rt_dst; 341 pip->daddr = fl4.daddr;
352 pip->saddr = rt->rt_src; 342 pip->saddr = fl4.saddr;
353 pip->protocol = IPPROTO_IGMP; 343 pip->protocol = IPPROTO_IGMP;
354 pip->tot_len = 0; /* filled in later */ 344 pip->tot_len = 0; /* filled in later */
355 ip_select_ident(pip, &rt->dst, NULL); 345 ip_select_ident(pip, &rt->dst, NULL);
@@ -655,6 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
655 struct net_device *dev = in_dev->dev; 645 struct net_device *dev = in_dev->dev;
656 struct net *net = dev_net(dev); 646 struct net *net = dev_net(dev);
657 __be32 group = pmc ? pmc->multiaddr : 0; 647 __be32 group = pmc ? pmc->multiaddr : 0;
648 struct flowi4 fl4;
658 __be32 dst; 649 __be32 dst;
659 650
660 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) 651 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
@@ -664,17 +655,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
664 else 655 else
665 dst = group; 656 dst = group;
666 657
667 rt = ip_route_output_ports(net, NULL, dst, 0, 658 rt = ip_route_output_ports(net, &fl4, NULL, dst, 0,
668 0, 0, 659 0, 0,
669 IPPROTO_IGMP, 0, dev->ifindex); 660 IPPROTO_IGMP, 0, dev->ifindex);
670 if (IS_ERR(rt)) 661 if (IS_ERR(rt))
671 return -1; 662 return -1;
672 663
673 if (rt->rt_src == 0) {
674 ip_rt_put(rt);
675 return -1;
676 }
677
678 skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); 664 skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
679 if (skb == NULL) { 665 if (skb == NULL) {
680 ip_rt_put(rt); 666 ip_rt_put(rt);
@@ -695,7 +681,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
695 iph->frag_off = htons(IP_DF); 681 iph->frag_off = htons(IP_DF);
696 iph->ttl = 1; 682 iph->ttl = 1;
697 iph->daddr = dst; 683 iph->daddr = dst;
698 iph->saddr = rt->rt_src; 684 iph->saddr = fl4.saddr;
699 iph->protocol = IPPROTO_IGMP; 685 iph->protocol = IPPROTO_IGMP;
700 ip_select_ident(iph, &rt->dst, NULL); 686 ip_select_ident(iph, &rt->dst, NULL);
701 ((u8*)&iph[1])[0] = IPOPT_RA; 687 ((u8*)&iph[1])[0] = IPOPT_RA;
@@ -1169,20 +1155,18 @@ static void igmp_group_dropped(struct ip_mc_list *im)
1169 1155
1170 if (!in_dev->dead) { 1156 if (!in_dev->dead) {
1171 if (IGMP_V1_SEEN(in_dev)) 1157 if (IGMP_V1_SEEN(in_dev))
1172 goto done; 1158 return;
1173 if (IGMP_V2_SEEN(in_dev)) { 1159 if (IGMP_V2_SEEN(in_dev)) {
1174 if (reporter) 1160 if (reporter)
1175 igmp_send_report(in_dev, im, IGMP_HOST_LEAVE_MESSAGE); 1161 igmp_send_report(in_dev, im, IGMP_HOST_LEAVE_MESSAGE);
1176 goto done; 1162 return;
1177 } 1163 }
1178 /* IGMPv3 */ 1164 /* IGMPv3 */
1179 igmpv3_add_delrec(in_dev, im); 1165 igmpv3_add_delrec(in_dev, im);
1180 1166
1181 igmp_ifc_event(in_dev); 1167 igmp_ifc_event(in_dev);
1182 } 1168 }
1183done:
1184#endif 1169#endif
1185 ip_mc_clear_src(im);
1186} 1170}
1187 1171
1188static void igmp_group_added(struct ip_mc_list *im) 1172static void igmp_group_added(struct ip_mc_list *im)
@@ -1319,6 +1303,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1319 *ip = i->next_rcu; 1303 *ip = i->next_rcu;
1320 in_dev->mc_count--; 1304 in_dev->mc_count--;
1321 igmp_group_dropped(i); 1305 igmp_group_dropped(i);
1306 ip_mc_clear_src(i);
1322 1307
1323 if (!in_dev->dead) 1308 if (!in_dev->dead)
1324 ip_rt_multicast_event(in_dev); 1309 ip_rt_multicast_event(in_dev);
@@ -1428,7 +1413,8 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1428 in_dev->mc_list = i->next_rcu; 1413 in_dev->mc_list = i->next_rcu;
1429 in_dev->mc_count--; 1414 in_dev->mc_count--;
1430 1415
1431 igmp_group_dropped(i); 1416 /* We've dropped the groups in ip_mc_down already */
1417 ip_mc_clear_src(i);
1432 ip_ma_put(i); 1418 ip_ma_put(i);
1433 } 1419 }
1434} 1420}
@@ -1836,12 +1822,6 @@ done:
1836} 1822}
1837EXPORT_SYMBOL(ip_mc_join_group); 1823EXPORT_SYMBOL(ip_mc_join_group);
1838 1824
1839static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1840{
1841 kfree(container_of(rp, struct ip_sf_socklist, rcu));
1842 /* sk_omem_alloc should have been decreased by the caller*/
1843}
1844
1845static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, 1825static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1846 struct in_device *in_dev) 1826 struct in_device *in_dev)
1847{ 1827{
@@ -1858,18 +1838,10 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1858 rcu_assign_pointer(iml->sflist, NULL); 1838 rcu_assign_pointer(iml->sflist, NULL);
1859 /* decrease mem now to avoid the memleak warning */ 1839 /* decrease mem now to avoid the memleak warning */
1860 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); 1840 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
1861 call_rcu(&psf->rcu, ip_sf_socklist_reclaim); 1841 kfree_rcu(psf, rcu);
1862 return err; 1842 return err;
1863} 1843}
1864 1844
1865
1866static void ip_mc_socklist_reclaim(struct rcu_head *rp)
1867{
1868 kfree(container_of(rp, struct ip_mc_socklist, rcu));
1869 /* sk_omem_alloc should have been decreased by the caller*/
1870}
1871
1872
1873/* 1845/*
1874 * Ask a socket to leave a group. 1846 * Ask a socket to leave a group.
1875 */ 1847 */
@@ -1909,7 +1881,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1909 rtnl_unlock(); 1881 rtnl_unlock();
1910 /* decrease mem now to avoid the memleak warning */ 1882 /* decrease mem now to avoid the memleak warning */
1911 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 1883 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
1912 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 1884 kfree_rcu(iml, rcu);
1913 return 0; 1885 return 0;
1914 } 1886 }
1915 if (!in_dev) 1887 if (!in_dev)
@@ -2026,7 +1998,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2026 newpsl->sl_addr[i] = psl->sl_addr[i]; 1998 newpsl->sl_addr[i] = psl->sl_addr[i];
2027 /* decrease mem now to avoid the memleak warning */ 1999 /* decrease mem now to avoid the memleak warning */
2028 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2000 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2029 call_rcu(&psl->rcu, ip_sf_socklist_reclaim); 2001 kfree_rcu(psl, rcu);
2030 } 2002 }
2031 rcu_assign_pointer(pmc->sflist, newpsl); 2003 rcu_assign_pointer(pmc->sflist, newpsl);
2032 psl = newpsl; 2004 psl = newpsl;
@@ -2127,7 +2099,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
2127 psl->sl_count, psl->sl_addr, 0); 2099 psl->sl_count, psl->sl_addr, 0);
2128 /* decrease mem now to avoid the memleak warning */ 2100 /* decrease mem now to avoid the memleak warning */
2129 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2101 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2130 call_rcu(&psl->rcu, ip_sf_socklist_reclaim); 2102 kfree_rcu(psl, rcu);
2131 } else 2103 } else
2132 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 2104 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
2133 0, NULL, 0); 2105 0, NULL, 0);
@@ -2324,7 +2296,7 @@ void ip_mc_drop_socket(struct sock *sk)
2324 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2296 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2325 /* decrease mem now to avoid the memleak warning */ 2297 /* decrease mem now to avoid the memleak warning */
2326 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 2298 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2327 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 2299 kfree_rcu(iml, rcu);
2328 } 2300 }
2329 rtnl_unlock(); 2301 rtnl_unlock();
2330} 2302}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 38f23e721b80..c14d88ad348d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
33 * This struct holds the first and last local port number. 33 * This struct holds the first and last local port number.
34 */ 34 */
35struct local_ports sysctl_local_ports __read_mostly = { 35struct local_ports sysctl_local_ports __read_mostly = {
36 .lock = SEQLOCK_UNLOCKED, 36 .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock),
37 .range = { 32768, 61000 }, 37 .range = { 32768, 61000 },
38}; 38};
39 39
@@ -350,30 +350,24 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
350EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 350EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
351 351
352struct dst_entry *inet_csk_route_req(struct sock *sk, 352struct dst_entry *inet_csk_route_req(struct sock *sk,
353 struct flowi4 *fl4,
353 const struct request_sock *req) 354 const struct request_sock *req)
354{ 355{
355 struct rtable *rt; 356 struct rtable *rt;
356 const struct inet_request_sock *ireq = inet_rsk(req); 357 const struct inet_request_sock *ireq = inet_rsk(req);
357 struct ip_options *opt = inet_rsk(req)->opt; 358 struct ip_options_rcu *opt = inet_rsk(req)->opt;
358 struct flowi4 fl4 = {
359 .flowi4_oif = sk->sk_bound_dev_if,
360 .flowi4_mark = sk->sk_mark,
361 .daddr = ((opt && opt->srr) ?
362 opt->faddr : ireq->rmt_addr),
363 .saddr = ireq->loc_addr,
364 .flowi4_tos = RT_CONN_FLAGS(sk),
365 .flowi4_proto = sk->sk_protocol,
366 .flowi4_flags = inet_sk_flowi_flags(sk),
367 .fl4_sport = inet_sk(sk)->inet_sport,
368 .fl4_dport = ireq->rmt_port,
369 };
370 struct net *net = sock_net(sk); 359 struct net *net = sock_net(sk);
371 360
372 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 361 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
373 rt = ip_route_output_flow(net, &fl4, sk); 362 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
363 sk->sk_protocol, inet_sk_flowi_flags(sk),
364 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
365 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
366 security_req_classify_flow(req, flowi4_to_flowi(fl4));
367 rt = ip_route_output_flow(net, fl4, sk);
374 if (IS_ERR(rt)) 368 if (IS_ERR(rt))
375 goto no_route; 369 goto no_route;
376 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 370 if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
377 goto route_err; 371 goto route_err;
378 return &rt->dst; 372 return &rt->dst;
379 373
@@ -385,6 +379,39 @@ no_route:
385} 379}
386EXPORT_SYMBOL_GPL(inet_csk_route_req); 380EXPORT_SYMBOL_GPL(inet_csk_route_req);
387 381
382struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
383 struct sock *newsk,
384 const struct request_sock *req)
385{
386 const struct inet_request_sock *ireq = inet_rsk(req);
387 struct inet_sock *newinet = inet_sk(newsk);
388 struct ip_options_rcu *opt = ireq->opt;
389 struct net *net = sock_net(sk);
390 struct flowi4 *fl4;
391 struct rtable *rt;
392
393 fl4 = &newinet->cork.fl.u.ip4;
394 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
395 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
396 sk->sk_protocol, inet_sk_flowi_flags(sk),
397 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
398 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
399 security_req_classify_flow(req, flowi4_to_flowi(fl4));
400 rt = ip_route_output_flow(net, fl4, sk);
401 if (IS_ERR(rt))
402 goto no_route;
403 if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
404 goto route_err;
405 return &rt->dst;
406
407route_err:
408 ip_rt_put(rt);
409no_route:
410 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
411 return NULL;
412}
413EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
414
388static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 415static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
389 const u32 rnd, const u32 synq_hsize) 416 const u32 rnd, const u32 synq_hsize)
390{ 417{
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2ada17129fce..6ffe94ca5bc9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -124,7 +124,7 @@ static int inet_csk_diag_fill(struct sock *sk,
124 124
125#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 125#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
126 if (r->idiag_family == AF_INET6) { 126 if (r->idiag_family == AF_INET6) {
127 struct ipv6_pinfo *np = inet6_sk(sk); 127 const struct ipv6_pinfo *np = inet6_sk(sk);
128 128
129 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, 129 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
130 &np->rcv_saddr); 130 &np->rcv_saddr);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 47038cb6c138..85a0f75dae64 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -51,8 +51,8 @@ MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
51 * Basic tcp checks whether packet is suitable for LRO 51 * Basic tcp checks whether packet is suitable for LRO
52 */ 52 */
53 53
54static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, 54static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
55 int len, struct net_lro_desc *lro_desc) 55 int len, const struct net_lro_desc *lro_desc)
56{ 56{
57 /* check ip header: don't aggregate padded frames */ 57 /* check ip header: don't aggregate padded frames */
58 if (ntohs(iph->tot_len) != len) 58 if (ntohs(iph->tot_len) != len)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9df4e635fb5f..ce616d92cc54 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -154,11 +154,9 @@ void __init inet_initpeers(void)
154/* Called with or without local BH being disabled. */ 154/* Called with or without local BH being disabled. */
155static void unlink_from_unused(struct inet_peer *p) 155static void unlink_from_unused(struct inet_peer *p)
156{ 156{
157 if (!list_empty(&p->unused)) { 157 spin_lock_bh(&unused_peers.lock);
158 spin_lock_bh(&unused_peers.lock); 158 list_del_init(&p->unused);
159 list_del_init(&p->unused); 159 spin_unlock_bh(&unused_peers.lock);
160 spin_unlock_bh(&unused_peers.lock);
161 }
162} 160}
163 161
164static int addr_compare(const struct inetpeer_addr *a, 162static int addr_compare(const struct inetpeer_addr *a,
@@ -205,6 +203,20 @@ static int addr_compare(const struct inetpeer_addr *a,
205 u; \ 203 u; \
206}) 204})
207 205
206static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
207{
208 int cur, old = atomic_read(ptr);
209
210 while (old != u) {
211 *newv = old + a;
212 cur = atomic_cmpxchg(ptr, old, *newv);
213 if (cur == old)
214 return true;
215 old = cur;
216 }
217 return false;
218}
219
208/* 220/*
209 * Called with rcu_read_lock() 221 * Called with rcu_read_lock()
210 * Because we hold no lock against a writer, its quite possible we fall 222 * Because we hold no lock against a writer, its quite possible we fall
@@ -213,7 +225,8 @@ static int addr_compare(const struct inetpeer_addr *a,
213 * We exit from this function if number of links exceeds PEER_MAXDEPTH 225 * We exit from this function if number of links exceeds PEER_MAXDEPTH
214 */ 226 */
215static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, 227static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
216 struct inet_peer_base *base) 228 struct inet_peer_base *base,
229 int *newrefcnt)
217{ 230{
218 struct inet_peer *u = rcu_dereference(base->root); 231 struct inet_peer *u = rcu_dereference(base->root);
219 int count = 0; 232 int count = 0;
@@ -226,7 +239,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
226 * distinction between an unused entry (refcnt=0) and 239 * distinction between an unused entry (refcnt=0) and
227 * a freed one. 240 * a freed one.
228 */ 241 */
229 if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1))) 242 if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt))
230 u = NULL; 243 u = NULL;
231 return u; 244 return u;
232 } 245 }
@@ -465,22 +478,23 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
465 struct inet_peer_base *base = family_to_base(daddr->family); 478 struct inet_peer_base *base = family_to_base(daddr->family);
466 struct inet_peer *p; 479 struct inet_peer *p;
467 unsigned int sequence; 480 unsigned int sequence;
468 int invalidated; 481 int invalidated, newrefcnt = 0;
469 482
470 /* Look up for the address quickly, lockless. 483 /* Look up for the address quickly, lockless.
471 * Because of a concurrent writer, we might not find an existing entry. 484 * Because of a concurrent writer, we might not find an existing entry.
472 */ 485 */
473 rcu_read_lock(); 486 rcu_read_lock();
474 sequence = read_seqbegin(&base->lock); 487 sequence = read_seqbegin(&base->lock);
475 p = lookup_rcu(daddr, base); 488 p = lookup_rcu(daddr, base, &newrefcnt);
476 invalidated = read_seqretry(&base->lock, sequence); 489 invalidated = read_seqretry(&base->lock, sequence);
477 rcu_read_unlock(); 490 rcu_read_unlock();
478 491
479 if (p) { 492 if (p) {
480 /* The existing node has been found. 493found: /* The existing node has been found.
481 * Remove the entry from unused list if it was there. 494 * Remove the entry from unused list if it was there.
482 */ 495 */
483 unlink_from_unused(p); 496 if (newrefcnt == 1)
497 unlink_from_unused(p);
484 return p; 498 return p;
485 } 499 }
486 500
@@ -494,11 +508,9 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
494 write_seqlock_bh(&base->lock); 508 write_seqlock_bh(&base->lock);
495 p = lookup(daddr, stack, base); 509 p = lookup(daddr, stack, base);
496 if (p != peer_avl_empty) { 510 if (p != peer_avl_empty) {
497 atomic_inc(&p->refcnt); 511 newrefcnt = atomic_inc_return(&p->refcnt);
498 write_sequnlock_bh(&base->lock); 512 write_sequnlock_bh(&base->lock);
499 /* Remove the entry from unused list if it was there. */ 513 goto found;
500 unlink_from_unused(p);
501 return p;
502 } 514 }
503 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; 515 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
504 if (p) { 516 if (p) {
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 99461f09320f..3b34d1c86270 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb)
84 84
85 rt = skb_rtable(skb); 85 rt = skb_rtable(skb);
86 86
87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 87 if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway)
88 goto sr_failed; 88 goto sr_failed;
89 89
90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && 90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b1d282f11be7..0ad6035f6366 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -77,22 +77,40 @@ struct ipq {
77 struct inet_peer *peer; 77 struct inet_peer *peer;
78}; 78};
79 79
80#define IPFRAG_ECN_CLEAR 0x01 /* one frag had INET_ECN_NOT_ECT */ 80/* RFC 3168 support :
81#define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */ 81 * We want to check ECN values of all fragments, do detect invalid combinations.
82 * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
83 */
84#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
85#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
86#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
87#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
82 88
83static inline u8 ip4_frag_ecn(u8 tos) 89static inline u8 ip4_frag_ecn(u8 tos)
84{ 90{
85 tos = (tos & INET_ECN_MASK) + 1; 91 return 1 << (tos & INET_ECN_MASK);
86 /*
87 * After the last operation we have (in binary):
88 * INET_ECN_NOT_ECT => 001
89 * INET_ECN_ECT_1 => 010
90 * INET_ECN_ECT_0 => 011
91 * INET_ECN_CE => 100
92 */
93 return (tos & 2) ? 0 : tos;
94} 92}
95 93
94/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
95 * Value : 0xff if frame should be dropped.
96 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
97 */
98static const u8 ip4_frag_ecn_table[16] = {
99 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
100 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
101 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
102 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
103
104 /* invalid combinations : drop frame */
105 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
106 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
107 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
108 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
109 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
110 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
111 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
112};
113
96static struct inet_frags ip4_frags; 114static struct inet_frags ip4_frags;
97 115
98int ip_frag_nqueues(struct net *net) 116int ip_frag_nqueues(struct net *net)
@@ -524,9 +542,15 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
524 int len; 542 int len;
525 int ihlen; 543 int ihlen;
526 int err; 544 int err;
545 u8 ecn;
527 546
528 ipq_kill(qp); 547 ipq_kill(qp);
529 548
549 ecn = ip4_frag_ecn_table[qp->ecn];
550 if (unlikely(ecn == 0xff)) {
551 err = -EINVAL;
552 goto out_fail;
553 }
530 /* Make the one we just received the head. */ 554 /* Make the one we just received the head. */
531 if (prev) { 555 if (prev) {
532 head = prev->next; 556 head = prev->next;
@@ -605,17 +629,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
605 iph = ip_hdr(head); 629 iph = ip_hdr(head);
606 iph->frag_off = 0; 630 iph->frag_off = 0;
607 iph->tot_len = htons(len); 631 iph->tot_len = htons(len);
608 /* RFC3168 5.3 Fragmentation support 632 iph->tos |= ecn;
609 * If one fragment had INET_ECN_NOT_ECT,
610 * reassembled frame also has INET_ECN_NOT_ECT
611 * Elif one fragment had INET_ECN_CE
612 * reassembled frame also has INET_ECN_CE
613 */
614 if (qp->ecn & IPFRAG_ECN_CLEAR)
615 iph->tos &= ~INET_ECN_MASK;
616 else if (qp->ecn & IPFRAG_ECN_SET_CE)
617 iph->tos |= INET_ECN_CE;
618
619 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 633 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
620 qp->q.fragments = NULL; 634 qp->q.fragments = NULL;
621 qp->q.fragments_tail = NULL; 635 qp->q.fragments_tail = NULL;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index da5941f18c3c..8871067560db 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -413,11 +413,6 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
413 413
414 dev_net_set(dev, net); 414 dev_net_set(dev, net);
415 415
416 if (strchr(name, '%')) {
417 if (dev_alloc_name(dev, name) < 0)
418 goto failed_free;
419 }
420
421 nt = netdev_priv(dev); 416 nt = netdev_priv(dev);
422 nt->parms = *parms; 417 nt->parms = *parms;
423 dev->rtnl_link_ops = &ipgre_link_ops; 418 dev->rtnl_link_ops = &ipgre_link_ops;
@@ -462,7 +457,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
462 by themself??? 457 by themself???
463 */ 458 */
464 459
465 struct iphdr *iph = (struct iphdr *)skb->data; 460 const struct iphdr *iph = (const struct iphdr *)skb->data;
466 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 461 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
467 int grehlen = (iph->ihl<<2) + 4; 462 int grehlen = (iph->ihl<<2) + 4;
468 const int type = icmp_hdr(skb)->type; 463 const int type = icmp_hdr(skb)->type;
@@ -534,7 +529,7 @@ out:
534 rcu_read_unlock(); 529 rcu_read_unlock();
535} 530}
536 531
537static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 532static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
538{ 533{
539 if (INET_ECN_is_ce(iph->tos)) { 534 if (INET_ECN_is_ce(iph->tos)) {
540 if (skb->protocol == htons(ETH_P_IP)) { 535 if (skb->protocol == htons(ETH_P_IP)) {
@@ -546,19 +541,19 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
546} 541}
547 542
548static inline u8 543static inline u8
549ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 544ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
550{ 545{
551 u8 inner = 0; 546 u8 inner = 0;
552 if (skb->protocol == htons(ETH_P_IP)) 547 if (skb->protocol == htons(ETH_P_IP))
553 inner = old_iph->tos; 548 inner = old_iph->tos;
554 else if (skb->protocol == htons(ETH_P_IPV6)) 549 else if (skb->protocol == htons(ETH_P_IPV6))
555 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 550 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
556 return INET_ECN_encapsulate(tos, inner); 551 return INET_ECN_encapsulate(tos, inner);
557} 552}
558 553
559static int ipgre_rcv(struct sk_buff *skb) 554static int ipgre_rcv(struct sk_buff *skb)
560{ 555{
561 struct iphdr *iph; 556 const struct iphdr *iph;
562 u8 *h; 557 u8 *h;
563 __be16 flags; 558 __be16 flags;
564 __sum16 csum = 0; 559 __sum16 csum = 0;
@@ -697,8 +692,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
697{ 692{
698 struct ip_tunnel *tunnel = netdev_priv(dev); 693 struct ip_tunnel *tunnel = netdev_priv(dev);
699 struct pcpu_tstats *tstats; 694 struct pcpu_tstats *tstats;
700 struct iphdr *old_iph = ip_hdr(skb); 695 const struct iphdr *old_iph = ip_hdr(skb);
701 struct iphdr *tiph; 696 const struct iphdr *tiph;
697 struct flowi4 fl4;
702 u8 tos; 698 u8 tos;
703 __be16 df; 699 __be16 df;
704 struct rtable *rt; /* Route to the other host */ 700 struct rtable *rt; /* Route to the other host */
@@ -714,7 +710,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
714 710
715 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 711 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
716 gre_hlen = 0; 712 gre_hlen = 0;
717 tiph = (struct iphdr *)skb->data; 713 tiph = (const struct iphdr *)skb->data;
718 } else { 714 } else {
719 gre_hlen = tunnel->hlen; 715 gre_hlen = tunnel->hlen;
720 tiph = &tunnel->parms.iph; 716 tiph = &tunnel->parms.iph;
@@ -735,14 +731,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
735 } 731 }
736#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 732#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
737 else if (skb->protocol == htons(ETH_P_IPV6)) { 733 else if (skb->protocol == htons(ETH_P_IPV6)) {
738 struct in6_addr *addr6; 734 const struct in6_addr *addr6;
739 int addr_type; 735 int addr_type;
740 struct neighbour *neigh = skb_dst(skb)->neighbour; 736 struct neighbour *neigh = skb_dst(skb)->neighbour;
741 737
742 if (neigh == NULL) 738 if (neigh == NULL)
743 goto tx_error; 739 goto tx_error;
744 740
745 addr6 = (struct in6_addr *)&neigh->primary_key; 741 addr6 = (const struct in6_addr *)&neigh->primary_key;
746 addr_type = ipv6_addr_type(addr6); 742 addr_type = ipv6_addr_type(addr6);
747 743
748 if (addr_type == IPV6_ADDR_ANY) { 744 if (addr_type == IPV6_ADDR_ANY) {
@@ -766,10 +762,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
766 if (skb->protocol == htons(ETH_P_IP)) 762 if (skb->protocol == htons(ETH_P_IP))
767 tos = old_iph->tos; 763 tos = old_iph->tos;
768 else if (skb->protocol == htons(ETH_P_IPV6)) 764 else if (skb->protocol == htons(ETH_P_IPV6))
769 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 765 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
770 } 766 }
771 767
772 rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, 768 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
773 tunnel->parms.o_key, RT_TOS(tos), 769 tunnel->parms.o_key, RT_TOS(tos),
774 tunnel->parms.link); 770 tunnel->parms.link);
775 if (IS_ERR(rt)) { 771 if (IS_ERR(rt)) {
@@ -873,15 +869,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
873 iph->frag_off = df; 869 iph->frag_off = df;
874 iph->protocol = IPPROTO_GRE; 870 iph->protocol = IPPROTO_GRE;
875 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 871 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
876 iph->daddr = rt->rt_dst; 872 iph->daddr = fl4.daddr;
877 iph->saddr = rt->rt_src; 873 iph->saddr = fl4.saddr;
878 874
879 if ((iph->ttl = tiph->ttl) == 0) { 875 if ((iph->ttl = tiph->ttl) == 0) {
880 if (skb->protocol == htons(ETH_P_IP)) 876 if (skb->protocol == htons(ETH_P_IP))
881 iph->ttl = old_iph->ttl; 877 iph->ttl = old_iph->ttl;
882#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 878#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
883 else if (skb->protocol == htons(ETH_P_IPV6)) 879 else if (skb->protocol == htons(ETH_P_IPV6))
884 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; 880 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
885#endif 881#endif
886 else 882 else
887 iph->ttl = ip4_dst_hoplimit(&rt->dst); 883 iph->ttl = ip4_dst_hoplimit(&rt->dst);
@@ -927,7 +923,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
927{ 923{
928 struct net_device *tdev = NULL; 924 struct net_device *tdev = NULL;
929 struct ip_tunnel *tunnel; 925 struct ip_tunnel *tunnel;
930 struct iphdr *iph; 926 const struct iphdr *iph;
931 int hlen = LL_MAX_HEADER; 927 int hlen = LL_MAX_HEADER;
932 int mtu = ETH_DATA_LEN; 928 int mtu = ETH_DATA_LEN;
933 int addend = sizeof(struct iphdr) + 4; 929 int addend = sizeof(struct iphdr) + 4;
@@ -938,12 +934,14 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
938 /* Guess output device to choose reasonable mtu and needed_headroom */ 934 /* Guess output device to choose reasonable mtu and needed_headroom */
939 935
940 if (iph->daddr) { 936 if (iph->daddr) {
941 struct rtable *rt = ip_route_output_gre(dev_net(dev), 937 struct flowi4 fl4;
942 iph->daddr, iph->saddr, 938 struct rtable *rt;
943 tunnel->parms.o_key, 939
944 RT_TOS(iph->tos), 940 rt = ip_route_output_gre(dev_net(dev), &fl4,
945 tunnel->parms.link); 941 iph->daddr, iph->saddr,
946 942 tunnel->parms.o_key,
943 RT_TOS(iph->tos),
944 tunnel->parms.link);
947 if (!IS_ERR(rt)) { 945 if (!IS_ERR(rt)) {
948 tdev = rt->dst.dev; 946 tdev = rt->dst.dev;
949 ip_rt_put(rt); 947 ip_rt_put(rt);
@@ -1180,7 +1178,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1180 1178
1181static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 1179static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1182{ 1180{
1183 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); 1181 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
1184 memcpy(haddr, &iph->saddr, 4); 1182 memcpy(haddr, &iph->saddr, 4);
1185 return 4; 1183 return 4;
1186} 1184}
@@ -1196,13 +1194,15 @@ static int ipgre_open(struct net_device *dev)
1196 struct ip_tunnel *t = netdev_priv(dev); 1194 struct ip_tunnel *t = netdev_priv(dev);
1197 1195
1198 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1196 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1199 struct rtable *rt = ip_route_output_gre(dev_net(dev), 1197 struct flowi4 fl4;
1200 t->parms.iph.daddr, 1198 struct rtable *rt;
1201 t->parms.iph.saddr, 1199
1202 t->parms.o_key, 1200 rt = ip_route_output_gre(dev_net(dev), &fl4,
1203 RT_TOS(t->parms.iph.tos), 1201 t->parms.iph.daddr,
1204 t->parms.link); 1202 t->parms.iph.saddr,
1205 1203 t->parms.o_key,
1204 RT_TOS(t->parms.iph.tos),
1205 t->parms.link);
1206 if (IS_ERR(rt)) 1206 if (IS_ERR(rt))
1207 return -EADDRNOTAVAIL; 1207 return -EADDRNOTAVAIL;
1208 dev = rt->dst.dev; 1208 dev = rt->dst.dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d7b2b0987a3b..c8f48efc5fd3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb)
268static inline int ip_rcv_options(struct sk_buff *skb) 268static inline int ip_rcv_options(struct sk_buff *skb)
269{ 269{
270 struct ip_options *opt; 270 struct ip_options *opt;
271 struct iphdr *iph; 271 const struct iphdr *iph;
272 struct net_device *dev = skb->dev; 272 struct net_device *dev = skb->dev;
273 273
274 /* It looks as overkill, because not all 274 /* It looks as overkill, because not all
@@ -374,7 +374,7 @@ drop:
374 */ 374 */
375int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 375int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
376{ 376{
377 struct iphdr *iph; 377 const struct iphdr *iph;
378 u32 len; 378 u32 len;
379 379
380 /* When the interface is in promisc. mode, drop all the crap 380 /* When the interface is in promisc. mode, drop all the crap
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2391b24e8251..c3118e1cd3bb 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -36,8 +36,8 @@
36 * saddr is address of outgoing interface. 36 * saddr is address of outgoing interface.
37 */ 37 */
38 38
39void ip_options_build(struct sk_buff * skb, struct ip_options * opt, 39void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
40 __be32 daddr, struct rtable *rt, int is_frag) 40 __be32 daddr, struct rtable *rt, int is_frag)
41{ 41{
42 unsigned char *iph = skb_network_header(skb); 42 unsigned char *iph = skb_network_header(skb);
43 43
@@ -50,9 +50,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
50 50
51 if (!is_frag) { 51 if (!is_frag) {
52 if (opt->rr_needaddr) 52 if (opt->rr_needaddr)
53 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); 53 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt);
54 if (opt->ts_needaddr) 54 if (opt->ts_needaddr)
55 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); 55 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
56 if (opt->ts_needtime) { 56 if (opt->ts_needtime) {
57 struct timespec tv; 57 struct timespec tv;
58 __be32 midtime; 58 __be32 midtime;
@@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
83 * NOTE: dopt cannot point to skb. 83 * NOTE: dopt cannot point to skb.
84 */ 84 */
85 85
86int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) 86int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
87{ 87{
88 struct ip_options *sopt; 88 const struct ip_options *sopt;
89 unsigned char *sptr, *dptr; 89 unsigned char *sptr, *dptr;
90 int soffset, doffset; 90 int soffset, doffset;
91 int optlen; 91 int optlen;
@@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
95 95
96 sopt = &(IPCB(skb)->opt); 96 sopt = &(IPCB(skb)->opt);
97 97
98 if (sopt->optlen == 0) { 98 if (sopt->optlen == 0)
99 dopt->optlen = 0;
100 return 0; 99 return 0;
101 }
102 100
103 sptr = skb_network_header(skb); 101 sptr = skb_network_header(skb);
104 dptr = dopt->__data; 102 dptr = dopt->__data;
@@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
157 dopt->optlen += optlen; 155 dopt->optlen += optlen;
158 } 156 }
159 if (sopt->srr) { 157 if (sopt->srr) {
160 unsigned char * start = sptr+sopt->srr; 158 unsigned char *start = sptr+sopt->srr;
161 __be32 faddr; 159 __be32 faddr;
162 160
163 optlen = start[1]; 161 optlen = start[1];
@@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt)
499 } 497 }
500} 498}
501 499
502static struct ip_options *ip_options_get_alloc(const int optlen) 500static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
503{ 501{
504 return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), 502 return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
505 GFP_KERNEL); 503 GFP_KERNEL);
506} 504}
507 505
508static int ip_options_get_finish(struct net *net, struct ip_options **optp, 506static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
509 struct ip_options *opt, int optlen) 507 struct ip_options_rcu *opt, int optlen)
510{ 508{
511 while (optlen & 3) 509 while (optlen & 3)
512 opt->__data[optlen++] = IPOPT_END; 510 opt->opt.__data[optlen++] = IPOPT_END;
513 opt->optlen = optlen; 511 opt->opt.optlen = optlen;
514 if (optlen && ip_options_compile(net, opt, NULL)) { 512 if (optlen && ip_options_compile(net, &opt->opt, NULL)) {
515 kfree(opt); 513 kfree(opt);
516 return -EINVAL; 514 return -EINVAL;
517 } 515 }
@@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp,
520 return 0; 518 return 0;
521} 519}
522 520
523int ip_options_get_from_user(struct net *net, struct ip_options **optp, 521int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
524 unsigned char __user *data, int optlen) 522 unsigned char __user *data, int optlen)
525{ 523{
526 struct ip_options *opt = ip_options_get_alloc(optlen); 524 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
527 525
528 if (!opt) 526 if (!opt)
529 return -ENOMEM; 527 return -ENOMEM;
530 if (optlen && copy_from_user(opt->__data, data, optlen)) { 528 if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
531 kfree(opt); 529 kfree(opt);
532 return -EFAULT; 530 return -EFAULT;
533 } 531 }
534 return ip_options_get_finish(net, optp, opt, optlen); 532 return ip_options_get_finish(net, optp, opt, optlen);
535} 533}
536 534
537int ip_options_get(struct net *net, struct ip_options **optp, 535int ip_options_get(struct net *net, struct ip_options_rcu **optp,
538 unsigned char *data, int optlen) 536 unsigned char *data, int optlen)
539{ 537{
540 struct ip_options *opt = ip_options_get_alloc(optlen); 538 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
541 539
542 if (!opt) 540 if (!opt)
543 return -ENOMEM; 541 return -ENOMEM;
544 if (optlen) 542 if (optlen)
545 memcpy(opt->__data, data, optlen); 543 memcpy(opt->opt.__data, data, optlen);
546 return ip_options_get_finish(net, optp, opt, optlen); 544 return ip_options_get_finish(net, optp, opt, optlen);
547} 545}
548 546
@@ -555,7 +553,7 @@ void ip_forward_options(struct sk_buff *skb)
555 553
556 if (opt->rr_needaddr) { 554 if (opt->rr_needaddr) {
557 optptr = (unsigned char *)raw + opt->rr; 555 optptr = (unsigned char *)raw + opt->rr;
558 ip_rt_get_source(&optptr[optptr[2]-5], rt); 556 ip_rt_get_source(&optptr[optptr[2]-5], skb, rt);
559 opt->is_changed = 1; 557 opt->is_changed = 1;
560 } 558 }
561 if (opt->srr_is_hit) { 559 if (opt->srr_is_hit) {
@@ -569,19 +567,18 @@ void ip_forward_options(struct sk_buff *skb)
569 ) { 567 ) {
570 if (srrptr + 3 > srrspace) 568 if (srrptr + 3 > srrspace)
571 break; 569 break;
572 if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) 570 if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0)
573 break; 571 break;
574 } 572 }
575 if (srrptr + 3 <= srrspace) { 573 if (srrptr + 3 <= srrspace) {
576 opt->is_changed = 1; 574 opt->is_changed = 1;
577 ip_rt_get_source(&optptr[srrptr-1], rt); 575 ip_rt_get_source(&optptr[srrptr-1], skb, rt);
578 ip_hdr(skb)->daddr = rt->rt_dst;
579 optptr[2] = srrptr+4; 576 optptr[2] = srrptr+4;
580 } else if (net_ratelimit()) 577 } else if (net_ratelimit())
581 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); 578 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
582 if (opt->ts_needaddr) { 579 if (opt->ts_needaddr) {
583 optptr = raw + opt->ts; 580 optptr = raw + opt->ts;
584 ip_rt_get_source(&optptr[optptr[2]-9], rt); 581 ip_rt_get_source(&optptr[optptr[2]-9], skb, rt);
585 opt->is_changed = 1; 582 opt->is_changed = 1;
586 } 583 }
587 } 584 }
@@ -603,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
603 unsigned long orefdst; 600 unsigned long orefdst;
604 int err; 601 int err;
605 602
606 if (!opt->srr || !rt) 603 if (!rt)
607 return 0; 604 return 0;
608 605
609 if (skb->pkt_type != PACKET_HOST) 606 if (skb->pkt_type != PACKET_HOST)
@@ -637,7 +634,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
637 if (rt2->rt_type != RTN_LOCAL) 634 if (rt2->rt_type != RTN_LOCAL)
638 break; 635 break;
639 /* Superfast 8) loopback forward */ 636 /* Superfast 8) loopback forward */
640 memcpy(&iph->daddr, &optptr[srrptr-1], 4); 637 iph->daddr = nexthop;
641 opt->is_changed = 1; 638 opt->is_changed = 1;
642 } 639 }
643 if (srrptr <= srrspace) { 640 if (srrptr <= srrspace) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 459c011b1d4a..98af3697c718 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
140 * 140 *
141 */ 141 */
142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, 142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 __be32 saddr, __be32 daddr, struct ip_options *opt) 143 __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
144{ 144{
145 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
146 struct rtable *rt = skb_rtable(skb); 146 struct rtable *rt = skb_rtable(skb);
147 struct iphdr *iph; 147 struct iphdr *iph;
148 148
149 /* Build the IP header. */ 149 /* Build the IP header. */
150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
151 skb_reset_network_header(skb); 151 skb_reset_network_header(skb);
152 iph = ip_hdr(skb); 152 iph = ip_hdr(skb);
153 iph->version = 4; 153 iph->version = 4;
@@ -158,14 +158,14 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
158 else 158 else
159 iph->frag_off = 0; 159 iph->frag_off = 0;
160 iph->ttl = ip_select_ttl(inet, &rt->dst); 160 iph->ttl = ip_select_ttl(inet, &rt->dst);
161 iph->daddr = rt->rt_dst; 161 iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
162 iph->saddr = rt->rt_src; 162 iph->saddr = saddr;
163 iph->protocol = sk->sk_protocol; 163 iph->protocol = sk->sk_protocol;
164 ip_select_ident(iph, &rt->dst, sk); 164 ip_select_ident(iph, &rt->dst, sk);
165 165
166 if (opt && opt->optlen) { 166 if (opt && opt->opt.optlen) {
167 iph->ihl += opt->optlen>>2; 167 iph->ihl += opt->opt.optlen>>2;
168 ip_options_build(skb, opt, daddr, rt, 0); 168 ip_options_build(skb, &opt->opt, daddr, rt, 0);
169 } 169 }
170 170
171 skb->priority = sk->sk_priority; 171 skb->priority = sk->sk_priority;
@@ -312,11 +312,12 @@ int ip_output(struct sk_buff *skb)
312 !(IPCB(skb)->flags & IPSKB_REROUTED)); 312 !(IPCB(skb)->flags & IPSKB_REROUTED));
313} 313}
314 314
315int ip_queue_xmit(struct sk_buff *skb) 315int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
316{ 316{
317 struct sock *sk = skb->sk; 317 struct sock *sk = skb->sk;
318 struct inet_sock *inet = inet_sk(sk); 318 struct inet_sock *inet = inet_sk(sk);
319 struct ip_options *opt = inet->opt; 319 struct ip_options_rcu *inet_opt;
320 struct flowi4 *fl4;
320 struct rtable *rt; 321 struct rtable *rt;
321 struct iphdr *iph; 322 struct iphdr *iph;
322 int res; 323 int res;
@@ -325,6 +326,8 @@ int ip_queue_xmit(struct sk_buff *skb)
325 * f.e. by something like SCTP. 326 * f.e. by something like SCTP.
326 */ 327 */
327 rcu_read_lock(); 328 rcu_read_lock();
329 inet_opt = rcu_dereference(inet->inet_opt);
330 fl4 = &fl->u.ip4;
328 rt = skb_rtable(skb); 331 rt = skb_rtable(skb);
329 if (rt != NULL) 332 if (rt != NULL)
330 goto packet_routed; 333 goto packet_routed;
@@ -336,14 +339,14 @@ int ip_queue_xmit(struct sk_buff *skb)
336 339
337 /* Use correct destination address if we have options. */ 340 /* Use correct destination address if we have options. */
338 daddr = inet->inet_daddr; 341 daddr = inet->inet_daddr;
339 if(opt && opt->srr) 342 if (inet_opt && inet_opt->opt.srr)
340 daddr = opt->faddr; 343 daddr = inet_opt->opt.faddr;
341 344
342 /* If this fails, retransmit mechanism of transport layer will 345 /* If this fails, retransmit mechanism of transport layer will
343 * keep trying until route appears or the connection times 346 * keep trying until route appears or the connection times
344 * itself out. 347 * itself out.
345 */ 348 */
346 rt = ip_route_output_ports(sock_net(sk), sk, 349 rt = ip_route_output_ports(sock_net(sk), fl4, sk,
347 daddr, inet->inet_saddr, 350 daddr, inet->inet_saddr,
348 inet->inet_dport, 351 inet->inet_dport,
349 inet->inet_sport, 352 inet->inet_sport,
@@ -357,11 +360,11 @@ int ip_queue_xmit(struct sk_buff *skb)
357 skb_dst_set_noref(skb, &rt->dst); 360 skb_dst_set_noref(skb, &rt->dst);
358 361
359packet_routed: 362packet_routed:
360 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 363 if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
361 goto no_route; 364 goto no_route;
362 365
363 /* OK, we know where to send it, allocate and build IP header. */ 366 /* OK, we know where to send it, allocate and build IP header. */
364 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 367 skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
365 skb_reset_network_header(skb); 368 skb_reset_network_header(skb);
366 iph = ip_hdr(skb); 369 iph = ip_hdr(skb);
367 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 370 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
@@ -371,13 +374,13 @@ packet_routed:
371 iph->frag_off = 0; 374 iph->frag_off = 0;
372 iph->ttl = ip_select_ttl(inet, &rt->dst); 375 iph->ttl = ip_select_ttl(inet, &rt->dst);
373 iph->protocol = sk->sk_protocol; 376 iph->protocol = sk->sk_protocol;
374 iph->saddr = rt->rt_src; 377 iph->saddr = fl4->saddr;
375 iph->daddr = rt->rt_dst; 378 iph->daddr = fl4->daddr;
376 /* Transport layer set skb->h.foo itself. */ 379 /* Transport layer set skb->h.foo itself. */
377 380
378 if (opt && opt->optlen) { 381 if (inet_opt && inet_opt->opt.optlen) {
379 iph->ihl += opt->optlen >> 2; 382 iph->ihl += inet_opt->opt.optlen >> 2;
380 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 383 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
381 } 384 }
382 385
383 ip_select_ident_more(iph, &rt->dst, sk, 386 ip_select_ident_more(iph, &rt->dst, sk,
@@ -773,7 +776,9 @@ static inline int ip_ufo_append_data(struct sock *sk,
773 (length - transhdrlen)); 776 (length - transhdrlen));
774} 777}
775 778
776static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, 779static int __ip_append_data(struct sock *sk,
780 struct flowi4 *fl4,
781 struct sk_buff_head *queue,
777 struct inet_cork *cork, 782 struct inet_cork *cork,
778 int getfrag(void *from, char *to, int offset, 783 int getfrag(void *from, char *to, int offset,
779 int len, int odd, struct sk_buff *skb), 784 int len, int odd, struct sk_buff *skb),
@@ -805,7 +810,7 @@ static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue,
805 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 810 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
806 811
807 if (cork->length + length > 0xFFFF - fragheaderlen) { 812 if (cork->length + length > 0xFFFF - fragheaderlen) {
808 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 813 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
809 mtu-exthdrlen); 814 mtu-exthdrlen);
810 return -EMSGSIZE; 815 return -EMSGSIZE;
811 } 816 }
@@ -1033,7 +1038,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1033 struct ipcm_cookie *ipc, struct rtable **rtp) 1038 struct ipcm_cookie *ipc, struct rtable **rtp)
1034{ 1039{
1035 struct inet_sock *inet = inet_sk(sk); 1040 struct inet_sock *inet = inet_sk(sk);
1036 struct ip_options *opt; 1041 struct ip_options_rcu *opt;
1037 struct rtable *rt; 1042 struct rtable *rt;
1038 1043
1039 /* 1044 /*
@@ -1047,7 +1052,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1047 if (unlikely(cork->opt == NULL)) 1052 if (unlikely(cork->opt == NULL))
1048 return -ENOBUFS; 1053 return -ENOBUFS;
1049 } 1054 }
1050 memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); 1055 memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
1051 cork->flags |= IPCORK_OPT; 1056 cork->flags |= IPCORK_OPT;
1052 cork->addr = ipc->addr; 1057 cork->addr = ipc->addr;
1053 } 1058 }
@@ -1080,7 +1085,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1080 * 1085 *
1081 * LATER: length must be adjusted by pad at tail, when it is required. 1086 * LATER: length must be adjusted by pad at tail, when it is required.
1082 */ 1087 */
1083int ip_append_data(struct sock *sk, 1088int ip_append_data(struct sock *sk, struct flowi4 *fl4,
1084 int getfrag(void *from, char *to, int offset, int len, 1089 int getfrag(void *from, char *to, int offset, int len,
1085 int odd, struct sk_buff *skb), 1090 int odd, struct sk_buff *skb),
1086 void *from, int length, int transhdrlen, 1091 void *from, int length, int transhdrlen,
@@ -1094,24 +1099,25 @@ int ip_append_data(struct sock *sk,
1094 return 0; 1099 return 0;
1095 1100
1096 if (skb_queue_empty(&sk->sk_write_queue)) { 1101 if (skb_queue_empty(&sk->sk_write_queue)) {
1097 err = ip_setup_cork(sk, &inet->cork, ipc, rtp); 1102 err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp);
1098 if (err) 1103 if (err)
1099 return err; 1104 return err;
1100 } else { 1105 } else {
1101 transhdrlen = 0; 1106 transhdrlen = 0;
1102 } 1107 }
1103 1108
1104 return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, 1109 return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag,
1105 from, length, transhdrlen, flags); 1110 from, length, transhdrlen, flags);
1106} 1111}
1107 1112
1108ssize_t ip_append_page(struct sock *sk, struct page *page, 1113ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1109 int offset, size_t size, int flags) 1114 int offset, size_t size, int flags)
1110{ 1115{
1111 struct inet_sock *inet = inet_sk(sk); 1116 struct inet_sock *inet = inet_sk(sk);
1112 struct sk_buff *skb; 1117 struct sk_buff *skb;
1113 struct rtable *rt; 1118 struct rtable *rt;
1114 struct ip_options *opt = NULL; 1119 struct ip_options *opt = NULL;
1120 struct inet_cork *cork;
1115 int hh_len; 1121 int hh_len;
1116 int mtu; 1122 int mtu;
1117 int len; 1123 int len;
@@ -1127,28 +1133,29 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1127 if (skb_queue_empty(&sk->sk_write_queue)) 1133 if (skb_queue_empty(&sk->sk_write_queue))
1128 return -EINVAL; 1134 return -EINVAL;
1129 1135
1130 rt = (struct rtable *)inet->cork.dst; 1136 cork = &inet->cork.base;
1131 if (inet->cork.flags & IPCORK_OPT) 1137 rt = (struct rtable *)cork->dst;
1132 opt = inet->cork.opt; 1138 if (cork->flags & IPCORK_OPT)
1139 opt = cork->opt;
1133 1140
1134 if (!(rt->dst.dev->features&NETIF_F_SG)) 1141 if (!(rt->dst.dev->features&NETIF_F_SG))
1135 return -EOPNOTSUPP; 1142 return -EOPNOTSUPP;
1136 1143
1137 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1144 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1138 mtu = inet->cork.fragsize; 1145 mtu = cork->fragsize;
1139 1146
1140 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1147 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1141 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 1148 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1142 1149
1143 if (inet->cork.length + size > 0xFFFF - fragheaderlen) { 1150 if (cork->length + size > 0xFFFF - fragheaderlen) {
1144 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); 1151 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu);
1145 return -EMSGSIZE; 1152 return -EMSGSIZE;
1146 } 1153 }
1147 1154
1148 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1155 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1149 return -EINVAL; 1156 return -EINVAL;
1150 1157
1151 inet->cork.length += size; 1158 cork->length += size;
1152 if ((size + skb->len > mtu) && 1159 if ((size + skb->len > mtu) &&
1153 (sk->sk_protocol == IPPROTO_UDP) && 1160 (sk->sk_protocol == IPPROTO_UDP) &&
1154 (rt->dst.dev->features & NETIF_F_UFO)) { 1161 (rt->dst.dev->features & NETIF_F_UFO)) {
@@ -1243,7 +1250,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1243 return 0; 1250 return 0;
1244 1251
1245error: 1252error:
1246 inet->cork.length -= size; 1253 cork->length -= size;
1247 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1254 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1248 return err; 1255 return err;
1249} 1256}
@@ -1262,6 +1269,7 @@ static void ip_cork_release(struct inet_cork *cork)
1262 * and push them out. 1269 * and push them out.
1263 */ 1270 */
1264struct sk_buff *__ip_make_skb(struct sock *sk, 1271struct sk_buff *__ip_make_skb(struct sock *sk,
1272 struct flowi4 *fl4,
1265 struct sk_buff_head *queue, 1273 struct sk_buff_head *queue,
1266 struct inet_cork *cork) 1274 struct inet_cork *cork)
1267{ 1275{
@@ -1319,17 +1327,18 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1319 iph = (struct iphdr *)skb->data; 1327 iph = (struct iphdr *)skb->data;
1320 iph->version = 4; 1328 iph->version = 4;
1321 iph->ihl = 5; 1329 iph->ihl = 5;
1322 if (opt) {
1323 iph->ihl += opt->optlen>>2;
1324 ip_options_build(skb, opt, cork->addr, rt, 0);
1325 }
1326 iph->tos = inet->tos; 1330 iph->tos = inet->tos;
1327 iph->frag_off = df; 1331 iph->frag_off = df;
1328 ip_select_ident(iph, &rt->dst, sk); 1332 ip_select_ident(iph, &rt->dst, sk);
1329 iph->ttl = ttl; 1333 iph->ttl = ttl;
1330 iph->protocol = sk->sk_protocol; 1334 iph->protocol = sk->sk_protocol;
1331 iph->saddr = rt->rt_src; 1335 iph->saddr = fl4->saddr;
1332 iph->daddr = rt->rt_dst; 1336 iph->daddr = fl4->daddr;
1337
1338 if (opt) {
1339 iph->ihl += opt->optlen>>2;
1340 ip_options_build(skb, opt, cork->addr, rt, 0);
1341 }
1333 1342
1334 skb->priority = sk->sk_priority; 1343 skb->priority = sk->sk_priority;
1335 skb->mark = sk->sk_mark; 1344 skb->mark = sk->sk_mark;
@@ -1365,11 +1374,11 @@ int ip_send_skb(struct sk_buff *skb)
1365 return err; 1374 return err;
1366} 1375}
1367 1376
1368int ip_push_pending_frames(struct sock *sk) 1377int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
1369{ 1378{
1370 struct sk_buff *skb; 1379 struct sk_buff *skb;
1371 1380
1372 skb = ip_finish_skb(sk); 1381 skb = ip_finish_skb(sk, fl4);
1373 if (!skb) 1382 if (!skb)
1374 return 0; 1383 return 0;
1375 1384
@@ -1394,17 +1403,18 @@ static void __ip_flush_pending_frames(struct sock *sk,
1394 1403
1395void ip_flush_pending_frames(struct sock *sk) 1404void ip_flush_pending_frames(struct sock *sk)
1396{ 1405{
1397 __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); 1406 __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
1398} 1407}
1399 1408
1400struct sk_buff *ip_make_skb(struct sock *sk, 1409struct sk_buff *ip_make_skb(struct sock *sk,
1410 struct flowi4 *fl4,
1401 int getfrag(void *from, char *to, int offset, 1411 int getfrag(void *from, char *to, int offset,
1402 int len, int odd, struct sk_buff *skb), 1412 int len, int odd, struct sk_buff *skb),
1403 void *from, int length, int transhdrlen, 1413 void *from, int length, int transhdrlen,
1404 struct ipcm_cookie *ipc, struct rtable **rtp, 1414 struct ipcm_cookie *ipc, struct rtable **rtp,
1405 unsigned int flags) 1415 unsigned int flags)
1406{ 1416{
1407 struct inet_cork cork = {}; 1417 struct inet_cork cork;
1408 struct sk_buff_head queue; 1418 struct sk_buff_head queue;
1409 int err; 1419 int err;
1410 1420
@@ -1413,18 +1423,21 @@ struct sk_buff *ip_make_skb(struct sock *sk,
1413 1423
1414 __skb_queue_head_init(&queue); 1424 __skb_queue_head_init(&queue);
1415 1425
1426 cork.flags = 0;
1427 cork.addr = 0;
1428 cork.opt = NULL;
1416 err = ip_setup_cork(sk, &cork, ipc, rtp); 1429 err = ip_setup_cork(sk, &cork, ipc, rtp);
1417 if (err) 1430 if (err)
1418 return ERR_PTR(err); 1431 return ERR_PTR(err);
1419 1432
1420 err = __ip_append_data(sk, &queue, &cork, getfrag, 1433 err = __ip_append_data(sk, fl4, &queue, &cork, getfrag,
1421 from, length, transhdrlen, flags); 1434 from, length, transhdrlen, flags);
1422 if (err) { 1435 if (err) {
1423 __ip_flush_pending_frames(sk, &queue, &cork); 1436 __ip_flush_pending_frames(sk, &queue, &cork);
1424 return ERR_PTR(err); 1437 return ERR_PTR(err);
1425 } 1438 }
1426 1439
1427 return __ip_make_skb(sk, &queue, &cork); 1440 return __ip_make_skb(sk, fl4, &queue, &cork);
1428} 1441}
1429 1442
1430/* 1443/*
@@ -1447,48 +1460,39 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1447 * Should run single threaded per socket because it uses the sock 1460 * Should run single threaded per socket because it uses the sock
1448 * structure to pass arguments. 1461 * structure to pass arguments.
1449 */ 1462 */
1450void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 1463void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1451 unsigned int len) 1464 struct ip_reply_arg *arg, unsigned int len)
1452{ 1465{
1453 struct inet_sock *inet = inet_sk(sk); 1466 struct inet_sock *inet = inet_sk(sk);
1454 struct { 1467 struct ip_options_data replyopts;
1455 struct ip_options opt;
1456 char data[40];
1457 } replyopts;
1458 struct ipcm_cookie ipc; 1468 struct ipcm_cookie ipc;
1459 __be32 daddr; 1469 struct flowi4 fl4;
1460 struct rtable *rt = skb_rtable(skb); 1470 struct rtable *rt = skb_rtable(skb);
1461 1471
1462 if (ip_options_echo(&replyopts.opt, skb)) 1472 if (ip_options_echo(&replyopts.opt.opt, skb))
1463 return; 1473 return;
1464 1474
1465 daddr = ipc.addr = rt->rt_src; 1475 ipc.addr = daddr;
1466 ipc.opt = NULL; 1476 ipc.opt = NULL;
1467 ipc.tx_flags = 0; 1477 ipc.tx_flags = 0;
1468 1478
1469 if (replyopts.opt.optlen) { 1479 if (replyopts.opt.opt.optlen) {
1470 ipc.opt = &replyopts.opt; 1480 ipc.opt = &replyopts.opt;
1471 1481
1472 if (ipc.opt->srr) 1482 if (replyopts.opt.opt.srr)
1473 daddr = replyopts.opt.faddr; 1483 daddr = replyopts.opt.opt.faddr;
1474 } 1484 }
1475 1485
1476 { 1486 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1477 struct flowi4 fl4 = { 1487 RT_TOS(ip_hdr(skb)->tos),
1478 .flowi4_oif = arg->bound_dev_if, 1488 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1479 .daddr = daddr, 1489 ip_reply_arg_flowi_flags(arg),
1480 .saddr = rt->rt_spec_dst, 1490 daddr, rt->rt_spec_dst,
1481 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), 1491 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1482 .fl4_sport = tcp_hdr(skb)->dest, 1492 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1483 .fl4_dport = tcp_hdr(skb)->source, 1493 rt = ip_route_output_key(sock_net(sk), &fl4);
1484 .flowi4_proto = sk->sk_protocol, 1494 if (IS_ERR(rt))
1485 .flowi4_flags = ip_reply_arg_flowi_flags(arg), 1495 return;
1486 };
1487 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1488 rt = ip_route_output_key(sock_net(sk), &fl4);
1489 if (IS_ERR(rt))
1490 return;
1491 }
1492 1496
1493 /* And let IP do all the hard work. 1497 /* And let IP do all the hard work.
1494 1498
@@ -1501,7 +1505,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1501 sk->sk_priority = skb->priority; 1505 sk->sk_priority = skb->priority;
1502 sk->sk_protocol = ip_hdr(skb)->protocol; 1506 sk->sk_protocol = ip_hdr(skb)->protocol;
1503 sk->sk_bound_dev_if = arg->bound_dev_if; 1507 sk->sk_bound_dev_if = arg->bound_dev_if;
1504 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1508 ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1505 &ipc, &rt, MSG_DONTWAIT); 1509 &ipc, &rt, MSG_DONTWAIT);
1506 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1510 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1507 if (arg->csumoffset >= 0) 1511 if (arg->csumoffset >= 0)
@@ -1509,7 +1513,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1509 arg->csumoffset) = csum_fold(csum_add(skb->csum, 1513 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1510 arg->csum)); 1514 arg->csum));
1511 skb->ip_summed = CHECKSUM_NONE; 1515 skb->ip_summed = CHECKSUM_NONE;
1512 ip_push_pending_frames(sk); 1516 ip_push_pending_frames(sk, &fl4);
1513 } 1517 }
1514 1518
1515 bh_unlock_sock(sk); 1519 bh_unlock_sock(sk);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3948c86e59ca..ab0c9efd1efa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -131,7 +131,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
131static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) 131static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
132{ 132{
133 struct sockaddr_in sin; 133 struct sockaddr_in sin;
134 struct iphdr *iph = ip_hdr(skb); 134 const struct iphdr *iph = ip_hdr(skb);
135 __be16 *ports = (__be16 *)skb_transport_header(skb); 135 __be16 *ports = (__be16 *)skb_transport_header(skb);
136 136
137 if (skb_transport_offset(skb) + 4 > skb->len) 137 if (skb_transport_offset(skb) + 4 > skb->len)
@@ -451,6 +451,11 @@ out:
451} 451}
452 452
453 453
454static void opt_kfree_rcu(struct rcu_head *head)
455{
456 kfree(container_of(head, struct ip_options_rcu, rcu));
457}
458
454/* 459/*
455 * Socket option code for IP. This is the end of the line after any 460 * Socket option code for IP. This is the end of the line after any
456 * TCP,UDP etc options on an IP socket. 461 * TCP,UDP etc options on an IP socket.
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
497 switch (optname) { 502 switch (optname) {
498 case IP_OPTIONS: 503 case IP_OPTIONS:
499 { 504 {
500 struct ip_options *opt = NULL; 505 struct ip_options_rcu *old, *opt = NULL;
506
501 if (optlen > 40) 507 if (optlen > 40)
502 goto e_inval; 508 goto e_inval;
503 err = ip_options_get_from_user(sock_net(sk), &opt, 509 err = ip_options_get_from_user(sock_net(sk), &opt,
504 optval, optlen); 510 optval, optlen);
505 if (err) 511 if (err)
506 break; 512 break;
513 old = rcu_dereference_protected(inet->inet_opt,
514 sock_owned_by_user(sk));
507 if (inet->is_icsk) { 515 if (inet->is_icsk) {
508 struct inet_connection_sock *icsk = inet_csk(sk); 516 struct inet_connection_sock *icsk = inet_csk(sk);
509#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 517#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
512 (TCPF_LISTEN | TCPF_CLOSE)) && 520 (TCPF_LISTEN | TCPF_CLOSE)) &&
513 inet->inet_daddr != LOOPBACK4_IPV6)) { 521 inet->inet_daddr != LOOPBACK4_IPV6)) {
514#endif 522#endif
515 if (inet->opt) 523 if (old)
516 icsk->icsk_ext_hdr_len -= inet->opt->optlen; 524 icsk->icsk_ext_hdr_len -= old->opt.optlen;
517 if (opt) 525 if (opt)
518 icsk->icsk_ext_hdr_len += opt->optlen; 526 icsk->icsk_ext_hdr_len += opt->opt.optlen;
519 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 527 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
520#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 528#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
521 } 529 }
522#endif 530#endif
523 } 531 }
524 opt = xchg(&inet->opt, opt); 532 rcu_assign_pointer(inet->inet_opt, opt);
525 kfree(opt); 533 if (old)
534 call_rcu(&old->rcu, opt_kfree_rcu);
526 break; 535 break;
527 } 536 }
528 case IP_PKTINFO: 537 case IP_PKTINFO:
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1081 case IP_OPTIONS: 1090 case IP_OPTIONS:
1082 { 1091 {
1083 unsigned char optbuf[sizeof(struct ip_options)+40]; 1092 unsigned char optbuf[sizeof(struct ip_options)+40];
1084 struct ip_options * opt = (struct ip_options *)optbuf; 1093 struct ip_options *opt = (struct ip_options *)optbuf;
1094 struct ip_options_rcu *inet_opt;
1095
1096 inet_opt = rcu_dereference_protected(inet->inet_opt,
1097 sock_owned_by_user(sk));
1085 opt->optlen = 0; 1098 opt->optlen = 0;
1086 if (inet->opt) 1099 if (inet_opt)
1087 memcpy(optbuf, inet->opt, 1100 memcpy(optbuf, &inet_opt->opt,
1088 sizeof(struct ip_options)+ 1101 sizeof(struct ip_options) +
1089 inet->opt->optlen); 1102 inet_opt->opt.optlen);
1090 release_sock(sk); 1103 release_sock(sk);
1091 1104
1092 if (opt->optlen == 0) 1105 if (opt->optlen == 0)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 629067571f02..c857f6f49b03 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -27,7 +27,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
27{ 27{
28 struct net *net = dev_net(skb->dev); 28 struct net *net = dev_net(skb->dev);
29 __be32 spi; 29 __be32 spi;
30 struct iphdr *iph = (struct iphdr *)skb->data; 30 const struct iphdr *iph = (const struct iphdr *)skb->data;
31 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); 31 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
32 struct xfrm_state *x; 32 struct xfrm_state *x;
33 33
@@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
36 return; 36 return;
37 37
38 spi = htonl(ntohs(ipch->cpi)); 38 spi = htonl(ntohs(ipch->cpi));
39 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, 39 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
40 spi, IPPROTO_COMP, AF_INET); 40 spi, IPPROTO_COMP, AF_INET);
41 if (!x) 41 if (!x)
42 return; 42 return;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cbff2ecccf3d..ab7e5542c1cf 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -87,8 +87,8 @@
87#endif 87#endif
88 88
89/* Define the friendly delay before and after opening net devices */ 89/* Define the friendly delay before and after opening net devices */
90#define CONF_PRE_OPEN 500 /* Before opening: 1/2 second */ 90#define CONF_POST_OPEN 10 /* After opening: 10 msecs */
91#define CONF_POST_OPEN 1 /* After opening: 1 second */ 91#define CONF_CARRIER_TIMEOUT 120000 /* Wait for carrier timeout */
92 92
93/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ 93/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
94#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ 94#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */
@@ -188,14 +188,14 @@ struct ic_device {
188static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ 188static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
189static struct net_device *ic_dev __initdata = NULL; /* Selected device */ 189static struct net_device *ic_dev __initdata = NULL; /* Selected device */
190 190
191static bool __init ic_device_match(struct net_device *dev) 191static bool __init ic_is_init_dev(struct net_device *dev)
192{ 192{
193 if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : 193 if (dev->flags & IFF_LOOPBACK)
194 return false;
195 return user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
194 (!(dev->flags & IFF_LOOPBACK) && 196 (!(dev->flags & IFF_LOOPBACK) &&
195 (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && 197 (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
196 strncmp(dev->name, "dummy", 5))) 198 strncmp(dev->name, "dummy", 5));
197 return true;
198 return false;
199} 199}
200 200
201static int __init ic_open_devs(void) 201static int __init ic_open_devs(void)
@@ -203,6 +203,7 @@ static int __init ic_open_devs(void)
203 struct ic_device *d, **last; 203 struct ic_device *d, **last;
204 struct net_device *dev; 204 struct net_device *dev;
205 unsigned short oflags; 205 unsigned short oflags;
206 unsigned long start;
206 207
207 last = &ic_first_dev; 208 last = &ic_first_dev;
208 rtnl_lock(); 209 rtnl_lock();
@@ -216,9 +217,7 @@ static int __init ic_open_devs(void)
216 } 217 }
217 218
218 for_each_netdev(&init_net, dev) { 219 for_each_netdev(&init_net, dev) {
219 if (dev->flags & IFF_LOOPBACK) 220 if (ic_is_init_dev(dev)) {
220 continue;
221 if (ic_device_match(dev)) {
222 int able = 0; 221 int able = 0;
223 if (dev->mtu >= 364) 222 if (dev->mtu >= 364)
224 able |= IC_BOOTP; 223 able |= IC_BOOTP;
@@ -252,6 +251,17 @@ static int __init ic_open_devs(void)
252 dev->name, able, d->xid)); 251 dev->name, able, d->xid));
253 } 252 }
254 } 253 }
254
255 /* wait for a carrier on at least one device */
256 start = jiffies;
257 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
258 for_each_netdev(&init_net, dev)
259 if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
260 goto have_carrier;
261
262 msleep(1);
263 }
264have_carrier:
255 rtnl_unlock(); 265 rtnl_unlock();
256 266
257 *last = NULL; 267 *last = NULL;
@@ -1324,14 +1334,13 @@ static int __init wait_for_devices(void)
1324{ 1334{
1325 int i; 1335 int i;
1326 1336
1327 msleep(CONF_PRE_OPEN);
1328 for (i = 0; i < DEVICE_WAIT_MAX; i++) { 1337 for (i = 0; i < DEVICE_WAIT_MAX; i++) {
1329 struct net_device *dev; 1338 struct net_device *dev;
1330 int found = 0; 1339 int found = 0;
1331 1340
1332 rtnl_lock(); 1341 rtnl_lock();
1333 for_each_netdev(&init_net, dev) { 1342 for_each_netdev(&init_net, dev) {
1334 if (ic_device_match(dev)) { 1343 if (ic_is_init_dev(dev)) {
1335 found = 1; 1344 found = 1;
1336 break; 1345 break;
1337 } 1346 }
@@ -1378,7 +1387,7 @@ static int __init ip_auto_config(void)
1378 return err; 1387 return err;
1379 1388
1380 /* Give drivers a chance to settle */ 1389 /* Give drivers a chance to settle */
1381 ssleep(CONF_POST_OPEN); 1390 msleep(CONF_POST_OPEN);
1382 1391
1383 /* 1392 /*
1384 * If the config information is insufficient (e.g., our IP address or 1393 * If the config information is insufficient (e.g., our IP address or
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfc17c5914e7..378b20b7ca6e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -276,11 +276,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
276 276
277 dev_net_set(dev, net); 277 dev_net_set(dev, net);
278 278
279 if (strchr(name, '%')) {
280 if (dev_alloc_name(dev, name) < 0)
281 goto failed_free;
282 }
283
284 nt = netdev_priv(dev); 279 nt = netdev_priv(dev);
285 nt->parms = *parms; 280 nt->parms = *parms;
286 281
@@ -319,7 +314,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
319 8 bytes of packet payload. It means, that precise relaying of 314 8 bytes of packet payload. It means, that precise relaying of
320 ICMP in the real Internet is absolutely infeasible. 315 ICMP in the real Internet is absolutely infeasible.
321 */ 316 */
322 struct iphdr *iph = (struct iphdr *)skb->data; 317 const struct iphdr *iph = (const struct iphdr *)skb->data;
323 const int type = icmp_hdr(skb)->type; 318 const int type = icmp_hdr(skb)->type;
324 const int code = icmp_hdr(skb)->code; 319 const int code = icmp_hdr(skb)->code;
325 struct ip_tunnel *t; 320 struct ip_tunnel *t;
@@ -433,15 +428,16 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
433{ 428{
434 struct ip_tunnel *tunnel = netdev_priv(dev); 429 struct ip_tunnel *tunnel = netdev_priv(dev);
435 struct pcpu_tstats *tstats; 430 struct pcpu_tstats *tstats;
436 struct iphdr *tiph = &tunnel->parms.iph; 431 const struct iphdr *tiph = &tunnel->parms.iph;
437 u8 tos = tunnel->parms.iph.tos; 432 u8 tos = tunnel->parms.iph.tos;
438 __be16 df = tiph->frag_off; 433 __be16 df = tiph->frag_off;
439 struct rtable *rt; /* Route to the other host */ 434 struct rtable *rt; /* Route to the other host */
440 struct net_device *tdev; /* Device to other host */ 435 struct net_device *tdev; /* Device to other host */
441 struct iphdr *old_iph = ip_hdr(skb); 436 const struct iphdr *old_iph = ip_hdr(skb);
442 struct iphdr *iph; /* Our new IP header */ 437 struct iphdr *iph; /* Our new IP header */
443 unsigned int max_headroom; /* The extra header space needed */ 438 unsigned int max_headroom; /* The extra header space needed */
444 __be32 dst = tiph->daddr; 439 __be32 dst = tiph->daddr;
440 struct flowi4 fl4;
445 int mtu; 441 int mtu;
446 442
447 if (skb->protocol != htons(ETH_P_IP)) 443 if (skb->protocol != htons(ETH_P_IP))
@@ -460,7 +456,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
460 goto tx_error_icmp; 456 goto tx_error_icmp;
461 } 457 }
462 458
463 rt = ip_route_output_ports(dev_net(dev), NULL, 459 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
464 dst, tiph->saddr, 460 dst, tiph->saddr,
465 0, 0, 461 0, 0,
466 IPPROTO_IPIP, RT_TOS(tos), 462 IPPROTO_IPIP, RT_TOS(tos),
@@ -549,8 +545,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
549 iph->frag_off = df; 545 iph->frag_off = df;
550 iph->protocol = IPPROTO_IPIP; 546 iph->protocol = IPPROTO_IPIP;
551 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); 547 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
552 iph->daddr = rt->rt_dst; 548 iph->daddr = fl4.daddr;
553 iph->saddr = rt->rt_src; 549 iph->saddr = fl4.saddr;
554 550
555 if ((iph->ttl = tiph->ttl) == 0) 551 if ((iph->ttl = tiph->ttl) == 0)
556 iph->ttl = old_iph->ttl; 552 iph->ttl = old_iph->ttl;
@@ -572,19 +568,21 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
572{ 568{
573 struct net_device *tdev = NULL; 569 struct net_device *tdev = NULL;
574 struct ip_tunnel *tunnel; 570 struct ip_tunnel *tunnel;
575 struct iphdr *iph; 571 const struct iphdr *iph;
576 572
577 tunnel = netdev_priv(dev); 573 tunnel = netdev_priv(dev);
578 iph = &tunnel->parms.iph; 574 iph = &tunnel->parms.iph;
579 575
580 if (iph->daddr) { 576 if (iph->daddr) {
581 struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, 577 struct rtable *rt;
582 iph->daddr, iph->saddr, 578 struct flowi4 fl4;
583 0, 0, 579
584 IPPROTO_IPIP, 580 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
585 RT_TOS(iph->tos), 581 iph->daddr, iph->saddr,
586 tunnel->parms.link); 582 0, 0,
587 583 IPPROTO_IPIP,
584 RT_TOS(iph->tos),
585 tunnel->parms.link);
588 if (!IS_ERR(rt)) { 586 if (!IS_ERR(rt)) {
589 tdev = rt->dst.dev; 587 tdev = rt->dst.dev;
590 ip_rt_put(rt); 588 ip_rt_put(rt);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1f62eaeb6de4..30a7763c400e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1549,7 +1549,7 @@ static struct notifier_block ip_mr_notifier = {
1549static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) 1549static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1550{ 1550{
1551 struct iphdr *iph; 1551 struct iphdr *iph;
1552 struct iphdr *old_iph = ip_hdr(skb); 1552 const struct iphdr *old_iph = ip_hdr(skb);
1553 1553
1554 skb_push(skb, sizeof(struct iphdr)); 1554 skb_push(skb, sizeof(struct iphdr));
1555 skb->transport_header = skb->network_header; 1555 skb->transport_header = skb->network_header;
@@ -1595,6 +1595,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1595 struct vif_device *vif = &mrt->vif_table[vifi]; 1595 struct vif_device *vif = &mrt->vif_table[vifi];
1596 struct net_device *dev; 1596 struct net_device *dev;
1597 struct rtable *rt; 1597 struct rtable *rt;
1598 struct flowi4 fl4;
1598 int encap = 0; 1599 int encap = 0;
1599 1600
1600 if (vif->dev == NULL) 1601 if (vif->dev == NULL)
@@ -1612,7 +1613,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1612#endif 1613#endif
1613 1614
1614 if (vif->flags & VIFF_TUNNEL) { 1615 if (vif->flags & VIFF_TUNNEL) {
1615 rt = ip_route_output_ports(net, NULL, 1616 rt = ip_route_output_ports(net, &fl4, NULL,
1616 vif->remote, vif->local, 1617 vif->remote, vif->local,
1617 0, 0, 1618 0, 0,
1618 IPPROTO_IPIP, 1619 IPPROTO_IPIP,
@@ -1621,7 +1622,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1621 goto out_free; 1622 goto out_free;
1622 encap = sizeof(struct iphdr); 1623 encap = sizeof(struct iphdr);
1623 } else { 1624 } else {
1624 rt = ip_route_output_ports(net, NULL, iph->daddr, 0, 1625 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
1625 0, 0, 1626 0, 0,
1626 IPPROTO_IPIP, 1627 IPPROTO_IPIP,
1627 RT_TOS(iph->tos), vif->link); 1628 RT_TOS(iph->tos), vif->link);
@@ -1788,12 +1789,14 @@ dont_forward:
1788 return 0; 1789 return 0;
1789} 1790}
1790 1791
1791static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) 1792static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
1792{ 1793{
1794 struct rtable *rt = skb_rtable(skb);
1795 struct iphdr *iph = ip_hdr(skb);
1793 struct flowi4 fl4 = { 1796 struct flowi4 fl4 = {
1794 .daddr = rt->rt_key_dst, 1797 .daddr = iph->daddr,
1795 .saddr = rt->rt_key_src, 1798 .saddr = iph->saddr,
1796 .flowi4_tos = rt->rt_tos, 1799 .flowi4_tos = iph->tos,
1797 .flowi4_oif = rt->rt_oif, 1800 .flowi4_oif = rt->rt_oif,
1798 .flowi4_iif = rt->rt_iif, 1801 .flowi4_iif = rt->rt_iif,
1799 .flowi4_mark = rt->rt_mark, 1802 .flowi4_mark = rt->rt_mark,
@@ -1825,7 +1828,7 @@ int ip_mr_input(struct sk_buff *skb)
1825 if (IPCB(skb)->flags & IPSKB_FORWARDED) 1828 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1826 goto dont_forward; 1829 goto dont_forward;
1827 1830
1828 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); 1831 mrt = ipmr_rt_fib_lookup(net, skb);
1829 if (IS_ERR(mrt)) { 1832 if (IS_ERR(mrt)) {
1830 kfree_skb(skb); 1833 kfree_skb(skb);
1831 return PTR_ERR(mrt); 1834 return PTR_ERR(mrt);
@@ -1957,7 +1960,7 @@ int pim_rcv_v1(struct sk_buff *skb)
1957 1960
1958 pim = igmp_hdr(skb); 1961 pim = igmp_hdr(skb);
1959 1962
1960 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); 1963 mrt = ipmr_rt_fib_lookup(net, skb);
1961 if (IS_ERR(mrt)) 1964 if (IS_ERR(mrt))
1962 goto drop; 1965 goto drop;
1963 if (!mrt->mroute_do_pim || 1966 if (!mrt->mroute_do_pim ||
@@ -1989,7 +1992,7 @@ static int pim_rcv(struct sk_buff *skb)
1989 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1992 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1990 goto drop; 1993 goto drop;
1991 1994
1992 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); 1995 mrt = ipmr_rt_fib_lookup(net, skb);
1993 if (IS_ERR(mrt)) 1996 if (IS_ERR(mrt))
1994 goto drop; 1997 goto drop;
1995 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1998 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
@@ -2038,20 +2041,20 @@ rtattr_failure:
2038 return -EMSGSIZE; 2041 return -EMSGSIZE;
2039} 2042}
2040 2043
2041int ipmr_get_route(struct net *net, 2044int ipmr_get_route(struct net *net, struct sk_buff *skb,
2042 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 2045 __be32 saddr, __be32 daddr,
2046 struct rtmsg *rtm, int nowait)
2043{ 2047{
2044 int err;
2045 struct mr_table *mrt;
2046 struct mfc_cache *cache; 2048 struct mfc_cache *cache;
2047 struct rtable *rt = skb_rtable(skb); 2049 struct mr_table *mrt;
2050 int err;
2048 2051
2049 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2052 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2050 if (mrt == NULL) 2053 if (mrt == NULL)
2051 return -ENOENT; 2054 return -ENOENT;
2052 2055
2053 rcu_read_lock(); 2056 rcu_read_lock();
2054 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 2057 cache = ipmr_cache_find(mrt, saddr, daddr);
2055 2058
2056 if (cache == NULL) { 2059 if (cache == NULL) {
2057 struct sk_buff *skb2; 2060 struct sk_buff *skb2;
@@ -2084,8 +2087,8 @@ int ipmr_get_route(struct net *net,
2084 skb_reset_network_header(skb2); 2087 skb_reset_network_header(skb2);
2085 iph = ip_hdr(skb2); 2088 iph = ip_hdr(skb2);
2086 iph->ihl = sizeof(struct iphdr) >> 2; 2089 iph->ihl = sizeof(struct iphdr) >> 2;
2087 iph->saddr = rt->rt_src; 2090 iph->saddr = saddr;
2088 iph->daddr = rt->rt_dst; 2091 iph->daddr = daddr;
2089 iph->version = 0; 2092 iph->version = 0;
2090 err = ipmr_cache_unresolved(mrt, vif, skb2); 2093 err = ipmr_cache_unresolved(mrt, vif, skb2);
2091 read_unlock(&mrt_lock); 2094 read_unlock(&mrt_lock);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 89bc7e66d598..fd7a3f68917f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
260 void *table_base; 260 void *table_base;
261 const struct xt_table_info *private; 261 const struct xt_table_info *private;
262 struct xt_action_param acpar; 262 struct xt_action_param acpar;
263 unsigned int addend;
263 264
264 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) 265 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
265 return NF_DROP; 266 return NF_DROP;
@@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
267 indev = in ? in->name : nulldevname; 268 indev = in ? in->name : nulldevname;
268 outdev = out ? out->name : nulldevname; 269 outdev = out ? out->name : nulldevname;
269 270
270 xt_info_rdlock_bh(); 271 local_bh_disable();
272 addend = xt_write_recseq_begin();
271 private = table->private; 273 private = table->private;
272 table_base = private->entries[smp_processor_id()]; 274 table_base = private->entries[smp_processor_id()];
273 275
@@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
338 /* Verdict */ 340 /* Verdict */
339 break; 341 break;
340 } while (!acpar.hotdrop); 342 } while (!acpar.hotdrop);
341 xt_info_rdunlock_bh(); 343 xt_write_recseq_end(addend);
344 local_bh_enable();
342 345
343 if (acpar.hotdrop) 346 if (acpar.hotdrop)
344 return NF_DROP; 347 return NF_DROP;
@@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t,
712 unsigned int i; 715 unsigned int i;
713 716
714 for_each_possible_cpu(cpu) { 717 for_each_possible_cpu(cpu) {
715 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; 718 seqcount_t *s = &per_cpu(xt_recseq, cpu);
716 719
717 i = 0; 720 i = 0;
718 xt_entry_foreach(iter, t->entries[cpu], t->size) { 721 xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t,
720 unsigned int start; 723 unsigned int start;
721 724
722 do { 725 do {
723 start = read_seqbegin(lock); 726 start = read_seqcount_begin(s);
724 bcnt = iter->counters.bcnt; 727 bcnt = iter->counters.bcnt;
725 pcnt = iter->counters.pcnt; 728 pcnt = iter->counters.pcnt;
726 } while (read_seqretry(lock, start)); 729 } while (read_seqcount_retry(s, start));
727 730
728 ADD_COUNTER(counters[i], bcnt, pcnt); 731 ADD_COUNTER(counters[i], bcnt, pcnt);
729 ++i; 732 ++i;
@@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1115 int ret = 0; 1118 int ret = 0;
1116 void *loc_cpu_entry; 1119 void *loc_cpu_entry;
1117 struct arpt_entry *iter; 1120 struct arpt_entry *iter;
1121 unsigned int addend;
1118#ifdef CONFIG_COMPAT 1122#ifdef CONFIG_COMPAT
1119 struct compat_xt_counters_info compat_tmp; 1123 struct compat_xt_counters_info compat_tmp;
1120 1124
@@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user,
1171 /* Choose the copy that is on our node */ 1175 /* Choose the copy that is on our node */
1172 curcpu = smp_processor_id(); 1176 curcpu = smp_processor_id();
1173 loc_cpu_entry = private->entries[curcpu]; 1177 loc_cpu_entry = private->entries[curcpu];
1174 xt_info_wrlock(curcpu); 1178 addend = xt_write_recseq_begin();
1175 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1179 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1176 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1180 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1177 ++i; 1181 ++i;
1178 } 1182 }
1179 xt_info_wrunlock(curcpu); 1183 xt_write_recseq_end(addend);
1180 unlock_up_free: 1184 unlock_up_free:
1181 local_bh_enable(); 1185 local_bh_enable();
1182 xt_table_unlock(t); 1186 xt_table_unlock(t);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 704915028009..764743843503 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info)
68} 68}
69EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); 69EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
70 70
71/*
72 We keep a set of rules for each CPU, so we can avoid write-locking
73 them in the softirq when updating the counters and therefore
74 only need to read-lock in the softirq; doing a write_lock_bh() in user
75 context stops packets coming through and allows user context to read
76 the counters or update the rules.
77
78 Hence the start of any table is given by get_table() below. */
79
80/* Returns whether matches rule or not. */ 71/* Returns whether matches rule or not. */
81/* Performance critical - called for every packet */ 72/* Performance critical - called for every packet */
82static inline bool 73static inline bool
@@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb,
311 unsigned int *stackptr, origptr, cpu; 302 unsigned int *stackptr, origptr, cpu;
312 const struct xt_table_info *private; 303 const struct xt_table_info *private;
313 struct xt_action_param acpar; 304 struct xt_action_param acpar;
305 unsigned int addend;
314 306
315 /* Initialization */ 307 /* Initialization */
316 ip = ip_hdr(skb); 308 ip = ip_hdr(skb);
@@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb,
331 acpar.hooknum = hook; 323 acpar.hooknum = hook;
332 324
333 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 325 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
334 xt_info_rdlock_bh(); 326 local_bh_disable();
327 addend = xt_write_recseq_begin();
335 private = table->private; 328 private = table->private;
336 cpu = smp_processor_id(); 329 cpu = smp_processor_id();
337 table_base = private->entries[cpu]; 330 table_base = private->entries[cpu];
@@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb,
430 pr_debug("Exiting %s; resetting sp from %u to %u\n", 423 pr_debug("Exiting %s; resetting sp from %u to %u\n",
431 __func__, *stackptr, origptr); 424 __func__, *stackptr, origptr);
432 *stackptr = origptr; 425 *stackptr = origptr;
433 xt_info_rdunlock_bh(); 426 xt_write_recseq_end(addend);
427 local_bh_enable();
428
434#ifdef DEBUG_ALLOW_ALL 429#ifdef DEBUG_ALLOW_ALL
435 return NF_ACCEPT; 430 return NF_ACCEPT;
436#else 431#else
@@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t,
886 unsigned int i; 881 unsigned int i;
887 882
888 for_each_possible_cpu(cpu) { 883 for_each_possible_cpu(cpu) {
889 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; 884 seqcount_t *s = &per_cpu(xt_recseq, cpu);
890 885
891 i = 0; 886 i = 0;
892 xt_entry_foreach(iter, t->entries[cpu], t->size) { 887 xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t,
894 unsigned int start; 889 unsigned int start;
895 890
896 do { 891 do {
897 start = read_seqbegin(lock); 892 start = read_seqcount_begin(s);
898 bcnt = iter->counters.bcnt; 893 bcnt = iter->counters.bcnt;
899 pcnt = iter->counters.pcnt; 894 pcnt = iter->counters.pcnt;
900 } while (read_seqretry(lock, start)); 895 } while (read_seqcount_retry(s, start));
901 896
902 ADD_COUNTER(counters[i], bcnt, pcnt); 897 ADD_COUNTER(counters[i], bcnt, pcnt);
903 ++i; /* macro does multi eval of i */ 898 ++i; /* macro does multi eval of i */
@@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user,
1312 int ret = 0; 1307 int ret = 0;
1313 void *loc_cpu_entry; 1308 void *loc_cpu_entry;
1314 struct ipt_entry *iter; 1309 struct ipt_entry *iter;
1310 unsigned int addend;
1315#ifdef CONFIG_COMPAT 1311#ifdef CONFIG_COMPAT
1316 struct compat_xt_counters_info compat_tmp; 1312 struct compat_xt_counters_info compat_tmp;
1317 1313
@@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user,
1368 /* Choose the copy that is on our node */ 1364 /* Choose the copy that is on our node */
1369 curcpu = smp_processor_id(); 1365 curcpu = smp_processor_id();
1370 loc_cpu_entry = private->entries[curcpu]; 1366 loc_cpu_entry = private->entries[curcpu];
1371 xt_info_wrlock(curcpu); 1367 addend = xt_write_recseq_begin();
1372 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1368 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1373 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1369 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1374 ++i; 1370 ++i;
1375 } 1371 }
1376 xt_info_wrunlock(curcpu); 1372 xt_write_recseq_end(addend);
1377 unlock_up_free: 1373 unlock_up_free:
1378 local_bh_enable(); 1374 local_bh_enable();
1379 xt_table_unlock(t); 1375 xt_table_unlock(t);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 31427fb57aa8..99cfa28b6d38 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,7 +153,7 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
153} 153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); 154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155 155
156static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data, 156static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
157 int datalen, __sum16 *check, int oldlen) 157 int datalen, __sum16 *check, int oldlen)
158{ 158{
159 struct rtable *rt = skb_rtable(skb); 159 struct rtable *rt = skb_rtable(skb);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
new file mode 100644
index 000000000000..9aaa67165f42
--- /dev/null
+++ b/net/ipv4/ping.c
@@ -0,0 +1,932 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * "Ping" sockets
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * Based on ipv4/udp.c code.
14 *
15 * Authors: Vasiliy Kulikov / Openwall (for Linux 2.6),
16 * Pavel Kankovsky (for Linux 2.4.32)
17 *
18 * Pavel gave all rights to bugs to Vasiliy,
19 * none of the bugs are Pavel's now.
20 *
21 */
22
23#include <asm/system.h>
24#include <linux/uaccess.h>
25#include <linux/types.h>
26#include <linux/fcntl.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/in.h>
30#include <linux/errno.h>
31#include <linux/timer.h>
32#include <linux/mm.h>
33#include <linux/inet.h>
34#include <linux/netdevice.h>
35#include <net/snmp.h>
36#include <net/ip.h>
37#include <net/ipv6.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <linux/skbuff.h>
41#include <linux/proc_fs.h>
42#include <net/sock.h>
43#include <net/ping.h>
44#include <net/icmp.h>
45#include <net/udp.h>
46#include <net/route.h>
47#include <net/inet_common.h>
48#include <net/checksum.h>
49
50
51static struct ping_table ping_table;
52
53static u16 ping_port_rover;
54
55static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask)
56{
57 int res = (num + net_hash_mix(net)) & mask;
58 pr_debug("hash(%d) = %d\n", num, res);
59 return res;
60}
61
62static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
63 struct net *net, unsigned num)
64{
65 return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
66}
67
68static int ping_v4_get_port(struct sock *sk, unsigned short ident)
69{
70 struct hlist_nulls_node *node;
71 struct hlist_nulls_head *hlist;
72 struct inet_sock *isk, *isk2;
73 struct sock *sk2 = NULL;
74
75 isk = inet_sk(sk);
76 write_lock_bh(&ping_table.lock);
77 if (ident == 0) {
78 u32 i;
79 u16 result = ping_port_rover + 1;
80
81 for (i = 0; i < (1L << 16); i++, result++) {
82 if (!result)
83 result++; /* avoid zero */
84 hlist = ping_hashslot(&ping_table, sock_net(sk),
85 result);
86 ping_portaddr_for_each_entry(sk2, node, hlist) {
87 isk2 = inet_sk(sk2);
88
89 if (isk2->inet_num == result)
90 goto next_port;
91 }
92
93 /* found */
94 ping_port_rover = ident = result;
95 break;
96next_port:
97 ;
98 }
99 if (i >= (1L << 16))
100 goto fail;
101 } else {
102 hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
103 ping_portaddr_for_each_entry(sk2, node, hlist) {
104 isk2 = inet_sk(sk2);
105
106 if ((isk2->inet_num == ident) &&
107 (sk2 != sk) &&
108 (!sk2->sk_reuse || !sk->sk_reuse))
109 goto fail;
110 }
111 }
112
113 pr_debug("found port/ident = %d\n", ident);
114 isk->inet_num = ident;
115 if (sk_unhashed(sk)) {
116 pr_debug("was not hashed\n");
117 sock_hold(sk);
118 hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
119 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
120 }
121 write_unlock_bh(&ping_table.lock);
122 return 0;
123
124fail:
125 write_unlock_bh(&ping_table.lock);
126 return 1;
127}
128
129static void ping_v4_hash(struct sock *sk)
130{
131 pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
132 BUG(); /* "Please do not press this button again." */
133}
134
135static void ping_v4_unhash(struct sock *sk)
136{
137 struct inet_sock *isk = inet_sk(sk);
138 pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
139 if (sk_hashed(sk)) {
140 write_lock_bh(&ping_table.lock);
141 hlist_nulls_del(&sk->sk_nulls_node);
142 sock_put(sk);
143 isk->inet_num = isk->inet_sport = 0;
144 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
145 write_unlock_bh(&ping_table.lock);
146 }
147}
148
149static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr,
150 u16 ident, int dif)
151{
152 struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
153 struct sock *sk = NULL;
154 struct inet_sock *isk;
155 struct hlist_nulls_node *hnode;
156
157 pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n",
158 (int)ident, (unsigned long)daddr, dif);
159 read_lock_bh(&ping_table.lock);
160
161 ping_portaddr_for_each_entry(sk, hnode, hslot) {
162 isk = inet_sk(sk);
163
164 pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk,
165 (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr,
166 sk->sk_bound_dev_if);
167
168 pr_debug("iterate\n");
169 if (isk->inet_num != ident)
170 continue;
171 if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
172 continue;
173 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
174 continue;
175
176 sock_hold(sk);
177 goto exit;
178 }
179
180 sk = NULL;
181exit:
182 read_unlock_bh(&ping_table.lock);
183
184 return sk;
185}
186
187static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
188 gid_t *high)
189{
190 gid_t *data = net->ipv4.sysctl_ping_group_range;
191 unsigned seq;
192 do {
193 seq = read_seqbegin(&sysctl_local_ports.lock);
194
195 *low = data[0];
196 *high = data[1];
197 } while (read_seqretry(&sysctl_local_ports.lock, seq));
198}
199
200
201static int ping_init_sock(struct sock *sk)
202{
203 struct net *net = sock_net(sk);
204 gid_t group = current_egid();
205 gid_t range[2];
206 struct group_info *group_info = get_current_groups();
207 int i, j, count = group_info->ngroups;
208
209 inet_get_ping_group_range_net(net, range, range+1);
210 if (range[0] <= group && group <= range[1])
211 return 0;
212
213 for (i = 0; i < group_info->nblocks; i++) {
214 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
215
216 for (j = 0; j < cp_count; j++) {
217 group = group_info->blocks[i][j];
218 if (range[0] <= group && group <= range[1])
219 return 0;
220 }
221
222 count -= cp_count;
223 }
224
225 return -EACCES;
226}
227
228static void ping_close(struct sock *sk, long timeout)
229{
230 pr_debug("ping_close(sk=%p,sk->num=%u)\n",
231 inet_sk(sk), inet_sk(sk)->inet_num);
232 pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
233
234 sk_common_release(sk);
235}
236
237/*
238 * We need our own bind because there are no privileged id's == local ports.
239 * Moreover, we don't allow binding to multi- and broadcast addresses.
240 */
241
242static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
243{
244 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
245 struct inet_sock *isk = inet_sk(sk);
246 unsigned short snum;
247 int chk_addr_ret;
248 int err;
249
250 if (addr_len < sizeof(struct sockaddr_in))
251 return -EINVAL;
252
253 pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
254 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
255
256 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
257 if (addr->sin_addr.s_addr == INADDR_ANY)
258 chk_addr_ret = RTN_LOCAL;
259
260 if ((sysctl_ip_nonlocal_bind == 0 &&
261 isk->freebind == 0 && isk->transparent == 0 &&
262 chk_addr_ret != RTN_LOCAL) ||
263 chk_addr_ret == RTN_MULTICAST ||
264 chk_addr_ret == RTN_BROADCAST)
265 return -EADDRNOTAVAIL;
266
267 lock_sock(sk);
268
269 err = -EINVAL;
270 if (isk->inet_num != 0)
271 goto out;
272
273 err = -EADDRINUSE;
274 isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
275 snum = ntohs(addr->sin_port);
276 if (ping_v4_get_port(sk, snum) != 0) {
277 isk->inet_saddr = isk->inet_rcv_saddr = 0;
278 goto out;
279 }
280
281 pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n",
282 (int)isk->inet_num,
283 (unsigned long) isk->inet_rcv_saddr,
284 (int)sk->sk_bound_dev_if);
285
286 err = 0;
287 if (isk->inet_rcv_saddr)
288 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
289 if (snum)
290 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
291 isk->inet_sport = htons(isk->inet_num);
292 isk->inet_daddr = 0;
293 isk->inet_dport = 0;
294 sk_dst_reset(sk);
295out:
296 release_sock(sk);
297 pr_debug("ping_v4_bind -> %d\n", err);
298 return err;
299}
300
301/*
302 * Is this a supported type of ICMP message?
303 */
304
305static inline int ping_supported(int type, int code)
306{
307 if (type == ICMP_ECHO && code == 0)
308 return 1;
309 return 0;
310}
311
312/*
313 * This routine is called by the ICMP module when it gets some
314 * sort of error condition.
315 */
316
317static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
318
319void ping_err(struct sk_buff *skb, u32 info)
320{
321 struct iphdr *iph = (struct iphdr *)skb->data;
322 struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
323 struct inet_sock *inet_sock;
324 int type = icmph->type;
325 int code = icmph->code;
326 struct net *net = dev_net(skb->dev);
327 struct sock *sk;
328 int harderr;
329 int err;
330
331 /* We assume the packet has already been checked by icmp_unreach */
332
333 if (!ping_supported(icmph->type, icmph->code))
334 return;
335
336 pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
337 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
338
339 sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
340 ntohs(icmph->un.echo.id), skb->dev->ifindex);
341 if (sk == NULL) {
342 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
343 pr_debug("no socket, dropping\n");
344 return; /* No socket for error */
345 }
346 pr_debug("err on socket %p\n", sk);
347
348 err = 0;
349 harderr = 0;
350 inet_sock = inet_sk(sk);
351
352 switch (type) {
353 default:
354 case ICMP_TIME_EXCEEDED:
355 err = EHOSTUNREACH;
356 break;
357 case ICMP_SOURCE_QUENCH:
358 /* This is not a real error but ping wants to see it.
359 * Report it with some fake errno. */
360 err = EREMOTEIO;
361 break;
362 case ICMP_PARAMETERPROB:
363 err = EPROTO;
364 harderr = 1;
365 break;
366 case ICMP_DEST_UNREACH:
367 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
368 if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
369 err = EMSGSIZE;
370 harderr = 1;
371 break;
372 }
373 goto out;
374 }
375 err = EHOSTUNREACH;
376 if (code <= NR_ICMP_UNREACH) {
377 harderr = icmp_err_convert[code].fatal;
378 err = icmp_err_convert[code].errno;
379 }
380 break;
381 case ICMP_REDIRECT:
382 /* See ICMP_SOURCE_QUENCH */
383 err = EREMOTEIO;
384 break;
385 }
386
387 /*
388 * RFC1122: OK. Passes ICMP errors back to application, as per
389 * 4.1.3.3.
390 */
391 if (!inet_sock->recverr) {
392 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
393 goto out;
394 } else {
395 ip_icmp_error(sk, skb, err, 0 /* no remote port */,
396 info, (u8 *)icmph);
397 }
398 sk->sk_err = err;
399 sk->sk_error_report(sk);
400out:
401 sock_put(sk);
402}
403
404/*
405 * Copy and checksum an ICMP Echo packet from user space into a buffer.
406 */
407
408struct pingfakehdr {
409 struct icmphdr icmph;
410 struct iovec *iov;
411 u32 wcheck;
412};
413
414static int ping_getfrag(void *from, char * to,
415 int offset, int fraglen, int odd, struct sk_buff *skb)
416{
417 struct pingfakehdr *pfh = (struct pingfakehdr *)from;
418
419 if (offset == 0) {
420 if (fraglen < sizeof(struct icmphdr))
421 BUG();
422 if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr),
423 pfh->iov, 0, fraglen - sizeof(struct icmphdr),
424 &pfh->wcheck))
425 return -EFAULT;
426
427 return 0;
428 }
429 if (offset < sizeof(struct icmphdr))
430 BUG();
431 if (csum_partial_copy_fromiovecend
432 (to, pfh->iov, offset - sizeof(struct icmphdr),
433 fraglen, &pfh->wcheck))
434 return -EFAULT;
435 return 0;
436}
437
438static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
439 struct flowi4 *fl4)
440{
441 struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
442
443 pfh->wcheck = csum_partial((char *)&pfh->icmph,
444 sizeof(struct icmphdr), pfh->wcheck);
445 pfh->icmph.checksum = csum_fold(pfh->wcheck);
446 memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
447 skb->ip_summed = CHECKSUM_NONE;
448 return ip_push_pending_frames(sk, fl4);
449}
450
451static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
452 size_t len)
453{
454 struct net *net = sock_net(sk);
455 struct flowi4 fl4;
456 struct inet_sock *inet = inet_sk(sk);
457 struct ipcm_cookie ipc;
458 struct icmphdr user_icmph;
459 struct pingfakehdr pfh;
460 struct rtable *rt = NULL;
461 struct ip_options_data opt_copy;
462 int free = 0;
463 u32 saddr, daddr, faddr;
464 u8 tos;
465 int err;
466
467 pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
468
469
470 if (len > 0xFFFF)
471 return -EMSGSIZE;
472
473 /*
474 * Check the flags.
475 */
476
477 /* Mirror BSD error message compatibility */
478 if (msg->msg_flags & MSG_OOB)
479 return -EOPNOTSUPP;
480
481 /*
482 * Fetch the ICMP header provided by the userland.
483 * iovec is modified!
484 */
485
486 if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
487 sizeof(struct icmphdr)))
488 return -EFAULT;
489 if (!ping_supported(user_icmph.type, user_icmph.code))
490 return -EINVAL;
491
492 /*
493 * Get and verify the address.
494 */
495
496 if (msg->msg_name) {
497 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
498 if (msg->msg_namelen < sizeof(*usin))
499 return -EINVAL;
500 if (usin->sin_family != AF_INET)
501 return -EINVAL;
502 daddr = usin->sin_addr.s_addr;
503 /* no remote port */
504 } else {
505 if (sk->sk_state != TCP_ESTABLISHED)
506 return -EDESTADDRREQ;
507 daddr = inet->inet_daddr;
508 /* no remote port */
509 }
510
511 ipc.addr = inet->inet_saddr;
512 ipc.opt = NULL;
513 ipc.oif = sk->sk_bound_dev_if;
514 ipc.tx_flags = 0;
515 err = sock_tx_timestamp(sk, &ipc.tx_flags);
516 if (err)
517 return err;
518
519 if (msg->msg_controllen) {
520 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
521 if (err)
522 return err;
523 if (ipc.opt)
524 free = 1;
525 }
526 if (!ipc.opt) {
527 struct ip_options_rcu *inet_opt;
528
529 rcu_read_lock();
530 inet_opt = rcu_dereference(inet->inet_opt);
531 if (inet_opt) {
532 memcpy(&opt_copy, inet_opt,
533 sizeof(*inet_opt) + inet_opt->opt.optlen);
534 ipc.opt = &opt_copy.opt;
535 }
536 rcu_read_unlock();
537 }
538
539 saddr = ipc.addr;
540 ipc.addr = faddr = daddr;
541
542 if (ipc.opt && ipc.opt->opt.srr) {
543 if (!daddr)
544 return -EINVAL;
545 faddr = ipc.opt->opt.faddr;
546 }
547 tos = RT_TOS(inet->tos);
548 if (sock_flag(sk, SOCK_LOCALROUTE) ||
549 (msg->msg_flags & MSG_DONTROUTE) ||
550 (ipc.opt && ipc.opt->opt.is_strictroute)) {
551 tos |= RTO_ONLINK;
552 }
553
554 if (ipv4_is_multicast(daddr)) {
555 if (!ipc.oif)
556 ipc.oif = inet->mc_index;
557 if (!saddr)
558 saddr = inet->mc_addr;
559 }
560
561 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
562 RT_SCOPE_UNIVERSE, sk->sk_protocol,
563 inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
564
565 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
566 rt = ip_route_output_flow(net, &fl4, sk);
567 if (IS_ERR(rt)) {
568 err = PTR_ERR(rt);
569 rt = NULL;
570 if (err == -ENETUNREACH)
571 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
572 goto out;
573 }
574
575 err = -EACCES;
576 if ((rt->rt_flags & RTCF_BROADCAST) &&
577 !sock_flag(sk, SOCK_BROADCAST))
578 goto out;
579
580 if (msg->msg_flags & MSG_CONFIRM)
581 goto do_confirm;
582back_from_confirm:
583
584 if (!ipc.addr)
585 ipc.addr = fl4.daddr;
586
587 lock_sock(sk);
588
589 pfh.icmph.type = user_icmph.type; /* already checked */
590 pfh.icmph.code = user_icmph.code; /* ditto */
591 pfh.icmph.checksum = 0;
592 pfh.icmph.un.echo.id = inet->inet_sport;
593 pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
594 pfh.iov = msg->msg_iov;
595 pfh.wcheck = 0;
596
597 err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
598 0, &ipc, &rt, msg->msg_flags);
599 if (err)
600 ip_flush_pending_frames(sk);
601 else
602 err = ping_push_pending_frames(sk, &pfh, &fl4);
603 release_sock(sk);
604
605out:
606 ip_rt_put(rt);
607 if (free)
608 kfree(ipc.opt);
609 if (!err) {
610 icmp_out_count(sock_net(sk), user_icmph.type);
611 return len;
612 }
613 return err;
614
615do_confirm:
616 dst_confirm(&rt->dst);
617 if (!(msg->msg_flags & MSG_PROBE) || len)
618 goto back_from_confirm;
619 err = 0;
620 goto out;
621}
622
623static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
624 size_t len, int noblock, int flags, int *addr_len)
625{
626 struct inet_sock *isk = inet_sk(sk);
627 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
628 struct sk_buff *skb;
629 int copied, err;
630
631 pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
632
633 if (flags & MSG_OOB)
634 goto out;
635
636 if (addr_len)
637 *addr_len = sizeof(*sin);
638
639 if (flags & MSG_ERRQUEUE)
640 return ip_recv_error(sk, msg, len);
641
642 skb = skb_recv_datagram(sk, flags, noblock, &err);
643 if (!skb)
644 goto out;
645
646 copied = skb->len;
647 if (copied > len) {
648 msg->msg_flags |= MSG_TRUNC;
649 copied = len;
650 }
651
652 /* Don't bother checking the checksum */
653 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
654 if (err)
655 goto done;
656
657 sock_recv_timestamp(msg, sk, skb);
658
659 /* Copy the address. */
660 if (sin) {
661 sin->sin_family = AF_INET;
662 sin->sin_port = 0 /* skb->h.uh->source */;
663 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
664 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
665 }
666 if (isk->cmsg_flags)
667 ip_cmsg_recv(msg, skb);
668 err = copied;
669
670done:
671 skb_free_datagram(sk, skb);
672out:
673 pr_debug("ping_recvmsg -> %d\n", err);
674 return err;
675}
676
677static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
678{
679 pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
680 inet_sk(sk), inet_sk(sk)->inet_num, skb);
681 if (sock_queue_rcv_skb(sk, skb) < 0) {
682 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS);
683 kfree_skb(skb);
684 pr_debug("ping_queue_rcv_skb -> failed\n");
685 return -1;
686 }
687 return 0;
688}
689
690
691/*
692 * All we need to do is get the socket.
693 */
694
695void ping_rcv(struct sk_buff *skb)
696{
697 struct sock *sk;
698 struct net *net = dev_net(skb->dev);
699 struct iphdr *iph = ip_hdr(skb);
700 struct icmphdr *icmph = icmp_hdr(skb);
701 u32 saddr = iph->saddr;
702 u32 daddr = iph->daddr;
703
704 /* We assume the packet has already been checked by icmp_rcv */
705
706 pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
707 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
708
709 /* Push ICMP header back */
710 skb_push(skb, skb->data - (u8 *)icmph);
711
712 sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
713 skb->dev->ifindex);
714 if (sk != NULL) {
715 pr_debug("rcv on socket %p\n", sk);
716 ping_queue_rcv_skb(sk, skb_get(skb));
717 sock_put(sk);
718 return;
719 }
720 pr_debug("no socket, dropping\n");
721
722 /* We're called from icmp_rcv(). kfree_skb() is done there. */
723}
724
/*
 * Protocol operations for IPPROTO_ICMP datagram ("ping") sockets.
 * Generic datagram/UDP helpers are reused where the behaviour is shared;
 * only the ICMP-specific send/receive/bind/hash paths are local.
 */
struct proto ping_prot = {
	.name =		"PING",
	.owner =	THIS_MODULE,
	.init =		ping_init_sock,
	.close =	ping_close,
	.connect =	ip4_datagram_connect,
	.disconnect =	udp_disconnect,
	.setsockopt =	ip_setsockopt,
	.getsockopt =	ip_getsockopt,
	.sendmsg =	ping_sendmsg,
	.recvmsg =	ping_recvmsg,
	.bind =		ping_bind,
	.backlog_rcv =	ping_queue_rcv_skb,
	.hash =		ping_v4_hash,
	.unhash =	ping_v4_unhash,
	.get_port =	ping_v4_get_port,
	.obj_size =	sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
744
745#ifdef CONFIG_PROC_FS
746
747static struct sock *ping_get_first(struct seq_file *seq, int start)
748{
749 struct sock *sk;
750 struct ping_iter_state *state = seq->private;
751 struct net *net = seq_file_net(seq);
752
753 for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
754 ++state->bucket) {
755 struct hlist_nulls_node *node;
756 struct hlist_nulls_head *hslot;
757
758 hslot = &ping_table.hash[state->bucket];
759
760 if (hlist_nulls_empty(hslot))
761 continue;
762
763 sk_nulls_for_each(sk, node, hslot) {
764 if (net_eq(sock_net(sk), net))
765 goto found;
766 }
767 }
768 sk = NULL;
769found:
770 return sk;
771}
772
773static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
774{
775 struct ping_iter_state *state = seq->private;
776 struct net *net = seq_file_net(seq);
777
778 do {
779 sk = sk_nulls_next(sk);
780 } while (sk && (!net_eq(sock_net(sk), net)));
781
782 if (!sk)
783 return ping_get_first(seq, state->bucket + 1);
784 return sk;
785}
786
787static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
788{
789 struct sock *sk = ping_get_first(seq, 0);
790
791 if (sk)
792 while (pos && (sk = ping_get_next(seq, sk)) != NULL)
793 --pos;
794 return pos ? NULL : sk;
795}
796
797static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
798{
799 struct ping_iter_state *state = seq->private;
800 state->bucket = 0;
801
802 read_lock_bh(&ping_table.lock);
803
804 return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
805}
806
807static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
808{
809 struct sock *sk;
810
811 if (v == SEQ_START_TOKEN)
812 sk = ping_get_idx(seq, 0);
813 else
814 sk = ping_get_next(seq, v);
815
816 ++*pos;
817 return sk;
818}
819
/* seq_file stop: drop the table lock taken in ping_seq_start(). */
static void ping_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&ping_table.lock);
}
824
/*
 * Format one ping socket as a /proc/net/icmp line, mirroring the column
 * layout of the other ipv4 proc socket files; *len receives the number
 * of characters written (via %n) so the caller can pad the line.
 */
static void ping_format_sock(struct sock *sp, struct seq_file *f,
		int bucket, int *len)
{
	struct inet_sock *inet = inet_sk(sp);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);

	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
		bucket, src, srcp, dest, destp, sp->sk_state,
		sk_wmem_alloc_get(sp),
		sk_rmem_alloc_get(sp),
		/* tr, tm->when and retrnsmt have no meaning for ping */
		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		atomic_read(&sp->sk_drops), len);
}
843
/*
 * Render one line of /proc/net/icmp (the column header for the start
 * token), padded to the fixed 127-column width of the ipv4 proc files.
 */
static int ping_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode ref pointer drops");
	else {
		struct ping_iter_state *state = seq->private;
		int len;

		/* %n in ping_format_sock() reports how much was written. */
		ping_format_sock(v, seq, state->bucket, &len);
		seq_printf(seq, "%*s\n", 127 - len, "");
	}
	return 0;
}
860
/* seq_file iterator for /proc/net/icmp. */
static const struct seq_operations ping_seq_ops = {
	.show		= ping_seq_show,
	.start		= ping_seq_start,
	.next		= ping_seq_next,
	.stop		= ping_seq_stop,
};
867
/* Open /proc/net/icmp, allocating per-namespace iterator state. */
static int ping_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ping_seq_ops,
			sizeof(struct ping_iter_state));
}
873
/* File operations for /proc/net/icmp; read/seek handled by seq_file. */
static const struct file_operations ping_seq_fops = {
	.open		= ping_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
880
881static int ping_proc_register(struct net *net)
882{
883 struct proc_dir_entry *p;
884 int rc = 0;
885
886 p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops);
887 if (!p)
888 rc = -ENOMEM;
889 return rc;
890}
891
/* Remove the /proc/net/icmp entry from @net. */
static void ping_proc_unregister(struct net *net)
{
	proc_net_remove(net, "icmp");
}
896
897
/* Per-netns init hook: create the proc entry. */
static int __net_init ping_proc_init_net(struct net *net)
{
	return ping_proc_register(net);
}
902
/* Per-netns exit hook: tear the proc entry down. */
static void __net_exit ping_proc_exit_net(struct net *net)
{
	ping_proc_unregister(net);
}
907
/* Creates/destroys /proc/net/icmp as network namespaces come and go. */
static struct pernet_operations ping_net_ops = {
	.init = ping_proc_init_net,
	.exit = ping_proc_exit_net,
};
912
/* Register the per-netns /proc/net/icmp hooks at boot. */
int __init ping_proc_init(void)
{
	return register_pernet_subsys(&ping_net_ops);
}
917
/* Unregister the per-netns /proc/net/icmp hooks. */
void ping_proc_exit(void)
{
	unregister_pernet_subsys(&ping_net_ops);
}
922
923#endif
924
925void __init ping_init(void)
926{
927 int i;
928
929 for (i = 0; i < PING_HTABLE_SIZE; i++)
930 INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
931 rwlock_init(&ping_table.lock);
932}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bceaec42c37d..c9893d43242e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -154,7 +154,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
154 * RFC 1122: SHOULD pass TOS value up to the transport layer. 154 * RFC 1122: SHOULD pass TOS value up to the transport layer.
155 * -> It does. And not only TOS, but all IP header. 155 * -> It does. And not only TOS, but all IP header.
156 */ 156 */
157static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) 157static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
158{ 158{
159 struct sock *sk; 159 struct sock *sk;
160 struct hlist_head *head; 160 struct hlist_head *head;
@@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
247 } 247 }
248 248
249 if (inet->recverr) { 249 if (inet->recverr) {
250 struct iphdr *iph = (struct iphdr *)skb->data; 250 const struct iphdr *iph = (const struct iphdr *)skb->data;
251 u8 *payload = skb->data + (iph->ihl << 2); 251 u8 *payload = skb->data + (iph->ihl << 2);
252 252
253 if (inet->hdrincl) 253 if (inet->hdrincl)
@@ -265,7 +265,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
265{ 265{
266 int hash; 266 int hash;
267 struct sock *raw_sk; 267 struct sock *raw_sk;
268 struct iphdr *iph; 268 const struct iphdr *iph;
269 struct net *net; 269 struct net *net;
270 270
271 hash = protocol & (RAW_HTABLE_SIZE - 1); 271 hash = protocol & (RAW_HTABLE_SIZE - 1);
@@ -273,7 +273,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
273 read_lock(&raw_v4_hashinfo.lock); 273 read_lock(&raw_v4_hashinfo.lock);
274 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); 274 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
275 if (raw_sk != NULL) { 275 if (raw_sk != NULL) {
276 iph = (struct iphdr *)skb->data; 276 iph = (const struct iphdr *)skb->data;
277 net = dev_net(skb->dev); 277 net = dev_net(skb->dev);
278 278
279 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, 279 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
@@ -281,7 +281,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
281 skb->dev->ifindex)) != NULL) { 281 skb->dev->ifindex)) != NULL) {
282 raw_err(raw_sk, skb, info); 282 raw_err(raw_sk, skb, info);
283 raw_sk = sk_next(raw_sk); 283 raw_sk = sk_next(raw_sk);
284 iph = (struct iphdr *)skb->data; 284 iph = (const struct iphdr *)skb->data;
285 } 285 }
286 } 286 }
287 read_unlock(&raw_v4_hashinfo.lock); 287 read_unlock(&raw_v4_hashinfo.lock);
@@ -314,9 +314,10 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
314 return 0; 314 return 0;
315} 315}
316 316
317static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, 317static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
318 struct rtable **rtp, 318 void *from, size_t length,
319 unsigned int flags) 319 struct rtable **rtp,
320 unsigned int flags)
320{ 321{
321 struct inet_sock *inet = inet_sk(sk); 322 struct inet_sock *inet = inet_sk(sk);
322 struct net *net = sock_net(sk); 323 struct net *net = sock_net(sk);
@@ -327,7 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
327 struct rtable *rt = *rtp; 328 struct rtable *rt = *rtp;
328 329
329 if (length > rt->dst.dev->mtu) { 330 if (length > rt->dst.dev->mtu) {
330 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 331 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
331 rt->dst.dev->mtu); 332 rt->dst.dev->mtu);
332 return -EMSGSIZE; 333 return -EMSGSIZE;
333 } 334 }
@@ -372,7 +373,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
372 373
373 if (iphlen >= sizeof(*iph)) { 374 if (iphlen >= sizeof(*iph)) {
374 if (!iph->saddr) 375 if (!iph->saddr)
375 iph->saddr = rt->rt_src; 376 iph->saddr = fl4->saddr;
376 iph->check = 0; 377 iph->check = 0;
377 iph->tot_len = htons(length); 378 iph->tot_len = htons(length);
378 if (!iph->id) 379 if (!iph->id)
@@ -455,11 +456,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
455 struct inet_sock *inet = inet_sk(sk); 456 struct inet_sock *inet = inet_sk(sk);
456 struct ipcm_cookie ipc; 457 struct ipcm_cookie ipc;
457 struct rtable *rt = NULL; 458 struct rtable *rt = NULL;
459 struct flowi4 fl4;
458 int free = 0; 460 int free = 0;
459 __be32 daddr; 461 __be32 daddr;
460 __be32 saddr; 462 __be32 saddr;
461 u8 tos; 463 u8 tos;
462 int err; 464 int err;
465 struct ip_options_data opt_copy;
463 466
464 err = -EMSGSIZE; 467 err = -EMSGSIZE;
465 if (len > 0xFFFF) 468 if (len > 0xFFFF)
@@ -520,8 +523,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
520 saddr = ipc.addr; 523 saddr = ipc.addr;
521 ipc.addr = daddr; 524 ipc.addr = daddr;
522 525
523 if (!ipc.opt) 526 if (!ipc.opt) {
524 ipc.opt = inet->opt; 527 struct ip_options_rcu *inet_opt;
528
529 rcu_read_lock();
530 inet_opt = rcu_dereference(inet->inet_opt);
531 if (inet_opt) {
532 memcpy(&opt_copy, inet_opt,
533 sizeof(*inet_opt) + inet_opt->opt.optlen);
534 ipc.opt = &opt_copy.opt;
535 }
536 rcu_read_unlock();
537 }
525 538
526 if (ipc.opt) { 539 if (ipc.opt) {
527 err = -EINVAL; 540 err = -EINVAL;
@@ -530,10 +543,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
530 */ 543 */
531 if (inet->hdrincl) 544 if (inet->hdrincl)
532 goto done; 545 goto done;
533 if (ipc.opt->srr) { 546 if (ipc.opt->opt.srr) {
534 if (!daddr) 547 if (!daddr)
535 goto done; 548 goto done;
536 daddr = ipc.opt->faddr; 549 daddr = ipc.opt->opt.faddr;
537 } 550 }
538 } 551 }
539 tos = RT_CONN_FLAGS(sk); 552 tos = RT_CONN_FLAGS(sk);
@@ -547,31 +560,23 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
547 saddr = inet->mc_addr; 560 saddr = inet->mc_addr;
548 } 561 }
549 562
550 { 563 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
551 struct flowi4 fl4 = { 564 RT_SCOPE_UNIVERSE,
552 .flowi4_oif = ipc.oif, 565 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
553 .flowi4_mark = sk->sk_mark, 566 FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
554 .daddr = daddr,
555 .saddr = saddr,
556 .flowi4_tos = tos,
557 .flowi4_proto = (inet->hdrincl ?
558 IPPROTO_RAW :
559 sk->sk_protocol),
560 .flowi4_flags = FLOWI_FLAG_CAN_SLEEP,
561 };
562 if (!inet->hdrincl) {
563 err = raw_probe_proto_opt(&fl4, msg);
564 if (err)
565 goto done;
566 }
567 567
568 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 568 if (!inet->hdrincl) {
569 rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 569 err = raw_probe_proto_opt(&fl4, msg);
570 if (IS_ERR(rt)) { 570 if (err)
571 err = PTR_ERR(rt);
572 rt = NULL;
573 goto done; 571 goto done;
574 } 572 }
573
574 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
575 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
576 if (IS_ERR(rt)) {
577 err = PTR_ERR(rt);
578 rt = NULL;
579 goto done;
575 } 580 }
576 581
577 err = -EACCES; 582 err = -EACCES;
@@ -583,19 +588,20 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
583back_from_confirm: 588back_from_confirm:
584 589
585 if (inet->hdrincl) 590 if (inet->hdrincl)
586 err = raw_send_hdrinc(sk, msg->msg_iov, len, 591 err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len,
587 &rt, msg->msg_flags); 592 &rt, msg->msg_flags);
588 593
589 else { 594 else {
590 if (!ipc.addr) 595 if (!ipc.addr)
591 ipc.addr = rt->rt_dst; 596 ipc.addr = fl4.daddr;
592 lock_sock(sk); 597 lock_sock(sk);
593 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, 598 err = ip_append_data(sk, &fl4, ip_generic_getfrag,
594 &ipc, &rt, msg->msg_flags); 599 msg->msg_iov, len, 0,
600 &ipc, &rt, msg->msg_flags);
595 if (err) 601 if (err)
596 ip_flush_pending_frames(sk); 602 ip_flush_pending_frames(sk);
597 else if (!(msg->msg_flags & MSG_MORE)) { 603 else if (!(msg->msg_flags & MSG_MORE)) {
598 err = ip_push_pending_frames(sk); 604 err = ip_push_pending_frames(sk, &fl4);
599 if (err == -ENOBUFS && !inet->recverr) 605 if (err == -ENOBUFS && !inet->recverr)
600 err = 0; 606 err = 0;
601 } 607 }
@@ -973,7 +979,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
973 srcp = inet->inet_num; 979 srcp = inet->inet_num;
974 980
975 seq_printf(seq, "%4d: %08X:%04X %08X:%04X" 981 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
976 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", 982 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
977 i, src, srcp, dest, destp, sp->sk_state, 983 i, src, srcp, dest, destp, sp->sk_state,
978 sk_wmem_alloc_get(sp), 984 sk_wmem_alloc_get(sp),
979 sk_rmem_alloc_get(sp), 985 sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 99e6e4bb1c72..52b0b956508b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -156,7 +156,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
156 u32 *p = NULL; 156 u32 *p = NULL;
157 157
158 if (!rt->peer) 158 if (!rt->peer)
159 rt_bind_peer(rt, 1); 159 rt_bind_peer(rt, rt->rt_dst, 1);
160 160
161 peer = rt->peer; 161 peer = rt->peer;
162 if (peer) { 162 if (peer) {
@@ -424,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
424 dst_metric(&r->dst, RTAX_WINDOW), 424 dst_metric(&r->dst, RTAX_WINDOW),
425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + 425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
426 dst_metric(&r->dst, RTAX_RTTVAR)), 426 dst_metric(&r->dst, RTAX_RTTVAR)),
427 r->rt_tos, 427 r->rt_key_tos,
428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, 428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
429 r->dst.hh ? (r->dst.hh->hh_output == 429 r->dst.hh ? (r->dst.hh->hh_output ==
430 dev_queue_xmit) : 0, 430 dev_queue_xmit) : 0,
@@ -724,7 +724,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
724 return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | 724 return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
725 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | 725 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
726 (rt1->rt_mark ^ rt2->rt_mark) | 726 (rt1->rt_mark ^ rt2->rt_mark) |
727 (rt1->rt_tos ^ rt2->rt_tos) | 727 (rt1->rt_key_tos ^ rt2->rt_key_tos) |
728 (rt1->rt_oif ^ rt2->rt_oif) | 728 (rt1->rt_oif ^ rt2->rt_oif) |
729 (rt1->rt_iif ^ rt2->rt_iif)) == 0; 729 (rt1->rt_iif ^ rt2->rt_iif)) == 0;
730} 730}
@@ -968,10 +968,6 @@ static int rt_garbage_collect(struct dst_ops *ops)
968 break; 968 break;
969 969
970 expire >>= 1; 970 expire >>= 1;
971#if RT_CACHE_DEBUG >= 2
972 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
973 dst_entries_get_fast(&ipv4_dst_ops), goal, i);
974#endif
975 971
976 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) 972 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
977 goto out; 973 goto out;
@@ -992,10 +988,6 @@ work_done:
992 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || 988 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
993 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) 989 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
994 expire = ip_rt_gc_timeout; 990 expire = ip_rt_gc_timeout;
995#if RT_CACHE_DEBUG >= 2
996 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
997 dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
998#endif
999out: return 0; 991out: return 0;
1000} 992}
1001 993
@@ -1179,16 +1171,6 @@ restart:
1179 1171
1180 rt->dst.rt_next = rt_hash_table[hash].chain; 1172 rt->dst.rt_next = rt_hash_table[hash].chain;
1181 1173
1182#if RT_CACHE_DEBUG >= 2
1183 if (rt->dst.rt_next) {
1184 struct rtable *trt;
1185 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1186 hash, &rt->rt_dst);
1187 for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1188 printk(" . %pI4", &trt->rt_dst);
1189 printk("\n");
1190 }
1191#endif
1192 /* 1174 /*
1193 * Since lookup is lockfree, we must make sure 1175 * Since lookup is lockfree, we must make sure
1194 * previous writes to rt are committed to memory 1176 * previous writes to rt are committed to memory
@@ -1211,11 +1193,11 @@ static u32 rt_peer_genid(void)
1211 return atomic_read(&__rt_peer_genid); 1193 return atomic_read(&__rt_peer_genid);
1212} 1194}
1213 1195
1214void rt_bind_peer(struct rtable *rt, int create) 1196void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
1215{ 1197{
1216 struct inet_peer *peer; 1198 struct inet_peer *peer;
1217 1199
1218 peer = inet_getpeer_v4(rt->rt_dst, create); 1200 peer = inet_getpeer_v4(daddr, create);
1219 1201
1220 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) 1202 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1221 inet_putpeer(peer); 1203 inet_putpeer(peer);
@@ -1249,7 +1231,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1249 1231
1250 if (rt) { 1232 if (rt) {
1251 if (rt->peer == NULL) 1233 if (rt->peer == NULL)
1252 rt_bind_peer(rt, 1); 1234 rt_bind_peer(rt, rt->rt_dst, 1);
1253 1235
1254 /* If peer is attached to destination, it is never detached, 1236 /* If peer is attached to destination, it is never detached,
1255 so that we need not to grab a lock to dereference it. 1237 so that we need not to grab a lock to dereference it.
@@ -1347,10 +1329,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1347 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, 1329 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1348 rt->rt_oif, 1330 rt->rt_oif,
1349 rt_genid(dev_net(dst->dev))); 1331 rt_genid(dev_net(dst->dev)));
1350#if RT_CACHE_DEBUG >= 1
1351 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
1352 &rt->rt_dst, rt->rt_tos);
1353#endif
1354 rt_del(hash, rt); 1332 rt_del(hash, rt);
1355 ret = NULL; 1333 ret = NULL;
1356 } else if (rt->peer && 1334 } else if (rt->peer &&
@@ -1399,7 +1377,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1399 rcu_read_unlock(); 1377 rcu_read_unlock();
1400 1378
1401 if (!rt->peer) 1379 if (!rt->peer)
1402 rt_bind_peer(rt, 1); 1380 rt_bind_peer(rt, rt->rt_dst, 1);
1403 peer = rt->peer; 1381 peer = rt->peer;
1404 if (!peer) { 1382 if (!peer) {
1405 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1383 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
@@ -1435,7 +1413,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1435 peer->rate_tokens == ip_rt_redirect_number && 1413 peer->rate_tokens == ip_rt_redirect_number &&
1436 net_ratelimit()) 1414 net_ratelimit())
1437 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1415 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1438 &rt->rt_src, rt->rt_iif, 1416 &ip_hdr(skb)->saddr, rt->rt_iif,
1439 &rt->rt_dst, &rt->rt_gateway); 1417 &rt->rt_dst, &rt->rt_gateway);
1440#endif 1418#endif
1441 } 1419 }
@@ -1467,7 +1445,7 @@ static int ip_error(struct sk_buff *skb)
1467 } 1445 }
1468 1446
1469 if (!rt->peer) 1447 if (!rt->peer)
1470 rt_bind_peer(rt, 1); 1448 rt_bind_peer(rt, rt->rt_dst, 1);
1471 peer = rt->peer; 1449 peer = rt->peer;
1472 1450
1473 send = true; 1451 send = true;
@@ -1507,7 +1485,7 @@ static inline unsigned short guess_mtu(unsigned short old_mtu)
1507 return 68; 1485 return 68;
1508} 1486}
1509 1487
1510unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, 1488unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
1511 unsigned short new_mtu, 1489 unsigned short new_mtu,
1512 struct net_device *dev) 1490 struct net_device *dev)
1513{ 1491{
@@ -1574,7 +1552,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1574 dst_confirm(dst); 1552 dst_confirm(dst);
1575 1553
1576 if (!rt->peer) 1554 if (!rt->peer)
1577 rt_bind_peer(rt, 1); 1555 rt_bind_peer(rt, rt->rt_dst, 1);
1578 peer = rt->peer; 1556 peer = rt->peer;
1579 if (peer) { 1557 if (peer) {
1580 if (mtu < ip_rt_min_pmtu) 1558 if (mtu < ip_rt_min_pmtu)
@@ -1631,7 +1609,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1631 struct inet_peer *peer; 1609 struct inet_peer *peer;
1632 1610
1633 if (!rt->peer) 1611 if (!rt->peer)
1634 rt_bind_peer(rt, 0); 1612 rt_bind_peer(rt, rt->rt_dst, 0);
1635 1613
1636 peer = rt->peer; 1614 peer = rt->peer;
1637 if (peer && peer->pmtu_expires) 1615 if (peer && peer->pmtu_expires)
@@ -1687,6 +1665,7 @@ static int ip_rt_bug(struct sk_buff *skb)
1687 &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 1665 &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1688 skb->dev ? skb->dev->name : "?"); 1666 skb->dev ? skb->dev->name : "?");
1689 kfree_skb(skb); 1667 kfree_skb(skb);
1668 WARN_ON(1);
1690 return 0; 1669 return 0;
1691} 1670}
1692 1671
@@ -1699,22 +1678,26 @@ static int ip_rt_bug(struct sk_buff *skb)
1699 in IP options! 1678 in IP options!
1700 */ 1679 */
1701 1680
1702void ip_rt_get_source(u8 *addr, struct rtable *rt) 1681void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
1703{ 1682{
1704 __be32 src; 1683 __be32 src;
1705 struct fib_result res;
1706 1684
1707 if (rt_is_output_route(rt)) 1685 if (rt_is_output_route(rt))
1708 src = rt->rt_src; 1686 src = ip_hdr(skb)->saddr;
1709 else { 1687 else {
1710 struct flowi4 fl4 = { 1688 struct fib_result res;
1711 .daddr = rt->rt_key_dst, 1689 struct flowi4 fl4;
1712 .saddr = rt->rt_key_src, 1690 struct iphdr *iph;
1713 .flowi4_tos = rt->rt_tos, 1691
1714 .flowi4_oif = rt->rt_oif, 1692 iph = ip_hdr(skb);
1715 .flowi4_iif = rt->rt_iif, 1693
1716 .flowi4_mark = rt->rt_mark, 1694 memset(&fl4, 0, sizeof(fl4));
1717 }; 1695 fl4.daddr = iph->daddr;
1696 fl4.saddr = iph->saddr;
1697 fl4.flowi4_tos = iph->tos;
1698 fl4.flowi4_oif = rt->dst.dev->ifindex;
1699 fl4.flowi4_iif = skb->dev->ifindex;
1700 fl4.flowi4_mark = skb->mark;
1718 1701
1719 rcu_read_lock(); 1702 rcu_read_lock();
1720 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) 1703 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
@@ -1767,7 +1750,7 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
1767 return mtu; 1750 return mtu;
1768} 1751}
1769 1752
1770static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, 1753static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1771 struct fib_info *fi) 1754 struct fib_info *fi)
1772{ 1755{
1773 struct inet_peer *peer; 1756 struct inet_peer *peer;
@@ -1776,7 +1759,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
1776 /* If a peer entry exists for this destination, we must hook 1759 /* If a peer entry exists for this destination, we must hook
1777 * it up in order to get at cached metrics. 1760 * it up in order to get at cached metrics.
1778 */ 1761 */
1779 if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) 1762 if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
1780 create = 1; 1763 create = 1;
1781 1764
1782 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); 1765 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
@@ -1803,7 +1786,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
1803 } 1786 }
1804} 1787}
1805 1788
1806static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, 1789static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
1807 const struct fib_result *res, 1790 const struct fib_result *res,
1808 struct fib_info *fi, u16 type, u32 itag) 1791 struct fib_info *fi, u16 type, u32 itag)
1809{ 1792{
@@ -1813,7 +1796,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
1813 if (FIB_RES_GW(*res) && 1796 if (FIB_RES_GW(*res) &&
1814 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1797 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1815 rt->rt_gateway = FIB_RES_GW(*res); 1798 rt->rt_gateway = FIB_RES_GW(*res);
1816 rt_init_metrics(rt, oldflp4, fi); 1799 rt_init_metrics(rt, fl4, fi);
1817#ifdef CONFIG_IP_ROUTE_CLASSID 1800#ifdef CONFIG_IP_ROUTE_CLASSID
1818 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1801 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1819#endif 1802#endif
@@ -1830,20 +1813,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
1830#endif 1813#endif
1831 set_class_tag(rt, itag); 1814 set_class_tag(rt, itag);
1832#endif 1815#endif
1833 rt->rt_type = type;
1834} 1816}
1835 1817
1836static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) 1818static struct rtable *rt_dst_alloc(struct net_device *dev,
1819 bool nopolicy, bool noxfrm)
1837{ 1820{
1838 struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); 1821 return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
1839 if (rt) { 1822 DST_HOST |
1840 rt->dst.obsolete = -1; 1823 (nopolicy ? DST_NOPOLICY : 0) |
1841 1824 (noxfrm ? DST_NOXFRM : 0));
1842 rt->dst.flags = DST_HOST |
1843 (nopolicy ? DST_NOPOLICY : 0) |
1844 (noxfrm ? DST_NOXFRM : 0);
1845 }
1846 return rt;
1847} 1825}
1848 1826
1849/* called in rcu_read_lock() section */ 1827/* called in rcu_read_lock() section */
@@ -1871,36 +1849,38 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1871 goto e_inval; 1849 goto e_inval;
1872 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1850 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1873 } else { 1851 } else {
1874 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 1852 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
1875 &itag, 0); 1853 &itag);
1876 if (err < 0) 1854 if (err < 0)
1877 goto e_err; 1855 goto e_err;
1878 } 1856 }
1879 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); 1857 rth = rt_dst_alloc(init_net.loopback_dev,
1858 IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
1880 if (!rth) 1859 if (!rth)
1881 goto e_nobufs; 1860 goto e_nobufs;
1882 1861
1862#ifdef CONFIG_IP_ROUTE_CLASSID
1863 rth->dst.tclassid = itag;
1864#endif
1883 rth->dst.output = ip_rt_bug; 1865 rth->dst.output = ip_rt_bug;
1884 1866
1885 rth->rt_key_dst = daddr; 1867 rth->rt_key_dst = daddr;
1886 rth->rt_dst = daddr;
1887 rth->rt_tos = tos;
1888 rth->rt_mark = skb->mark;
1889 rth->rt_key_src = saddr; 1868 rth->rt_key_src = saddr;
1869 rth->rt_genid = rt_genid(dev_net(dev));
1870 rth->rt_flags = RTCF_MULTICAST;
1871 rth->rt_type = RTN_MULTICAST;
1872 rth->rt_key_tos = tos;
1873 rth->rt_dst = daddr;
1890 rth->rt_src = saddr; 1874 rth->rt_src = saddr;
1891#ifdef CONFIG_IP_ROUTE_CLASSID
1892 rth->dst.tclassid = itag;
1893#endif
1894 rth->rt_route_iif = dev->ifindex; 1875 rth->rt_route_iif = dev->ifindex;
1895 rth->rt_iif = dev->ifindex; 1876 rth->rt_iif = dev->ifindex;
1896 rth->dst.dev = init_net.loopback_dev;
1897 dev_hold(rth->dst.dev);
1898 rth->rt_oif = 0; 1877 rth->rt_oif = 0;
1878 rth->rt_mark = skb->mark;
1899 rth->rt_gateway = daddr; 1879 rth->rt_gateway = daddr;
1900 rth->rt_spec_dst= spec_dst; 1880 rth->rt_spec_dst= spec_dst;
1901 rth->rt_genid = rt_genid(dev_net(dev)); 1881 rth->rt_peer_genid = 0;
1902 rth->rt_flags = RTCF_MULTICAST; 1882 rth->peer = NULL;
1903 rth->rt_type = RTN_MULTICAST; 1883 rth->fi = NULL;
1904 if (our) { 1884 if (our) {
1905 rth->dst.input= ip_local_deliver; 1885 rth->dst.input= ip_local_deliver;
1906 rth->rt_flags |= RTCF_LOCAL; 1886 rth->rt_flags |= RTCF_LOCAL;
@@ -1981,8 +1961,8 @@ static int __mkroute_input(struct sk_buff *skb,
1981 } 1961 }
1982 1962
1983 1963
1984 err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), 1964 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
1985 in_dev->dev, &spec_dst, &itag, skb->mark); 1965 in_dev->dev, &spec_dst, &itag);
1986 if (err < 0) { 1966 if (err < 0) {
1987 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1967 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1988 saddr); 1968 saddr);
@@ -2013,7 +1993,8 @@ static int __mkroute_input(struct sk_buff *skb,
2013 } 1993 }
2014 } 1994 }
2015 1995
2016 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), 1996 rth = rt_dst_alloc(out_dev->dev,
1997 IN_DEV_CONF_GET(in_dev, NOPOLICY),
2017 IN_DEV_CONF_GET(out_dev, NOXFRM)); 1998 IN_DEV_CONF_GET(out_dev, NOXFRM));
2018 if (!rth) { 1999 if (!rth) {
2019 err = -ENOBUFS; 2000 err = -ENOBUFS;
@@ -2021,27 +2002,28 @@ static int __mkroute_input(struct sk_buff *skb,
2021 } 2002 }
2022 2003
2023 rth->rt_key_dst = daddr; 2004 rth->rt_key_dst = daddr;
2024 rth->rt_dst = daddr;
2025 rth->rt_tos = tos;
2026 rth->rt_mark = skb->mark;
2027 rth->rt_key_src = saddr; 2005 rth->rt_key_src = saddr;
2006 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2007 rth->rt_flags = flags;
2008 rth->rt_type = res->type;
2009 rth->rt_key_tos = tos;
2010 rth->rt_dst = daddr;
2028 rth->rt_src = saddr; 2011 rth->rt_src = saddr;
2029 rth->rt_gateway = daddr;
2030 rth->rt_route_iif = in_dev->dev->ifindex; 2012 rth->rt_route_iif = in_dev->dev->ifindex;
2031 rth->rt_iif = in_dev->dev->ifindex; 2013 rth->rt_iif = in_dev->dev->ifindex;
2032 rth->dst.dev = (out_dev)->dev;
2033 dev_hold(rth->dst.dev);
2034 rth->rt_oif = 0; 2014 rth->rt_oif = 0;
2015 rth->rt_mark = skb->mark;
2016 rth->rt_gateway = daddr;
2035 rth->rt_spec_dst= spec_dst; 2017 rth->rt_spec_dst= spec_dst;
2018 rth->rt_peer_genid = 0;
2019 rth->peer = NULL;
2020 rth->fi = NULL;
2036 2021
2037 rth->dst.input = ip_forward; 2022 rth->dst.input = ip_forward;
2038 rth->dst.output = ip_output; 2023 rth->dst.output = ip_output;
2039 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2040 2024
2041 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); 2025 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
2042 2026
2043 rth->rt_flags = flags;
2044
2045 *result = rth; 2027 *result = rth;
2046 err = 0; 2028 err = 0;
2047 cleanup: 2029 cleanup:
@@ -2150,9 +2132,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2150 goto brd_input; 2132 goto brd_input;
2151 2133
2152 if (res.type == RTN_LOCAL) { 2134 if (res.type == RTN_LOCAL) {
2153 err = fib_validate_source(saddr, daddr, tos, 2135 err = fib_validate_source(skb, saddr, daddr, tos,
2154 net->loopback_dev->ifindex, 2136 net->loopback_dev->ifindex,
2155 dev, &spec_dst, &itag, skb->mark); 2137 dev, &spec_dst, &itag);
2156 if (err < 0) 2138 if (err < 0)
2157 goto martian_source_keep_err; 2139 goto martian_source_keep_err;
2158 if (err) 2140 if (err)
@@ -2176,8 +2158,8 @@ brd_input:
2176 if (ipv4_is_zeronet(saddr)) 2158 if (ipv4_is_zeronet(saddr))
2177 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 2159 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
2178 else { 2160 else {
2179 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2161 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
2180 &itag, skb->mark); 2162 &itag);
2181 if (err < 0) 2163 if (err < 0)
2182 goto martian_source_keep_err; 2164 goto martian_source_keep_err;
2183 if (err) 2165 if (err)
@@ -2188,36 +2170,42 @@ brd_input:
2188 RT_CACHE_STAT_INC(in_brd); 2170 RT_CACHE_STAT_INC(in_brd);
2189 2171
2190local_input: 2172local_input:
2191 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); 2173 rth = rt_dst_alloc(net->loopback_dev,
2174 IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
2192 if (!rth) 2175 if (!rth)
2193 goto e_nobufs; 2176 goto e_nobufs;
2194 2177
2178 rth->dst.input= ip_local_deliver;
2195 rth->dst.output= ip_rt_bug; 2179 rth->dst.output= ip_rt_bug;
2196 rth->rt_genid = rt_genid(net); 2180#ifdef CONFIG_IP_ROUTE_CLASSID
2181 rth->dst.tclassid = itag;
2182#endif
2197 2183
2198 rth->rt_key_dst = daddr; 2184 rth->rt_key_dst = daddr;
2199 rth->rt_dst = daddr;
2200 rth->rt_tos = tos;
2201 rth->rt_mark = skb->mark;
2202 rth->rt_key_src = saddr; 2185 rth->rt_key_src = saddr;
2186 rth->rt_genid = rt_genid(net);
2187 rth->rt_flags = flags|RTCF_LOCAL;
2188 rth->rt_type = res.type;
2189 rth->rt_key_tos = tos;
2190 rth->rt_dst = daddr;
2203 rth->rt_src = saddr; 2191 rth->rt_src = saddr;
2204#ifdef CONFIG_IP_ROUTE_CLASSID 2192#ifdef CONFIG_IP_ROUTE_CLASSID
2205 rth->dst.tclassid = itag; 2193 rth->dst.tclassid = itag;
2206#endif 2194#endif
2207 rth->rt_route_iif = dev->ifindex; 2195 rth->rt_route_iif = dev->ifindex;
2208 rth->rt_iif = dev->ifindex; 2196 rth->rt_iif = dev->ifindex;
2209 rth->dst.dev = net->loopback_dev; 2197 rth->rt_oif = 0;
2210 dev_hold(rth->dst.dev); 2198 rth->rt_mark = skb->mark;
2211 rth->rt_gateway = daddr; 2199 rth->rt_gateway = daddr;
2212 rth->rt_spec_dst= spec_dst; 2200 rth->rt_spec_dst= spec_dst;
2213 rth->dst.input= ip_local_deliver; 2201 rth->rt_peer_genid = 0;
2214 rth->rt_flags = flags|RTCF_LOCAL; 2202 rth->peer = NULL;
2203 rth->fi = NULL;
2215 if (res.type == RTN_UNREACHABLE) { 2204 if (res.type == RTN_UNREACHABLE) {
2216 rth->dst.input= ip_error; 2205 rth->dst.input= ip_error;
2217 rth->dst.error= -err; 2206 rth->dst.error= -err;
2218 rth->rt_flags &= ~RTCF_LOCAL; 2207 rth->rt_flags &= ~RTCF_LOCAL;
2219 } 2208 }
2220 rth->rt_type = res.type;
2221 hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); 2209 hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
2222 rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); 2210 rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
2223 err = 0; 2211 err = 0;
@@ -2288,7 +2276,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2288 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | 2276 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
2289 (rth->rt_iif ^ iif) | 2277 (rth->rt_iif ^ iif) |
2290 rth->rt_oif | 2278 rth->rt_oif |
2291 (rth->rt_tos ^ tos)) == 0 && 2279 (rth->rt_key_tos ^ tos)) == 0 &&
2292 rth->rt_mark == skb->mark && 2280 rth->rt_mark == skb->mark &&
2293 net_eq(dev_net(rth->dst.dev), net) && 2281 net_eq(dev_net(rth->dst.dev), net) &&
2294 !rt_is_expired(rth)) { 2282 !rt_is_expired(rth)) {
@@ -2349,12 +2337,12 @@ EXPORT_SYMBOL(ip_route_input_common);
2349/* called with rcu_read_lock() */ 2337/* called with rcu_read_lock() */
2350static struct rtable *__mkroute_output(const struct fib_result *res, 2338static struct rtable *__mkroute_output(const struct fib_result *res,
2351 const struct flowi4 *fl4, 2339 const struct flowi4 *fl4,
2352 const struct flowi4 *oldflp4, 2340 __be32 orig_daddr, __be32 orig_saddr,
2353 struct net_device *dev_out, 2341 int orig_oif, struct net_device *dev_out,
2354 unsigned int flags) 2342 unsigned int flags)
2355{ 2343{
2356 struct fib_info *fi = res->fi; 2344 struct fib_info *fi = res->fi;
2357 u32 tos = RT_FL_TOS(oldflp4); 2345 u32 tos = RT_FL_TOS(fl4);
2358 struct in_device *in_dev; 2346 struct in_device *in_dev;
2359 u16 type = res->type; 2347 u16 type = res->type;
2360 struct rtable *rth; 2348 struct rtable *rth;
@@ -2381,8 +2369,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2381 fi = NULL; 2369 fi = NULL;
2382 } else if (type == RTN_MULTICAST) { 2370 } else if (type == RTN_MULTICAST) {
2383 flags |= RTCF_MULTICAST | RTCF_LOCAL; 2371 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2384 if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, 2372 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2385 oldflp4->flowi4_proto)) 2373 fl4->flowi4_proto))
2386 flags &= ~RTCF_LOCAL; 2374 flags &= ~RTCF_LOCAL;
2387 /* If multicast route do not exist use 2375 /* If multicast route do not exist use
2388 * default one, but do not gateway in this case. 2376 * default one, but do not gateway in this case.
@@ -2392,29 +2380,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2392 fi = NULL; 2380 fi = NULL;
2393 } 2381 }
2394 2382
2395 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), 2383 rth = rt_dst_alloc(dev_out,
2384 IN_DEV_CONF_GET(in_dev, NOPOLICY),
2396 IN_DEV_CONF_GET(in_dev, NOXFRM)); 2385 IN_DEV_CONF_GET(in_dev, NOXFRM));
2397 if (!rth) 2386 if (!rth)
2398 return ERR_PTR(-ENOBUFS); 2387 return ERR_PTR(-ENOBUFS);
2399 2388
2400 rth->rt_key_dst = oldflp4->daddr; 2389 rth->dst.output = ip_output;
2401 rth->rt_tos = tos; 2390
2402 rth->rt_key_src = oldflp4->saddr; 2391 rth->rt_key_dst = orig_daddr;
2403 rth->rt_oif = oldflp4->flowi4_oif; 2392 rth->rt_key_src = orig_saddr;
2404 rth->rt_mark = oldflp4->flowi4_mark; 2393 rth->rt_genid = rt_genid(dev_net(dev_out));
2394 rth->rt_flags = flags;
2395 rth->rt_type = type;
2396 rth->rt_key_tos = tos;
2405 rth->rt_dst = fl4->daddr; 2397 rth->rt_dst = fl4->daddr;
2406 rth->rt_src = fl4->saddr; 2398 rth->rt_src = fl4->saddr;
2407 rth->rt_route_iif = 0; 2399 rth->rt_route_iif = 0;
2408 rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; 2400 rth->rt_iif = orig_oif ? : dev_out->ifindex;
2409 /* get references to the devices that are to be hold by the routing 2401 rth->rt_oif = orig_oif;
2410 cache entry */ 2402 rth->rt_mark = fl4->flowi4_mark;
2411 rth->dst.dev = dev_out;
2412 dev_hold(dev_out);
2413 rth->rt_gateway = fl4->daddr; 2403 rth->rt_gateway = fl4->daddr;
2414 rth->rt_spec_dst= fl4->saddr; 2404 rth->rt_spec_dst= fl4->saddr;
2415 2405 rth->rt_peer_genid = 0;
2416 rth->dst.output=ip_output; 2406 rth->peer = NULL;
2417 rth->rt_genid = rt_genid(dev_net(dev_out)); 2407 rth->fi = NULL;
2418 2408
2419 RT_CACHE_STAT_INC(out_slow_tot); 2409 RT_CACHE_STAT_INC(out_slow_tot);
2420 2410
@@ -2432,7 +2422,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2432#ifdef CONFIG_IP_MROUTE 2422#ifdef CONFIG_IP_MROUTE
2433 if (type == RTN_MULTICAST) { 2423 if (type == RTN_MULTICAST) {
2434 if (IN_DEV_MFORWARD(in_dev) && 2424 if (IN_DEV_MFORWARD(in_dev) &&
2435 !ipv4_is_local_multicast(oldflp4->daddr)) { 2425 !ipv4_is_local_multicast(fl4->daddr)) {
2436 rth->dst.input = ip_mr_input; 2426 rth->dst.input = ip_mr_input;
2437 rth->dst.output = ip_mc_output; 2427 rth->dst.output = ip_mc_output;
2438 } 2428 }
@@ -2440,9 +2430,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2440#endif 2430#endif
2441 } 2431 }
2442 2432
2443 rt_set_nexthop(rth, oldflp4, res, fi, type, 0); 2433 rt_set_nexthop(rth, fl4, res, fi, type, 0);
2444 2434
2445 rth->rt_flags = flags;
2446 return rth; 2435 return rth;
2447} 2436}
2448 2437
@@ -2451,36 +2440,37 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2451 * called with rcu_read_lock(); 2440 * called with rcu_read_lock();
2452 */ 2441 */
2453 2442
2454static struct rtable *ip_route_output_slow(struct net *net, 2443static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2455 const struct flowi4 *oldflp4)
2456{ 2444{
2457 u32 tos = RT_FL_TOS(oldflp4);
2458 struct flowi4 fl4;
2459 struct fib_result res;
2460 unsigned int flags = 0;
2461 struct net_device *dev_out = NULL; 2445 struct net_device *dev_out = NULL;
2446 u32 tos = RT_FL_TOS(fl4);
2447 unsigned int flags = 0;
2448 struct fib_result res;
2462 struct rtable *rth; 2449 struct rtable *rth;
2450 __be32 orig_daddr;
2451 __be32 orig_saddr;
2452 int orig_oif;
2463 2453
2464 res.fi = NULL; 2454 res.fi = NULL;
2465#ifdef CONFIG_IP_MULTIPLE_TABLES 2455#ifdef CONFIG_IP_MULTIPLE_TABLES
2466 res.r = NULL; 2456 res.r = NULL;
2467#endif 2457#endif
2468 2458
2469 fl4.flowi4_oif = oldflp4->flowi4_oif; 2459 orig_daddr = fl4->daddr;
2470 fl4.flowi4_iif = net->loopback_dev->ifindex; 2460 orig_saddr = fl4->saddr;
2471 fl4.flowi4_mark = oldflp4->flowi4_mark; 2461 orig_oif = fl4->flowi4_oif;
2472 fl4.daddr = oldflp4->daddr; 2462
2473 fl4.saddr = oldflp4->saddr; 2463 fl4->flowi4_iif = net->loopback_dev->ifindex;
2474 fl4.flowi4_tos = tos & IPTOS_RT_MASK; 2464 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2475 fl4.flowi4_scope = ((tos & RTO_ONLINK) ? 2465 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2476 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); 2466 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
2477 2467
2478 rcu_read_lock(); 2468 rcu_read_lock();
2479 if (oldflp4->saddr) { 2469 if (fl4->saddr) {
2480 rth = ERR_PTR(-EINVAL); 2470 rth = ERR_PTR(-EINVAL);
2481 if (ipv4_is_multicast(oldflp4->saddr) || 2471 if (ipv4_is_multicast(fl4->saddr) ||
2482 ipv4_is_lbcast(oldflp4->saddr) || 2472 ipv4_is_lbcast(fl4->saddr) ||
2483 ipv4_is_zeronet(oldflp4->saddr)) 2473 ipv4_is_zeronet(fl4->saddr))
2484 goto out; 2474 goto out;
2485 2475
2486 /* I removed check for oif == dev_out->oif here. 2476 /* I removed check for oif == dev_out->oif here.
@@ -2491,11 +2481,11 @@ static struct rtable *ip_route_output_slow(struct net *net,
2491 of another iface. --ANK 2481 of another iface. --ANK
2492 */ 2482 */
2493 2483
2494 if (oldflp4->flowi4_oif == 0 && 2484 if (fl4->flowi4_oif == 0 &&
2495 (ipv4_is_multicast(oldflp4->daddr) || 2485 (ipv4_is_multicast(fl4->daddr) ||
2496 ipv4_is_lbcast(oldflp4->daddr))) { 2486 ipv4_is_lbcast(fl4->daddr))) {
2497 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2487 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2498 dev_out = __ip_dev_find(net, oldflp4->saddr, false); 2488 dev_out = __ip_dev_find(net, fl4->saddr, false);
2499 if (dev_out == NULL) 2489 if (dev_out == NULL)
2500 goto out; 2490 goto out;
2501 2491
@@ -2514,20 +2504,20 @@ static struct rtable *ip_route_output_slow(struct net *net,
2514 Luckily, this hack is good workaround. 2504 Luckily, this hack is good workaround.
2515 */ 2505 */
2516 2506
2517 fl4.flowi4_oif = dev_out->ifindex; 2507 fl4->flowi4_oif = dev_out->ifindex;
2518 goto make_route; 2508 goto make_route;
2519 } 2509 }
2520 2510
2521 if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { 2511 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2522 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2512 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2523 if (!__ip_dev_find(net, oldflp4->saddr, false)) 2513 if (!__ip_dev_find(net, fl4->saddr, false))
2524 goto out; 2514 goto out;
2525 } 2515 }
2526 } 2516 }
2527 2517
2528 2518
2529 if (oldflp4->flowi4_oif) { 2519 if (fl4->flowi4_oif) {
2530 dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); 2520 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
2531 rth = ERR_PTR(-ENODEV); 2521 rth = ERR_PTR(-ENODEV);
2532 if (dev_out == NULL) 2522 if (dev_out == NULL)
2533 goto out; 2523 goto out;
@@ -2537,37 +2527,37 @@ static struct rtable *ip_route_output_slow(struct net *net,
2537 rth = ERR_PTR(-ENETUNREACH); 2527 rth = ERR_PTR(-ENETUNREACH);
2538 goto out; 2528 goto out;
2539 } 2529 }
2540 if (ipv4_is_local_multicast(oldflp4->daddr) || 2530 if (ipv4_is_local_multicast(fl4->daddr) ||
2541 ipv4_is_lbcast(oldflp4->daddr)) { 2531 ipv4_is_lbcast(fl4->daddr)) {
2542 if (!fl4.saddr) 2532 if (!fl4->saddr)
2543 fl4.saddr = inet_select_addr(dev_out, 0, 2533 fl4->saddr = inet_select_addr(dev_out, 0,
2544 RT_SCOPE_LINK); 2534 RT_SCOPE_LINK);
2545 goto make_route; 2535 goto make_route;
2546 } 2536 }
2547 if (!fl4.saddr) { 2537 if (fl4->saddr) {
2548 if (ipv4_is_multicast(oldflp4->daddr)) 2538 if (ipv4_is_multicast(fl4->daddr))
2549 fl4.saddr = inet_select_addr(dev_out, 0, 2539 fl4->saddr = inet_select_addr(dev_out, 0,
2550 fl4.flowi4_scope); 2540 fl4->flowi4_scope);
2551 else if (!oldflp4->daddr) 2541 else if (!fl4->daddr)
2552 fl4.saddr = inet_select_addr(dev_out, 0, 2542 fl4->saddr = inet_select_addr(dev_out, 0,
2553 RT_SCOPE_HOST); 2543 RT_SCOPE_HOST);
2554 } 2544 }
2555 } 2545 }
2556 2546
2557 if (!fl4.daddr) { 2547 if (!fl4->daddr) {
2558 fl4.daddr = fl4.saddr; 2548 fl4->daddr = fl4->saddr;
2559 if (!fl4.daddr) 2549 if (!fl4->daddr)
2560 fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); 2550 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
2561 dev_out = net->loopback_dev; 2551 dev_out = net->loopback_dev;
2562 fl4.flowi4_oif = net->loopback_dev->ifindex; 2552 fl4->flowi4_oif = net->loopback_dev->ifindex;
2563 res.type = RTN_LOCAL; 2553 res.type = RTN_LOCAL;
2564 flags |= RTCF_LOCAL; 2554 flags |= RTCF_LOCAL;
2565 goto make_route; 2555 goto make_route;
2566 } 2556 }
2567 2557
2568 if (fib_lookup(net, &fl4, &res)) { 2558 if (fib_lookup(net, fl4, &res)) {
2569 res.fi = NULL; 2559 res.fi = NULL;
2570 if (oldflp4->flowi4_oif) { 2560 if (fl4->flowi4_oif) {
2571 /* Apparently, routing tables are wrong. Assume, 2561 /* Apparently, routing tables are wrong. Assume,
2572 that the destination is on link. 2562 that the destination is on link.
2573 2563
@@ -2586,9 +2576,9 @@ static struct rtable *ip_route_output_slow(struct net *net,
2586 likely IPv6, but we do not. 2576 likely IPv6, but we do not.
2587 */ 2577 */
2588 2578
2589 if (fl4.saddr == 0) 2579 if (fl4->saddr == 0)
2590 fl4.saddr = inet_select_addr(dev_out, 0, 2580 fl4->saddr = inet_select_addr(dev_out, 0,
2591 RT_SCOPE_LINK); 2581 RT_SCOPE_LINK);
2592 res.type = RTN_UNICAST; 2582 res.type = RTN_UNICAST;
2593 goto make_route; 2583 goto make_route;
2594 } 2584 }
@@ -2597,42 +2587,45 @@ static struct rtable *ip_route_output_slow(struct net *net,
2597 } 2587 }
2598 2588
2599 if (res.type == RTN_LOCAL) { 2589 if (res.type == RTN_LOCAL) {
2600 if (!fl4.saddr) { 2590 if (!fl4->saddr) {
2601 if (res.fi->fib_prefsrc) 2591 if (res.fi->fib_prefsrc)
2602 fl4.saddr = res.fi->fib_prefsrc; 2592 fl4->saddr = res.fi->fib_prefsrc;
2603 else 2593 else
2604 fl4.saddr = fl4.daddr; 2594 fl4->saddr = fl4->daddr;
2605 } 2595 }
2606 dev_out = net->loopback_dev; 2596 dev_out = net->loopback_dev;
2607 fl4.flowi4_oif = dev_out->ifindex; 2597 fl4->flowi4_oif = dev_out->ifindex;
2608 res.fi = NULL; 2598 res.fi = NULL;
2609 flags |= RTCF_LOCAL; 2599 flags |= RTCF_LOCAL;
2610 goto make_route; 2600 goto make_route;
2611 } 2601 }
2612 2602
2613#ifdef CONFIG_IP_ROUTE_MULTIPATH 2603#ifdef CONFIG_IP_ROUTE_MULTIPATH
2614 if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) 2604 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
2615 fib_select_multipath(&res); 2605 fib_select_multipath(&res);
2616 else 2606 else
2617#endif 2607#endif
2618 if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) 2608 if (!res.prefixlen &&
2609 res.table->tb_num_default > 1 &&
2610 res.type == RTN_UNICAST && !fl4->flowi4_oif)
2619 fib_select_default(&res); 2611 fib_select_default(&res);
2620 2612
2621 if (!fl4.saddr) 2613 if (!fl4->saddr)
2622 fl4.saddr = FIB_RES_PREFSRC(net, res); 2614 fl4->saddr = FIB_RES_PREFSRC(net, res);
2623 2615
2624 dev_out = FIB_RES_DEV(res); 2616 dev_out = FIB_RES_DEV(res);
2625 fl4.flowi4_oif = dev_out->ifindex; 2617 fl4->flowi4_oif = dev_out->ifindex;
2626 2618
2627 2619
2628make_route: 2620make_route:
2629 rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); 2621 rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
2622 dev_out, flags);
2630 if (!IS_ERR(rth)) { 2623 if (!IS_ERR(rth)) {
2631 unsigned int hash; 2624 unsigned int hash;
2632 2625
2633 hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, 2626 hash = rt_hash(orig_daddr, orig_saddr, orig_oif,
2634 rt_genid(dev_net(dev_out))); 2627 rt_genid(dev_net(dev_out)));
2635 rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); 2628 rth = rt_intern_hash(hash, rth, NULL, orig_oif);
2636 } 2629 }
2637 2630
2638out: 2631out:
@@ -2640,7 +2633,7 @@ out:
2640 return rth; 2633 return rth;
2641} 2634}
2642 2635
2643struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) 2636struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
2644{ 2637{
2645 struct rtable *rth; 2638 struct rtable *rth;
2646 unsigned int hash; 2639 unsigned int hash;
@@ -2658,13 +2651,17 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4)
2658 rt_is_output_route(rth) && 2651 rt_is_output_route(rth) &&
2659 rth->rt_oif == flp4->flowi4_oif && 2652 rth->rt_oif == flp4->flowi4_oif &&
2660 rth->rt_mark == flp4->flowi4_mark && 2653 rth->rt_mark == flp4->flowi4_mark &&
2661 !((rth->rt_tos ^ flp4->flowi4_tos) & 2654 !((rth->rt_key_tos ^ flp4->flowi4_tos) &
2662 (IPTOS_RT_MASK | RTO_ONLINK)) && 2655 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2663 net_eq(dev_net(rth->dst.dev), net) && 2656 net_eq(dev_net(rth->dst.dev), net) &&
2664 !rt_is_expired(rth)) { 2657 !rt_is_expired(rth)) {
2665 dst_use(&rth->dst, jiffies); 2658 dst_use(&rth->dst, jiffies);
2666 RT_CACHE_STAT_INC(out_hit); 2659 RT_CACHE_STAT_INC(out_hit);
2667 rcu_read_unlock_bh(); 2660 rcu_read_unlock_bh();
2661 if (!flp4->saddr)
2662 flp4->saddr = rth->rt_src;
2663 if (!flp4->daddr)
2664 flp4->daddr = rth->rt_dst;
2668 return rth; 2665 return rth;
2669 } 2666 }
2670 RT_CACHE_STAT_INC(out_hlist_search); 2667 RT_CACHE_STAT_INC(out_hlist_search);
@@ -2709,7 +2706,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2709 2706
2710struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2707struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2711{ 2708{
2712 struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); 2709 struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
2713 struct rtable *ort = (struct rtable *) dst_orig; 2710 struct rtable *ort = (struct rtable *) dst_orig;
2714 2711
2715 if (rt) { 2712 if (rt) {
@@ -2726,7 +2723,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2726 2723
2727 rt->rt_key_dst = ort->rt_key_dst; 2724 rt->rt_key_dst = ort->rt_key_dst;
2728 rt->rt_key_src = ort->rt_key_src; 2725 rt->rt_key_src = ort->rt_key_src;
2729 rt->rt_tos = ort->rt_tos; 2726 rt->rt_key_tos = ort->rt_key_tos;
2730 rt->rt_route_iif = ort->rt_route_iif; 2727 rt->rt_route_iif = ort->rt_route_iif;
2731 rt->rt_iif = ort->rt_iif; 2728 rt->rt_iif = ort->rt_iif;
2732 rt->rt_oif = ort->rt_oif; 2729 rt->rt_oif = ort->rt_oif;
@@ -2762,15 +2759,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
2762 if (IS_ERR(rt)) 2759 if (IS_ERR(rt))
2763 return rt; 2760 return rt;
2764 2761
2765 if (flp4->flowi4_proto) { 2762 if (flp4->flowi4_proto)
2766 if (!flp4->saddr)
2767 flp4->saddr = rt->rt_src;
2768 if (!flp4->daddr)
2769 flp4->daddr = rt->rt_dst;
2770 rt = (struct rtable *) xfrm_lookup(net, &rt->dst, 2763 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2771 flowi4_to_flowi(flp4), 2764 flowi4_to_flowi(flp4),
2772 sk, 0); 2765 sk, 0);
2773 }
2774 2766
2775 return rt; 2767 return rt;
2776} 2768}
@@ -2794,7 +2786,7 @@ static int rt_fill_info(struct net *net,
2794 r->rtm_family = AF_INET; 2786 r->rtm_family = AF_INET;
2795 r->rtm_dst_len = 32; 2787 r->rtm_dst_len = 32;
2796 r->rtm_src_len = 0; 2788 r->rtm_src_len = 0;
2797 r->rtm_tos = rt->rt_tos; 2789 r->rtm_tos = rt->rt_key_tos;
2798 r->rtm_table = RT_TABLE_MAIN; 2790 r->rtm_table = RT_TABLE_MAIN;
2799 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 2791 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2800 r->rtm_type = rt->rt_type; 2792 r->rtm_type = rt->rt_type;
@@ -2848,7 +2840,9 @@ static int rt_fill_info(struct net *net,
2848 2840
2849 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && 2841 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2850 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { 2842 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2851 int err = ipmr_get_route(net, skb, r, nowait); 2843 int err = ipmr_get_route(net, skb,
2844 rt->rt_src, rt->rt_dst,
2845 r, nowait);
2852 if (err <= 0) { 2846 if (err <= 0) {
2853 if (!nowait) { 2847 if (!nowait) {
2854 if (err == 0) 2848 if (err == 0)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8b44c6d2a79b..26461492a847 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 321 * the ACK carries the same options again (see RFC1122 4.2.3.8)
322 */ 322 */
323 if (opt && opt->optlen) { 323 if (opt && opt->optlen) {
324 int opt_size = sizeof(struct ip_options) + opt->optlen; 324 int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;
325 325
326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC); 326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
327 if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) { 327 if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
328 kfree(ireq->opt); 328 kfree(ireq->opt);
329 ireq->opt = NULL; 329 ireq->opt = NULL;
330 } 330 }
@@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
345 * no easy way to do this. 345 * no easy way to do this.
346 */ 346 */
347 { 347 {
348 struct flowi4 fl4 = { 348 struct flowi4 fl4;
349 .flowi4_mark = sk->sk_mark, 349
350 .daddr = ((opt && opt->srr) ? 350 flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
351 opt->faddr : ireq->rmt_addr), 351 RT_SCOPE_UNIVERSE, IPPROTO_TCP,
352 .saddr = ireq->loc_addr, 352 inet_sk_flowi_flags(sk),
353 .flowi4_tos = RT_CONN_FLAGS(sk), 353 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
354 .flowi4_proto = IPPROTO_TCP, 354 ireq->loc_addr, th->source, th->dest);
355 .flowi4_flags = inet_sk_flowi_flags(sk),
356 .fl4_sport = th->dest,
357 .fl4_dport = th->source,
358 };
359 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 355 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
360 rt = ip_route_output_key(sock_net(sk), &fl4); 356 rt = ip_route_output_key(sock_net(sk), &fl4);
361 if (IS_ERR(rt)) { 357 if (IS_ERR(rt)) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 321e6e84dbcc..57d0752e239a 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,6 +13,7 @@
13#include <linux/seqlock.h> 13#include <linux/seqlock.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/nsproxy.h>
16#include <net/snmp.h> 17#include <net/snmp.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/ip.h> 19#include <net/ip.h>
@@ -21,6 +22,7 @@
21#include <net/udp.h> 22#include <net/udp.h>
22#include <net/cipso_ipv4.h> 23#include <net/cipso_ipv4.h>
23#include <net/inet_frag.h> 24#include <net/inet_frag.h>
25#include <net/ping.h>
24 26
25static int zero; 27static int zero;
26static int tcp_retr1_max = 255; 28static int tcp_retr1_max = 255;
@@ -30,6 +32,8 @@ static int tcp_adv_win_scale_min = -31;
30static int tcp_adv_win_scale_max = 31; 32static int tcp_adv_win_scale_max = 31;
31static int ip_ttl_min = 1; 33static int ip_ttl_min = 1;
32static int ip_ttl_max = 255; 34static int ip_ttl_max = 255;
35static int ip_ping_group_range_min[] = { 0, 0 };
36static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
33 37
34/* Update system visible IP port range */ 38/* Update system visible IP port range */
35static void set_local_port_range(int range[2]) 39static void set_local_port_range(int range[2])
@@ -68,6 +72,53 @@ static int ipv4_local_port_range(ctl_table *table, int write,
68 return ret; 72 return ret;
69} 73}
70 74
75
76void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
77{
78 gid_t *data = table->data;
79 unsigned seq;
80 do {
81 seq = read_seqbegin(&sysctl_local_ports.lock);
82
83 *low = data[0];
84 *high = data[1];
85 } while (read_seqretry(&sysctl_local_ports.lock, seq));
86}
87
88/* Update system visible IP port range */
89static void set_ping_group_range(struct ctl_table *table, int range[2])
90{
91 gid_t *data = table->data;
92 write_seqlock(&sysctl_local_ports.lock);
93 data[0] = range[0];
94 data[1] = range[1];
95 write_sequnlock(&sysctl_local_ports.lock);
96}
97
98/* Validate changes from /proc interface. */
99static int ipv4_ping_group_range(ctl_table *table, int write,
100 void __user *buffer,
101 size_t *lenp, loff_t *ppos)
102{
103 int ret;
104 gid_t range[2];
105 ctl_table tmp = {
106 .data = &range,
107 .maxlen = sizeof(range),
108 .mode = table->mode,
109 .extra1 = &ip_ping_group_range_min,
110 .extra2 = &ip_ping_group_range_max,
111 };
112
113 inet_get_ping_group_range_table(table, range, range + 1);
114 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
115
116 if (write && ret == 0)
117 set_ping_group_range(table, range);
118
119 return ret;
120}
121
71static int proc_tcp_congestion_control(ctl_table *ctl, int write, 122static int proc_tcp_congestion_control(ctl_table *ctl, int write,
72 void __user *buffer, size_t *lenp, loff_t *ppos) 123 void __user *buffer, size_t *lenp, loff_t *ppos)
73{ 124{
@@ -677,6 +728,13 @@ static struct ctl_table ipv4_net_table[] = {
677 .mode = 0644, 728 .mode = 0644,
678 .proc_handler = proc_dointvec 729 .proc_handler = proc_dointvec
679 }, 730 },
731 {
732 .procname = "ping_group_range",
733 .data = &init_net.ipv4.sysctl_ping_group_range,
734 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
735 .mode = 0644,
736 .proc_handler = ipv4_ping_group_range,
737 },
680 { } 738 { }
681}; 739};
682 740
@@ -711,8 +769,18 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
711 &net->ipv4.sysctl_icmp_ratemask; 769 &net->ipv4.sysctl_icmp_ratemask;
712 table[6].data = 770 table[6].data =
713 &net->ipv4.sysctl_rt_cache_rebuild_count; 771 &net->ipv4.sysctl_rt_cache_rebuild_count;
772 table[7].data =
773 &net->ipv4.sysctl_ping_group_range;
774
714 } 775 }
715 776
777 /*
778 * Sane defaults - nobody may create ping sockets.
779 * Boot scripts should set this to distro-specific group.
780 */
781 net->ipv4.sysctl_ping_group_range[0] = 1;
782 net->ipv4.sysctl_ping_group_range[1] = 0;
783
716 net->ipv4.sysctl_rt_cache_rebuild_count = 4; 784 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
717 785
718 net->ipv4.ipv4_hdr = register_net_sysctl_table(net, 786 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b22d45010545..054a59d21eb0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -999,7 +999,8 @@ new_segment:
999 /* We have some space in skb head. Superb! */ 999 /* We have some space in skb head. Superb! */
1000 if (copy > skb_tailroom(skb)) 1000 if (copy > skb_tailroom(skb))
1001 copy = skb_tailroom(skb); 1001 copy = skb_tailroom(skb);
1002 if ((err = skb_add_data(skb, from, copy)) != 0) 1002 err = skb_add_data_nocache(sk, skb, from, copy);
1003 if (err)
1003 goto do_fault; 1004 goto do_fault;
1004 } else { 1005 } else {
1005 int merge = 0; 1006 int merge = 0;
@@ -1042,8 +1043,8 @@ new_segment:
1042 1043
1043 /* Time to copy data. We are close to 1044 /* Time to copy data. We are close to
1044 * the end! */ 1045 * the end! */
1045 err = skb_copy_to_page(sk, from, skb, page, 1046 err = skb_copy_to_page_nocache(sk, from, skb,
1046 off, copy); 1047 page, off, copy);
1047 if (err) { 1048 if (err) {
1048 /* If this page was new, give it to the 1049 /* If this page was new, give it to the
1049 * socket so it does not get leaked. 1050 * socket so it does not get leaked.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f7e6c2c2d2bb..a7d6671e33b8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146/* This will initiate an outgoing connection. */ 146/* This will initiate an outgoing connection. */
147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148{ 148{
149 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_sock *inet = inet_sk(sk);
150 struct tcp_sock *tp = tcp_sk(sk); 151 struct tcp_sock *tp = tcp_sk(sk);
151 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
152 __be16 orig_sport, orig_dport; 152 __be16 orig_sport, orig_dport;
153 struct rtable *rt;
154 __be32 daddr, nexthop; 153 __be32 daddr, nexthop;
154 struct flowi4 *fl4;
155 struct rtable *rt;
155 int err; 156 int err;
157 struct ip_options_rcu *inet_opt;
156 158
157 if (addr_len < sizeof(struct sockaddr_in)) 159 if (addr_len < sizeof(struct sockaddr_in))
158 return -EINVAL; 160 return -EINVAL;
@@ -161,15 +163,18 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
161 return -EAFNOSUPPORT; 163 return -EAFNOSUPPORT;
162 164
163 nexthop = daddr = usin->sin_addr.s_addr; 165 nexthop = daddr = usin->sin_addr.s_addr;
164 if (inet->opt && inet->opt->srr) { 166 inet_opt = rcu_dereference_protected(inet->inet_opt,
167 sock_owned_by_user(sk));
168 if (inet_opt && inet_opt->opt.srr) {
165 if (!daddr) 169 if (!daddr)
166 return -EINVAL; 170 return -EINVAL;
167 nexthop = inet->opt->faddr; 171 nexthop = inet_opt->opt.faddr;
168 } 172 }
169 173
170 orig_sport = inet->inet_sport; 174 orig_sport = inet->inet_sport;
171 orig_dport = usin->sin_port; 175 orig_dport = usin->sin_port;
172 rt = ip_route_connect(nexthop, inet->inet_saddr, 176 fl4 = &inet->cork.fl.u.ip4;
177 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
173 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 178 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 IPPROTO_TCP, 179 IPPROTO_TCP,
175 orig_sport, orig_dport, sk, true); 180 orig_sport, orig_dport, sk, true);
@@ -185,11 +190,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
185 return -ENETUNREACH; 190 return -ENETUNREACH;
186 } 191 }
187 192
188 if (!inet->opt || !inet->opt->srr) 193 if (!inet_opt || !inet_opt->opt.srr)
189 daddr = rt->rt_dst; 194 daddr = fl4->daddr;
190 195
191 if (!inet->inet_saddr) 196 if (!inet->inet_saddr)
192 inet->inet_saddr = rt->rt_src; 197 inet->inet_saddr = fl4->saddr;
193 inet->inet_rcv_saddr = inet->inet_saddr; 198 inet->inet_rcv_saddr = inet->inet_saddr;
194 199
195 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 200 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -200,8 +205,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
200 } 205 }
201 206
202 if (tcp_death_row.sysctl_tw_recycle && 207 if (tcp_death_row.sysctl_tw_recycle &&
203 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { 208 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
204 struct inet_peer *peer = rt_get_peer(rt); 209 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
205 /* 210 /*
206 * VJ's idea. We save last timestamp seen from 211 * VJ's idea. We save last timestamp seen from
207 * the destination in peer table, when entering state 212 * the destination in peer table, when entering state
@@ -221,8 +226,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
221 inet->inet_daddr = daddr; 226 inet->inet_daddr = daddr;
222 227
223 inet_csk(sk)->icsk_ext_hdr_len = 0; 228 inet_csk(sk)->icsk_ext_hdr_len = 0;
224 if (inet->opt) 229 if (inet_opt)
225 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 230 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
226 231
227 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 232 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
228 233
@@ -236,8 +241,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
236 if (err) 241 if (err)
237 goto failure; 242 goto failure;
238 243
239 rt = ip_route_newports(rt, IPPROTO_TCP, 244 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
240 orig_sport, orig_dport,
241 inet->inet_sport, inet->inet_dport, sk); 245 inet->inet_sport, inet->inet_dport, sk);
242 if (IS_ERR(rt)) { 246 if (IS_ERR(rt)) {
243 err = PTR_ERR(rt); 247 err = PTR_ERR(rt);
@@ -279,7 +283,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
279/* 283/*
280 * This routine does path mtu discovery as defined in RFC1191. 284 * This routine does path mtu discovery as defined in RFC1191.
281 */ 285 */
282static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) 286static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
283{ 287{
284 struct dst_entry *dst; 288 struct dst_entry *dst;
285 struct inet_sock *inet = inet_sk(sk); 289 struct inet_sock *inet = inet_sk(sk);
@@ -341,7 +345,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
341 345
342void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 346void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
343{ 347{
344 struct iphdr *iph = (struct iphdr *)icmp_skb->data; 348 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
345 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 349 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
346 struct inet_connection_sock *icsk; 350 struct inet_connection_sock *icsk;
347 struct tcp_sock *tp; 351 struct tcp_sock *tp;
@@ -647,7 +651,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
647 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 651 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
648 652
649 net = dev_net(skb_dst(skb)->dev); 653 net = dev_net(skb_dst(skb)->dev);
650 ip_send_reply(net->ipv4.tcp_sock, skb, 654 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
651 &arg, arg.iov[0].iov_len); 655 &arg, arg.iov[0].iov_len);
652 656
653 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 657 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -722,7 +726,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
722 if (oif) 726 if (oif)
723 arg.bound_dev_if = oif; 727 arg.bound_dev_if = oif;
724 728
725 ip_send_reply(net->ipv4.tcp_sock, skb, 729 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
726 &arg, arg.iov[0].iov_len); 730 &arg, arg.iov[0].iov_len);
727 731
728 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 732 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -765,11 +769,12 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
765 struct request_values *rvp) 769 struct request_values *rvp)
766{ 770{
767 const struct inet_request_sock *ireq = inet_rsk(req); 771 const struct inet_request_sock *ireq = inet_rsk(req);
772 struct flowi4 fl4;
768 int err = -1; 773 int err = -1;
769 struct sk_buff * skb; 774 struct sk_buff * skb;
770 775
771 /* First, grab a route. */ 776 /* First, grab a route. */
772 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 777 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
773 return -1; 778 return -1;
774 779
775 skb = tcp_make_synack(sk, dst, req, rvp); 780 skb = tcp_make_synack(sk, dst, req, rvp);
@@ -820,17 +825,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
820/* 825/*
821 * Save and compile IPv4 options into the request_sock if needed. 826 * Save and compile IPv4 options into the request_sock if needed.
822 */ 827 */
823static struct ip_options *tcp_v4_save_options(struct sock *sk, 828static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
824 struct sk_buff *skb) 829 struct sk_buff *skb)
825{ 830{
826 struct ip_options *opt = &(IPCB(skb)->opt); 831 const struct ip_options *opt = &(IPCB(skb)->opt);
827 struct ip_options *dopt = NULL; 832 struct ip_options_rcu *dopt = NULL;
828 833
829 if (opt && opt->optlen) { 834 if (opt && opt->optlen) {
830 int opt_size = optlength(opt); 835 int opt_size = sizeof(*dopt) + opt->optlen;
836
831 dopt = kmalloc(opt_size, GFP_ATOMIC); 837 dopt = kmalloc(opt_size, GFP_ATOMIC);
832 if (dopt) { 838 if (dopt) {
833 if (ip_options_echo(dopt, skb)) { 839 if (ip_options_echo(&dopt->opt, skb)) {
834 kfree(dopt); 840 kfree(dopt);
835 dopt = NULL; 841 dopt = NULL;
836 } 842 }
@@ -1333,6 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1333 req->cookie_ts = tmp_opt.tstamp_ok; 1339 req->cookie_ts = tmp_opt.tstamp_ok;
1334 } else if (!isn) { 1340 } else if (!isn) {
1335 struct inet_peer *peer = NULL; 1341 struct inet_peer *peer = NULL;
1342 struct flowi4 fl4;
1336 1343
1337 /* VJ's idea. We save last timestamp seen 1344 /* VJ's idea. We save last timestamp seen
1338 * from the destination in peer table, when entering 1345 * from the destination in peer table, when entering
@@ -1345,9 +1352,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1345 */ 1352 */
1346 if (tmp_opt.saw_tstamp && 1353 if (tmp_opt.saw_tstamp &&
1347 tcp_death_row.sysctl_tw_recycle && 1354 tcp_death_row.sysctl_tw_recycle &&
1348 (dst = inet_csk_route_req(sk, req)) != NULL && 1355 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1349 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1356 fl4.daddr == saddr &&
1350 peer->daddr.addr.a4 == saddr) { 1357 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1351 inet_peer_refcheck(peer); 1358 inet_peer_refcheck(peer);
1352 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1359 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1353 (s32)(peer->tcp_ts - req->ts_recent) > 1360 (s32)(peer->tcp_ts - req->ts_recent) >
@@ -1411,19 +1418,16 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1411#ifdef CONFIG_TCP_MD5SIG 1418#ifdef CONFIG_TCP_MD5SIG
1412 struct tcp_md5sig_key *key; 1419 struct tcp_md5sig_key *key;
1413#endif 1420#endif
1421 struct ip_options_rcu *inet_opt;
1414 1422
1415 if (sk_acceptq_is_full(sk)) 1423 if (sk_acceptq_is_full(sk))
1416 goto exit_overflow; 1424 goto exit_overflow;
1417 1425
1418 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1419 goto exit;
1420
1421 newsk = tcp_create_openreq_child(sk, req, skb); 1426 newsk = tcp_create_openreq_child(sk, req, skb);
1422 if (!newsk) 1427 if (!newsk)
1423 goto exit_nonewsk; 1428 goto exit_nonewsk;
1424 1429
1425 newsk->sk_gso_type = SKB_GSO_TCPV4; 1430 newsk->sk_gso_type = SKB_GSO_TCPV4;
1426 sk_setup_caps(newsk, dst);
1427 1431
1428 newtp = tcp_sk(newsk); 1432 newtp = tcp_sk(newsk);
1429 newinet = inet_sk(newsk); 1433 newinet = inet_sk(newsk);
@@ -1431,15 +1435,21 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1431 newinet->inet_daddr = ireq->rmt_addr; 1435 newinet->inet_daddr = ireq->rmt_addr;
1432 newinet->inet_rcv_saddr = ireq->loc_addr; 1436 newinet->inet_rcv_saddr = ireq->loc_addr;
1433 newinet->inet_saddr = ireq->loc_addr; 1437 newinet->inet_saddr = ireq->loc_addr;
1434 newinet->opt = ireq->opt; 1438 inet_opt = ireq->opt;
1439 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1435 ireq->opt = NULL; 1440 ireq->opt = NULL;
1436 newinet->mc_index = inet_iif(skb); 1441 newinet->mc_index = inet_iif(skb);
1437 newinet->mc_ttl = ip_hdr(skb)->ttl; 1442 newinet->mc_ttl = ip_hdr(skb)->ttl;
1438 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1443 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1439 if (newinet->opt) 1444 if (inet_opt)
1440 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1445 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1441 newinet->inet_id = newtp->write_seq ^ jiffies; 1446 newinet->inet_id = newtp->write_seq ^ jiffies;
1442 1447
1448 if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1449 goto put_and_exit;
1450
1451 sk_setup_caps(newsk, dst);
1452
1443 tcp_mtup_init(newsk); 1453 tcp_mtup_init(newsk);
1444 tcp_sync_mss(newsk, dst_mtu(dst)); 1454 tcp_sync_mss(newsk, dst_mtu(dst));
1445 newtp->advmss = dst_metric_advmss(dst); 1455 newtp->advmss = dst_metric_advmss(dst);
@@ -1467,10 +1477,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1467 } 1477 }
1468#endif 1478#endif
1469 1479
1470 if (__inet_inherit_port(sk, newsk) < 0) { 1480 if (__inet_inherit_port(sk, newsk) < 0)
1471 sock_put(newsk); 1481 goto put_and_exit;
1472 goto exit;
1473 }
1474 __inet_hash_nolisten(newsk, NULL); 1482 __inet_hash_nolisten(newsk, NULL);
1475 1483
1476 return newsk; 1484 return newsk;
@@ -1482,6 +1490,9 @@ exit_nonewsk:
1482exit: 1490exit:
1483 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1491 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1484 return NULL; 1492 return NULL;
1493put_and_exit:
1494 sock_put(newsk);
1495 goto exit;
1485} 1496}
1486EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1497EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1487 1498
@@ -1764,12 +1775,13 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1764 struct inet_sock *inet = inet_sk(sk); 1775 struct inet_sock *inet = inet_sk(sk);
1765 struct inet_peer *peer; 1776 struct inet_peer *peer;
1766 1777
1767 if (!rt || rt->rt_dst != inet->inet_daddr) { 1778 if (!rt ||
1779 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1768 peer = inet_getpeer_v4(inet->inet_daddr, 1); 1780 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1769 *release_it = true; 1781 *release_it = true;
1770 } else { 1782 } else {
1771 if (!rt->peer) 1783 if (!rt->peer)
1772 rt_bind_peer(rt, 1); 1784 rt_bind_peer(rt, inet->inet_daddr, 1);
1773 peer = rt->peer; 1785 peer = rt->peer;
1774 *release_it = false; 1786 *release_it = false;
1775 } 1787 }
@@ -2359,7 +2371,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
2359 int ttd = req->expires - jiffies; 2371 int ttd = req->expires - jiffies;
2360 2372
2361 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2373 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2362 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", 2374 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2363 i, 2375 i,
2364 ireq->loc_addr, 2376 ireq->loc_addr,
2365 ntohs(inet_sk(sk)->inet_sport), 2377 ntohs(inet_sk(sk)->inet_sport),
@@ -2414,7 +2426,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2414 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2426 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2415 2427
2416 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2428 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2417 "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", 2429 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2418 i, src, srcp, dest, destp, sk->sk_state, 2430 i, src, srcp, dest, destp, sk->sk_state,
2419 tp->write_seq - tp->snd_una, 2431 tp->write_seq - tp->snd_una,
2420 rx_queue, 2432 rx_queue,
@@ -2449,7 +2461,7 @@ static void get_timewait4_sock(struct inet_timewait_sock *tw,
2449 srcp = ntohs(tw->tw_sport); 2461 srcp = ntohs(tw->tw_sport);
2450 2462
2451 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2463 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2452 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n", 2464 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2453 i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 2465 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2454 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 2466 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2455 atomic_read(&tw->tw_refcnt), tw, len); 2467 atomic_read(&tw->tw_refcnt), tw, len);
@@ -2527,7 +2539,7 @@ void tcp4_proc_exit(void)
2527 2539
2528struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2540struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2529{ 2541{
2530 struct iphdr *iph = skb_gro_network_header(skb); 2542 const struct iphdr *iph = skb_gro_network_header(skb);
2531 2543
2532 switch (skb->ip_summed) { 2544 switch (skb->ip_summed) {
2533 case CHECKSUM_COMPLETE: 2545 case CHECKSUM_COMPLETE:
@@ -2548,7 +2560,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2548 2560
2549int tcp4_gro_complete(struct sk_buff *skb) 2561int tcp4_gro_complete(struct sk_buff *skb)
2550{ 2562{
2551 struct iphdr *iph = ip_hdr(skb); 2563 const struct iphdr *iph = ip_hdr(skb);
2552 struct tcphdr *th = tcp_hdr(skb); 2564 struct tcphdr *th = tcp_hdr(skb);
2553 2565
2554 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 2566 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 17388c7f49c4..882e0b0964d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -899,7 +899,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
899 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 899 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
900 tcp_skb_pcount(skb)); 900 tcp_skb_pcount(skb));
901 901
902 err = icsk->icsk_af_ops->queue_xmit(skb); 902 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
903 if (likely(err <= 0)) 903 if (likely(err <= 0))
904 return err; 904 return err;
905 905
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f87a8eb76f3b..abca870d8ff6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -578,7 +578,7 @@ found:
578void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 578void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
579{ 579{
580 struct inet_sock *inet; 580 struct inet_sock *inet;
581 struct iphdr *iph = (struct iphdr *)skb->data; 581 const struct iphdr *iph = (const struct iphdr *)skb->data;
582 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); 582 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
583 const int type = icmp_hdr(skb)->type; 583 const int type = icmp_hdr(skb)->type;
584 const int code = icmp_hdr(skb)->code; 584 const int code = icmp_hdr(skb)->code;
@@ -706,12 +706,11 @@ static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
706 } 706 }
707} 707}
708 708
709static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) 709static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
710{ 710{
711 struct sock *sk = skb->sk; 711 struct sock *sk = skb->sk;
712 struct inet_sock *inet = inet_sk(sk); 712 struct inet_sock *inet = inet_sk(sk);
713 struct udphdr *uh; 713 struct udphdr *uh;
714 struct rtable *rt = (struct rtable *)skb_dst(skb);
715 int err = 0; 714 int err = 0;
716 int is_udplite = IS_UDPLITE(sk); 715 int is_udplite = IS_UDPLITE(sk);
717 int offset = skb_transport_offset(skb); 716 int offset = skb_transport_offset(skb);
@@ -723,7 +722,7 @@ static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport)
723 */ 722 */
724 uh = udp_hdr(skb); 723 uh = udp_hdr(skb);
725 uh->source = inet->inet_sport; 724 uh->source = inet->inet_sport;
726 uh->dest = dport; 725 uh->dest = fl4->fl4_dport;
727 uh->len = htons(len); 726 uh->len = htons(len);
728 uh->check = 0; 727 uh->check = 0;
729 728
@@ -737,14 +736,14 @@ static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport)
737 736
738 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 737 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
739 738
740 udp4_hwcsum(skb, rt->rt_src, daddr); 739 udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
741 goto send; 740 goto send;
742 741
743 } else 742 } else
744 csum = udp_csum(skb); 743 csum = udp_csum(skb);
745 744
746 /* add protocol-dependent pseudo-header */ 745 /* add protocol-dependent pseudo-header */
747 uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len, 746 uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
748 sk->sk_protocol, csum); 747 sk->sk_protocol, csum);
749 if (uh->check == 0) 748 if (uh->check == 0)
750 uh->check = CSUM_MANGLED_0; 749 uh->check = CSUM_MANGLED_0;
@@ -774,11 +773,11 @@ static int udp_push_pending_frames(struct sock *sk)
774 struct sk_buff *skb; 773 struct sk_buff *skb;
775 int err = 0; 774 int err = 0;
776 775
777 skb = ip_finish_skb(sk); 776 skb = ip_finish_skb(sk, fl4);
778 if (!skb) 777 if (!skb)
779 goto out; 778 goto out;
780 779
781 err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); 780 err = udp_send_skb(skb, fl4);
782 781
783out: 782out:
784 up->len = 0; 783 up->len = 0;
@@ -791,6 +790,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
791{ 790{
792 struct inet_sock *inet = inet_sk(sk); 791 struct inet_sock *inet = inet_sk(sk);
793 struct udp_sock *up = udp_sk(sk); 792 struct udp_sock *up = udp_sk(sk);
793 struct flowi4 fl4_stack;
794 struct flowi4 *fl4; 794 struct flowi4 *fl4;
795 int ulen = len; 795 int ulen = len;
796 struct ipcm_cookie ipc; 796 struct ipcm_cookie ipc;
@@ -804,6 +804,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
804 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 804 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
805 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 805 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
806 struct sk_buff *skb; 806 struct sk_buff *skb;
807 struct ip_options_data opt_copy;
807 808
808 if (len > 0xFFFF) 809 if (len > 0xFFFF)
809 return -EMSGSIZE; 810 return -EMSGSIZE;
@@ -820,6 +821,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
820 821
821 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 822 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
822 823
824 fl4 = &inet->cork.fl.u.ip4;
823 if (up->pending) { 825 if (up->pending) {
824 /* 826 /*
825 * There are pending frames. 827 * There are pending frames.
@@ -877,22 +879,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
877 free = 1; 879 free = 1;
878 connected = 0; 880 connected = 0;
879 } 881 }
880 if (!ipc.opt) 882 if (!ipc.opt) {
881 ipc.opt = inet->opt; 883 struct ip_options_rcu *inet_opt;
884
885 rcu_read_lock();
886 inet_opt = rcu_dereference(inet->inet_opt);
887 if (inet_opt) {
888 memcpy(&opt_copy, inet_opt,
889 sizeof(*inet_opt) + inet_opt->opt.optlen);
890 ipc.opt = &opt_copy.opt;
891 }
892 rcu_read_unlock();
893 }
882 894
883 saddr = ipc.addr; 895 saddr = ipc.addr;
884 ipc.addr = faddr = daddr; 896 ipc.addr = faddr = daddr;
885 897
886 if (ipc.opt && ipc.opt->srr) { 898 if (ipc.opt && ipc.opt->opt.srr) {
887 if (!daddr) 899 if (!daddr)
888 return -EINVAL; 900 return -EINVAL;
889 faddr = ipc.opt->faddr; 901 faddr = ipc.opt->opt.faddr;
890 connected = 0; 902 connected = 0;
891 } 903 }
892 tos = RT_TOS(inet->tos); 904 tos = RT_TOS(inet->tos);
893 if (sock_flag(sk, SOCK_LOCALROUTE) || 905 if (sock_flag(sk, SOCK_LOCALROUTE) ||
894 (msg->msg_flags & MSG_DONTROUTE) || 906 (msg->msg_flags & MSG_DONTROUTE) ||
895 (ipc.opt && ipc.opt->is_strictroute)) { 907 (ipc.opt && ipc.opt->opt.is_strictroute)) {
896 tos |= RTO_ONLINK; 908 tos |= RTO_ONLINK;
897 connected = 0; 909 connected = 0;
898 } 910 }
@@ -909,22 +921,16 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
909 rt = (struct rtable *)sk_dst_check(sk, 0); 921 rt = (struct rtable *)sk_dst_check(sk, 0);
910 922
911 if (rt == NULL) { 923 if (rt == NULL) {
912 struct flowi4 fl4 = {
913 .flowi4_oif = ipc.oif,
914 .flowi4_mark = sk->sk_mark,
915 .daddr = faddr,
916 .saddr = saddr,
917 .flowi4_tos = tos,
918 .flowi4_proto = sk->sk_protocol,
919 .flowi4_flags = (inet_sk_flowi_flags(sk) |
920 FLOWI_FLAG_CAN_SLEEP),
921 .fl4_sport = inet->inet_sport,
922 .fl4_dport = dport,
923 };
924 struct net *net = sock_net(sk); 924 struct net *net = sock_net(sk);
925 925
926 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 926 fl4 = &fl4_stack;
927 rt = ip_route_output_flow(net, &fl4, sk); 927 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
928 RT_SCOPE_UNIVERSE, sk->sk_protocol,
929 inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
930 faddr, saddr, dport, inet->inet_sport);
931
932 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
933 rt = ip_route_output_flow(net, fl4, sk);
928 if (IS_ERR(rt)) { 934 if (IS_ERR(rt)) {
929 err = PTR_ERR(rt); 935 err = PTR_ERR(rt);
930 rt = NULL; 936 rt = NULL;
@@ -945,18 +951,18 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
945 goto do_confirm; 951 goto do_confirm;
946back_from_confirm: 952back_from_confirm:
947 953
948 saddr = rt->rt_src; 954 saddr = fl4->saddr;
949 if (!ipc.addr) 955 if (!ipc.addr)
950 daddr = ipc.addr = rt->rt_dst; 956 daddr = ipc.addr = fl4->daddr;
951 957
952 /* Lockless fast path for the non-corking case. */ 958 /* Lockless fast path for the non-corking case. */
953 if (!corkreq) { 959 if (!corkreq) {
954 skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen, 960 skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
955 sizeof(struct udphdr), &ipc, &rt, 961 sizeof(struct udphdr), &ipc, &rt,
956 msg->msg_flags); 962 msg->msg_flags);
957 err = PTR_ERR(skb); 963 err = PTR_ERR(skb);
958 if (skb && !IS_ERR(skb)) 964 if (skb && !IS_ERR(skb))
959 err = udp_send_skb(skb, daddr, dport); 965 err = udp_send_skb(skb, fl4);
960 goto out; 966 goto out;
961 } 967 }
962 968
@@ -982,9 +988,9 @@ back_from_confirm:
982 988
983do_append_data: 989do_append_data:
984 up->len += ulen; 990 up->len += ulen;
985 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, 991 err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
986 sizeof(struct udphdr), &ipc, &rt, 992 sizeof(struct udphdr), &ipc, &rt,
987 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 993 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
988 if (err) 994 if (err)
989 udp_flush_pending_frames(sk); 995 udp_flush_pending_frames(sk);
990 else if (!corkreq) 996 else if (!corkreq)
@@ -1024,6 +1030,7 @@ EXPORT_SYMBOL(udp_sendmsg);
1024int udp_sendpage(struct sock *sk, struct page *page, int offset, 1030int udp_sendpage(struct sock *sk, struct page *page, int offset,
1025 size_t size, int flags) 1031 size_t size, int flags)
1026{ 1032{
1033 struct inet_sock *inet = inet_sk(sk);
1027 struct udp_sock *up = udp_sk(sk); 1034 struct udp_sock *up = udp_sk(sk);
1028 int ret; 1035 int ret;
1029 1036
@@ -1048,7 +1055,8 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
1048 return -EINVAL; 1055 return -EINVAL;
1049 } 1056 }
1050 1057
1051 ret = ip_append_page(sk, page, offset, size, flags); 1058 ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
1059 page, offset, size, flags);
1052 if (ret == -EOPNOTSUPP) { 1060 if (ret == -EOPNOTSUPP) {
1053 release_sock(sk); 1061 release_sock(sk);
1054 return sock_no_sendpage(sk->sk_socket, page, offset, 1062 return sock_no_sendpage(sk->sk_socket, page, offset,
@@ -2082,7 +2090,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
2082 __u16 srcp = ntohs(inet->inet_sport); 2090 __u16 srcp = ntohs(inet->inet_sport);
2083 2091
2084 seq_printf(f, "%5d: %08X:%04X %08X:%04X" 2092 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
2085 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", 2093 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
2086 bucket, src, srcp, dest, destp, sp->sk_state, 2094 bucket, src, srcp, dest, destp, sp->sk_state,
2087 sk_wmem_alloc_get(sp), 2095 sk_wmem_alloc_get(sp),
2088 sk_rmem_alloc_get(sp), 2096 sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d20a05e970d8..981e43eaf704 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -18,38 +18,46 @@
18 18
19static struct xfrm_policy_afinfo xfrm4_policy_afinfo; 19static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
20 20
21static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, 21static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
22 const xfrm_address_t *saddr, 22 int tos,
23 const xfrm_address_t *daddr) 23 const xfrm_address_t *saddr,
24 const xfrm_address_t *daddr)
24{ 25{
25 struct flowi4 fl4 = {
26 .daddr = daddr->a4,
27 .flowi4_tos = tos,
28 };
29 struct rtable *rt; 26 struct rtable *rt;
30 27
28 memset(fl4, 0, sizeof(*fl4));
29 fl4->daddr = daddr->a4;
30 fl4->flowi4_tos = tos;
31 if (saddr) 31 if (saddr)
32 fl4.saddr = saddr->a4; 32 fl4->saddr = saddr->a4;
33 33
34 rt = __ip_route_output_key(net, &fl4); 34 rt = __ip_route_output_key(net, fl4);
35 if (!IS_ERR(rt)) 35 if (!IS_ERR(rt))
36 return &rt->dst; 36 return &rt->dst;
37 37
38 return ERR_CAST(rt); 38 return ERR_CAST(rt);
39} 39}
40 40
41static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
42 const xfrm_address_t *saddr,
43 const xfrm_address_t *daddr)
44{
45 struct flowi4 fl4;
46
47 return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);
48}
49
41static int xfrm4_get_saddr(struct net *net, 50static int xfrm4_get_saddr(struct net *net,
42 xfrm_address_t *saddr, xfrm_address_t *daddr) 51 xfrm_address_t *saddr, xfrm_address_t *daddr)
43{ 52{
44 struct dst_entry *dst; 53 struct dst_entry *dst;
45 struct rtable *rt; 54 struct flowi4 fl4;
46 55
47 dst = xfrm4_dst_lookup(net, 0, NULL, daddr); 56 dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);
48 if (IS_ERR(dst)) 57 if (IS_ERR(dst))
49 return -EHOSTUNREACH; 58 return -EHOSTUNREACH;
50 59
51 rt = (struct rtable *)dst; 60 saddr->a4 = fl4.saddr;
52 saddr->a4 = rt->rt_src;
53 dst_release(dst); 61 dst_release(dst);
54 return 0; 62 return 0;
55} 63}
@@ -73,7 +81,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
73 81
74 rt->rt_key_dst = fl4->daddr; 82 rt->rt_key_dst = fl4->daddr;
75 rt->rt_key_src = fl4->saddr; 83 rt->rt_key_src = fl4->saddr;
76 rt->rt_tos = fl4->flowi4_tos; 84 rt->rt_key_tos = fl4->flowi4_tos;
77 rt->rt_route_iif = fl4->flowi4_iif; 85 rt->rt_route_iif = fl4->flowi4_iif;
78 rt->rt_iif = fl4->flowi4_iif; 86 rt->rt_iif = fl4->flowi4_iif;
79 rt->rt_oif = fl4->flowi4_oif; 87 rt->rt_oif = fl4->flowi4_oif;
@@ -102,7 +110,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
102static void 110static void
103_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) 111_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
104{ 112{
105 struct iphdr *iph = ip_hdr(skb); 113 const struct iphdr *iph = ip_hdr(skb);
106 u8 *xprth = skb_network_header(skb) + iph->ihl * 4; 114 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
107 struct flowi4 *fl4 = &fl->u.ip4; 115 struct flowi4 *fl4 = &fl->u.ip4;
108 116
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 805d63ef4340..d9ac0a0058b5 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -55,7 +55,7 @@ xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
55 55
56int xfrm4_extract_header(struct sk_buff *skb) 56int xfrm4_extract_header(struct sk_buff *skb)
57{ 57{
58 struct iphdr *iph = ip_hdr(skb); 58 const struct iphdr *iph = ip_hdr(skb);
59 59
60 XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); 60 XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
61 XFRM_MODE_SKB_CB(skb)->id = iph->id; 61 XFRM_MODE_SKB_CB(skb)->id = iph->id;