aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2011-04-21 05:45:37 -0400
committer David S. Miller <davem@davemloft.net> 2011-04-28 16:16:35 -0400
commit f6d8bd051c391c1c0458a30b2a7abcd939329259 (patch)
tree 1dc4daecdeb0b42c2c6b59d7d6b41e091c11db5f
parent 0a14842f5a3c0e88a1e59fac5c3025db39721f74 (diff)
inet: add RCU protection to inet->opt
We lack proper synchronization to manipulate inet->opt ip_options.

The problem is that ip_make_skb() calls ip_setup_cork(), and ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options) without any protection against another thread manipulating inet->opt.

Another thread can change the inet->opt pointer and free the old one under us.

Use RCU to protect inet->opt (changed to inet->inet_opt).

Instead of handling atomic refcounts, just copy ip_options when necessary, to avoid cache line dirtying.

We can't insert an rcu_head in struct ip_options since it's included in skb->cb[], so this patch is large because I had to introduce a new ip_options_rcu structure.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_sock.h14
-rw-r--r--include/net/ip.h11
-rw-r--r--net/dccp/ipv4.c16
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/ipv4/af_inet.c17
-rw-r--r--net/ipv4/cipso_ipv4.c113
-rw-r--r--net/ipv4/icmp.c23
-rw-r--r--net/ipv4/inet_connection_sock.c6
-rw-r--r--net/ipv4/ip_options.c38
-rw-r--r--net/ipv4/ip_output.c44
-rw-r--r--net/ipv4/ip_sockglue.c35
-rw-r--r--net/ipv4/raw.c19
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/tcp_ipv4.c34
-rw-r--r--net/ipv4/udp.c21
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/l2tp/l2tp_ip.c10
17 files changed, 241 insertions, 168 deletions
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 7a37369f8ea3..ed2ba6eca724 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -57,7 +57,15 @@ struct ip_options {
57 unsigned char __data[0]; 57 unsigned char __data[0];
58}; 58};
59 59
60#define optlength(opt) (sizeof(struct ip_options) + opt->optlen) 60struct ip_options_rcu {
61 struct rcu_head rcu;
62 struct ip_options opt;
63};
64
65struct ip_options_data {
66 struct ip_options_rcu opt;
67 char data[40];
68};
61 69
62struct inet_request_sock { 70struct inet_request_sock {
63 struct request_sock req; 71 struct request_sock req;
@@ -78,7 +86,7 @@ struct inet_request_sock {
78 acked : 1, 86 acked : 1,
79 no_srccheck: 1; 87 no_srccheck: 1;
80 kmemcheck_bitfield_end(flags); 88 kmemcheck_bitfield_end(flags);
81 struct ip_options *opt; 89 struct ip_options_rcu *opt;
82}; 90};
83 91
84static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) 92static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -140,7 +148,7 @@ struct inet_sock {
140 __be16 inet_sport; 148 __be16 inet_sport;
141 __u16 inet_id; 149 __u16 inet_id;
142 150
143 struct ip_options *opt; 151 struct ip_options_rcu __rcu *inet_opt;
144 __u8 tos; 152 __u8 tos;
145 __u8 min_ttl; 153 __u8 min_ttl;
146 __u8 mc_ttl; 154 __u8 mc_ttl;
diff --git a/include/net/ip.h b/include/net/ip.h
index 7c416583b710..3a59bf99aa3a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -52,7 +52,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
52struct ipcm_cookie { 52struct ipcm_cookie {
53 __be32 addr; 53 __be32 addr;
54 int oif; 54 int oif;
55 struct ip_options *opt; 55 struct ip_options_rcu *opt;
56 __u8 tx_flags; 56 __u8 tx_flags;
57}; 57};
58 58
@@ -92,7 +92,7 @@ extern int igmp_mc_proc_init(void);
92 92
93extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, 93extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
94 __be32 saddr, __be32 daddr, 94 __be32 saddr, __be32 daddr,
95 struct ip_options *opt); 95 struct ip_options_rcu *opt);
96extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, 96extern int ip_rcv(struct sk_buff *skb, struct net_device *dev,
97 struct packet_type *pt, struct net_device *orig_dev); 97 struct packet_type *pt, struct net_device *orig_dev);
98extern int ip_local_deliver(struct sk_buff *skb); 98extern int ip_local_deliver(struct sk_buff *skb);
@@ -416,14 +416,15 @@ extern int ip_forward(struct sk_buff *skb);
416 * Functions provided by ip_options.c 416 * Functions provided by ip_options.c
417 */ 417 */
418 418
419extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); 419extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
420 __be32 daddr, struct rtable *rt, int is_frag);
420extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); 421extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
421extern void ip_options_fragment(struct sk_buff *skb); 422extern void ip_options_fragment(struct sk_buff *skb);
422extern int ip_options_compile(struct net *net, 423extern int ip_options_compile(struct net *net,
423 struct ip_options *opt, struct sk_buff *skb); 424 struct ip_options *opt, struct sk_buff *skb);
424extern int ip_options_get(struct net *net, struct ip_options **optp, 425extern int ip_options_get(struct net *net, struct ip_options_rcu **optp,
425 unsigned char *data, int optlen); 426 unsigned char *data, int optlen);
426extern int ip_options_get_from_user(struct net *net, struct ip_options **optp, 427extern int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
427 unsigned char __user *data, int optlen); 428 unsigned char __user *data, int optlen);
428extern void ip_options_undo(struct ip_options * opt); 429extern void ip_options_undo(struct ip_options * opt);
429extern void ip_forward_options(struct sk_buff *skb); 430extern void ip_forward_options(struct sk_buff *skb);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b92ab655d44e..cbbcc6c036e0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -48,6 +48,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
48 struct flowi4 fl4; 48 struct flowi4 fl4;
49 struct rtable *rt; 49 struct rtable *rt;
50 int err; 50 int err;
51 struct ip_options_rcu *inet_opt;
51 52
52 dp->dccps_role = DCCP_ROLE_CLIENT; 53 dp->dccps_role = DCCP_ROLE_CLIENT;
53 54
@@ -58,10 +59,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
58 return -EAFNOSUPPORT; 59 return -EAFNOSUPPORT;
59 60
60 nexthop = daddr = usin->sin_addr.s_addr; 61 nexthop = daddr = usin->sin_addr.s_addr;
61 if (inet->opt != NULL && inet->opt->srr) { 62
63 inet_opt = rcu_dereference_protected(inet->inet_opt,
64 sock_owned_by_user(sk));
65 if (inet_opt != NULL && inet_opt->opt.srr) {
62 if (daddr == 0) 66 if (daddr == 0)
63 return -EINVAL; 67 return -EINVAL;
64 nexthop = inet->opt->faddr; 68 nexthop = inet_opt->opt.faddr;
65 } 69 }
66 70
67 orig_sport = inet->inet_sport; 71 orig_sport = inet->inet_sport;
@@ -78,7 +82,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
78 return -ENETUNREACH; 82 return -ENETUNREACH;
79 } 83 }
80 84
81 if (inet->opt == NULL || !inet->opt->srr) 85 if (inet_opt == NULL || !inet_opt->opt.srr)
82 daddr = rt->rt_dst; 86 daddr = rt->rt_dst;
83 87
84 if (inet->inet_saddr == 0) 88 if (inet->inet_saddr == 0)
@@ -89,8 +93,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
89 inet->inet_daddr = daddr; 93 inet->inet_daddr = daddr;
90 94
91 inet_csk(sk)->icsk_ext_hdr_len = 0; 95 inet_csk(sk)->icsk_ext_hdr_len = 0;
92 if (inet->opt != NULL) 96 if (inet_opt)
93 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 97 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
94 /* 98 /*
95 * Socket identity is still unknown (sport may be zero). 99 * Socket identity is still unknown (sport may be zero).
96 * However we set state to DCCP_REQUESTING and not releasing socket 100 * However we set state to DCCP_REQUESTING and not releasing socket
@@ -405,7 +409,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
405 newinet->inet_daddr = ireq->rmt_addr; 409 newinet->inet_daddr = ireq->rmt_addr;
406 newinet->inet_rcv_saddr = ireq->loc_addr; 410 newinet->inet_rcv_saddr = ireq->loc_addr;
407 newinet->inet_saddr = ireq->loc_addr; 411 newinet->inet_saddr = ireq->loc_addr;
408 newinet->opt = ireq->opt; 412 newinet->inet_opt = ireq->opt;
409 ireq->opt = NULL; 413 ireq->opt = NULL;
410 newinet->mc_index = inet_iif(skb); 414 newinet->mc_index = inet_iif(skb);
411 newinet->mc_ttl = ip_hdr(skb)->ttl; 415 newinet->mc_ttl = ip_hdr(skb)->ttl;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 73add2373247..8dc4348774a5 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -573,7 +573,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
573 573
574 First: no IPv4 options. 574 First: no IPv4 options.
575 */ 575 */
576 newinet->opt = NULL; 576 newinet->inet_opt = NULL;
577 577
578 /* Clone RX bits */ 578 /* Clone RX bits */
579 newnp->rxopt.all = np->rxopt.all; 579 newnp->rxopt.all = np->rxopt.all;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0413af3e2285..963a621e75c7 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk)
153 WARN_ON(sk->sk_wmem_queued); 153 WARN_ON(sk->sk_wmem_queued);
154 WARN_ON(sk->sk_forward_alloc); 154 WARN_ON(sk->sk_forward_alloc);
155 155
156 kfree(inet->opt); 156 kfree(rcu_dereference_protected(inet->inet_opt, 1));
157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); 157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
158 sk_refcnt_debug_dec(sk); 158 sk_refcnt_debug_dec(sk);
159} 159}
@@ -1106,9 +1106,12 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1106 struct flowi4 fl4; 1106 struct flowi4 fl4;
1107 struct rtable *rt; 1107 struct rtable *rt;
1108 __be32 new_saddr; 1108 __be32 new_saddr;
1109 struct ip_options_rcu *inet_opt;
1109 1110
1110 if (inet->opt && inet->opt->srr) 1111 inet_opt = rcu_dereference_protected(inet->inet_opt,
1111 daddr = inet->opt->faddr; 1112 sock_owned_by_user(sk));
1113 if (inet_opt && inet_opt->opt.srr)
1114 daddr = inet_opt->opt.faddr;
1112 1115
1113 /* Query new route. */ 1116 /* Query new route. */
1114 rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk), 1117 rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk),
@@ -1148,6 +1151,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1148 struct inet_sock *inet = inet_sk(sk); 1151 struct inet_sock *inet = inet_sk(sk);
1149 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); 1152 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1150 __be32 daddr; 1153 __be32 daddr;
1154 struct ip_options_rcu *inet_opt;
1151 int err; 1155 int err;
1152 1156
1153 /* Route is OK, nothing to do. */ 1157 /* Route is OK, nothing to do. */
@@ -1155,9 +1159,12 @@ int inet_sk_rebuild_header(struct sock *sk)
1155 return 0; 1159 return 0;
1156 1160
1157 /* Reroute. */ 1161 /* Reroute. */
1162 rcu_read_lock();
1163 inet_opt = rcu_dereference(inet->inet_opt);
1158 daddr = inet->inet_daddr; 1164 daddr = inet->inet_daddr;
1159 if (inet->opt && inet->opt->srr) 1165 if (inet_opt && inet_opt->opt.srr)
1160 daddr = inet->opt->faddr; 1166 daddr = inet_opt->opt.faddr;
1167 rcu_read_unlock();
1161 rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, 1168 rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr,
1162 inet->inet_dport, inet->inet_sport, 1169 inet->inet_dport, inet->inet_sport,
1163 sk->sk_protocol, RT_CONN_FLAGS(sk), 1170 sk->sk_protocol, RT_CONN_FLAGS(sk),
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a0af7ea87870..2b3c23c287cd 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
1857 return CIPSO_V4_HDR_LEN + ret_val; 1857 return CIPSO_V4_HDR_LEN + ret_val;
1858} 1858}
1859 1859
1860static void opt_kfree_rcu(struct rcu_head *head)
1861{
1862 kfree(container_of(head, struct ip_options_rcu, rcu));
1863}
1864
1860/** 1865/**
1861 * cipso_v4_sock_setattr - Add a CIPSO option to a socket 1866 * cipso_v4_sock_setattr - Add a CIPSO option to a socket
1862 * @sk: the socket 1867 * @sk: the socket
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
1879 unsigned char *buf = NULL; 1884 unsigned char *buf = NULL;
1880 u32 buf_len; 1885 u32 buf_len;
1881 u32 opt_len; 1886 u32 opt_len;
1882 struct ip_options *opt = NULL; 1887 struct ip_options_rcu *old, *opt = NULL;
1883 struct inet_sock *sk_inet; 1888 struct inet_sock *sk_inet;
1884 struct inet_connection_sock *sk_conn; 1889 struct inet_connection_sock *sk_conn;
1885 1890
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk,
1915 ret_val = -ENOMEM; 1920 ret_val = -ENOMEM;
1916 goto socket_setattr_failure; 1921 goto socket_setattr_failure;
1917 } 1922 }
1918 memcpy(opt->__data, buf, buf_len); 1923 memcpy(opt->opt.__data, buf, buf_len);
1919 opt->optlen = opt_len; 1924 opt->opt.optlen = opt_len;
1920 opt->cipso = sizeof(struct iphdr); 1925 opt->opt.cipso = sizeof(struct iphdr);
1921 kfree(buf); 1926 kfree(buf);
1922 buf = NULL; 1927 buf = NULL;
1923 1928
1924 sk_inet = inet_sk(sk); 1929 sk_inet = inet_sk(sk);
1930
1931 old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk));
1925 if (sk_inet->is_icsk) { 1932 if (sk_inet->is_icsk) {
1926 sk_conn = inet_csk(sk); 1933 sk_conn = inet_csk(sk);
1927 if (sk_inet->opt) 1934 if (old)
1928 sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; 1935 sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
1929 sk_conn->icsk_ext_hdr_len += opt->optlen; 1936 sk_conn->icsk_ext_hdr_len += opt->opt.optlen;
1930 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); 1937 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
1931 } 1938 }
1932 opt = xchg(&sk_inet->opt, opt); 1939 rcu_assign_pointer(sk_inet->inet_opt, opt);
1933 kfree(opt); 1940 if (old)
1941 call_rcu(&old->rcu, opt_kfree_rcu);
1934 1942
1935 return 0; 1943 return 0;
1936 1944
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
1960 unsigned char *buf = NULL; 1968 unsigned char *buf = NULL;
1961 u32 buf_len; 1969 u32 buf_len;
1962 u32 opt_len; 1970 u32 opt_len;
1963 struct ip_options *opt = NULL; 1971 struct ip_options_rcu *opt = NULL;
1964 struct inet_request_sock *req_inet; 1972 struct inet_request_sock *req_inet;
1965 1973
1966 /* We allocate the maximum CIPSO option size here so we are probably 1974 /* We allocate the maximum CIPSO option size here so we are probably
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req,
1988 ret_val = -ENOMEM; 1996 ret_val = -ENOMEM;
1989 goto req_setattr_failure; 1997 goto req_setattr_failure;
1990 } 1998 }
1991 memcpy(opt->__data, buf, buf_len); 1999 memcpy(opt->opt.__data, buf, buf_len);
1992 opt->optlen = opt_len; 2000 opt->opt.optlen = opt_len;
1993 opt->cipso = sizeof(struct iphdr); 2001 opt->opt.cipso = sizeof(struct iphdr);
1994 kfree(buf); 2002 kfree(buf);
1995 buf = NULL; 2003 buf = NULL;
1996 2004
1997 req_inet = inet_rsk(req); 2005 req_inet = inet_rsk(req);
1998 opt = xchg(&req_inet->opt, opt); 2006 opt = xchg(&req_inet->opt, opt);
1999 kfree(opt); 2007 if (opt)
2008 call_rcu(&opt->rcu, opt_kfree_rcu);
2000 2009
2001 return 0; 2010 return 0;
2002 2011
@@ -2016,34 +2025,34 @@ req_setattr_failure:
2016 * values on failure. 2025 * values on failure.
2017 * 2026 *
2018 */ 2027 */
2019static int cipso_v4_delopt(struct ip_options **opt_ptr) 2028static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
2020{ 2029{
2021 int hdr_delta = 0; 2030 int hdr_delta = 0;
2022 struct ip_options *opt = *opt_ptr; 2031 struct ip_options_rcu *opt = *opt_ptr;
2023 2032
2024 if (opt->srr || opt->rr || opt->ts || opt->router_alert) { 2033 if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
2025 u8 cipso_len; 2034 u8 cipso_len;
2026 u8 cipso_off; 2035 u8 cipso_off;
2027 unsigned char *cipso_ptr; 2036 unsigned char *cipso_ptr;
2028 int iter; 2037 int iter;
2029 int optlen_new; 2038 int optlen_new;
2030 2039
2031 cipso_off = opt->cipso - sizeof(struct iphdr); 2040 cipso_off = opt->opt.cipso - sizeof(struct iphdr);
2032 cipso_ptr = &opt->__data[cipso_off]; 2041 cipso_ptr = &opt->opt.__data[cipso_off];
2033 cipso_len = cipso_ptr[1]; 2042 cipso_len = cipso_ptr[1];
2034 2043
2035 if (opt->srr > opt->cipso) 2044 if (opt->opt.srr > opt->opt.cipso)
2036 opt->srr -= cipso_len; 2045 opt->opt.srr -= cipso_len;
2037 if (opt->rr > opt->cipso) 2046 if (opt->opt.rr > opt->opt.cipso)
2038 opt->rr -= cipso_len; 2047 opt->opt.rr -= cipso_len;
2039 if (opt->ts > opt->cipso) 2048 if (opt->opt.ts > opt->opt.cipso)
2040 opt->ts -= cipso_len; 2049 opt->opt.ts -= cipso_len;
2041 if (opt->router_alert > opt->cipso) 2050 if (opt->opt.router_alert > opt->opt.cipso)
2042 opt->router_alert -= cipso_len; 2051 opt->opt.router_alert -= cipso_len;
2043 opt->cipso = 0; 2052 opt->opt.cipso = 0;
2044 2053
2045 memmove(cipso_ptr, cipso_ptr + cipso_len, 2054 memmove(cipso_ptr, cipso_ptr + cipso_len,
2046 opt->optlen - cipso_off - cipso_len); 2055 opt->opt.optlen - cipso_off - cipso_len);
2047 2056
2048 /* determining the new total option length is tricky because of 2057 /* determining the new total option length is tricky because of
2049 * the padding necessary, the only thing i can think to do at 2058 * the padding necessary, the only thing i can think to do at
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2052 * from there we can determine the new total option length */ 2061 * from there we can determine the new total option length */
2053 iter = 0; 2062 iter = 0;
2054 optlen_new = 0; 2063 optlen_new = 0;
2055 while (iter < opt->optlen) 2064 while (iter < opt->opt.optlen)
2056 if (opt->__data[iter] != IPOPT_NOP) { 2065 if (opt->opt.__data[iter] != IPOPT_NOP) {
2057 iter += opt->__data[iter + 1]; 2066 iter += opt->opt.__data[iter + 1];
2058 optlen_new = iter; 2067 optlen_new = iter;
2059 } else 2068 } else
2060 iter++; 2069 iter++;
2061 hdr_delta = opt->optlen; 2070 hdr_delta = opt->opt.optlen;
2062 opt->optlen = (optlen_new + 3) & ~3; 2071 opt->opt.optlen = (optlen_new + 3) & ~3;
2063 hdr_delta -= opt->optlen; 2072 hdr_delta -= opt->opt.optlen;
2064 } else { 2073 } else {
2065 /* only the cipso option was present on the socket so we can 2074 /* only the cipso option was present on the socket so we can
2066 * remove the entire option struct */ 2075 * remove the entire option struct */
2067 *opt_ptr = NULL; 2076 *opt_ptr = NULL;
2068 hdr_delta = opt->optlen; 2077 hdr_delta = opt->opt.optlen;
2069 kfree(opt); 2078 call_rcu(&opt->rcu, opt_kfree_rcu);
2070 } 2079 }
2071 2080
2072 return hdr_delta; 2081 return hdr_delta;
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2083void cipso_v4_sock_delattr(struct sock *sk) 2092void cipso_v4_sock_delattr(struct sock *sk)
2084{ 2093{
2085 int hdr_delta; 2094 int hdr_delta;
2086 struct ip_options *opt; 2095 struct ip_options_rcu *opt;
2087 struct inet_sock *sk_inet; 2096 struct inet_sock *sk_inet;
2088 2097
2089 sk_inet = inet_sk(sk); 2098 sk_inet = inet_sk(sk);
2090 opt = sk_inet->opt; 2099 opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
2091 if (opt == NULL || opt->cipso == 0) 2100 if (opt == NULL || opt->opt.cipso == 0)
2092 return; 2101 return;
2093 2102
2094 hdr_delta = cipso_v4_delopt(&sk_inet->opt); 2103 hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
2095 if (sk_inet->is_icsk && hdr_delta > 0) { 2104 if (sk_inet->is_icsk && hdr_delta > 0) {
2096 struct inet_connection_sock *sk_conn = inet_csk(sk); 2105 struct inet_connection_sock *sk_conn = inet_csk(sk);
2097 sk_conn->icsk_ext_hdr_len -= hdr_delta; 2106 sk_conn->icsk_ext_hdr_len -= hdr_delta;
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk)
2109 */ 2118 */
2110void cipso_v4_req_delattr(struct request_sock *req) 2119void cipso_v4_req_delattr(struct request_sock *req)
2111{ 2120{
2112 struct ip_options *opt; 2121 struct ip_options_rcu *opt;
2113 struct inet_request_sock *req_inet; 2122 struct inet_request_sock *req_inet;
2114 2123
2115 req_inet = inet_rsk(req); 2124 req_inet = inet_rsk(req);
2116 opt = req_inet->opt; 2125 opt = req_inet->opt;
2117 if (opt == NULL || opt->cipso == 0) 2126 if (opt == NULL || opt->opt.cipso == 0)
2118 return; 2127 return;
2119 2128
2120 cipso_v4_delopt(&req_inet->opt); 2129 cipso_v4_delopt(&req_inet->opt);
@@ -2184,14 +2193,18 @@ getattr_return:
2184 */ 2193 */
2185int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) 2194int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
2186{ 2195{
2187 struct ip_options *opt; 2196 struct ip_options_rcu *opt;
2197 int res = -ENOMSG;
2188 2198
2189 opt = inet_sk(sk)->opt; 2199 rcu_read_lock();
2190 if (opt == NULL || opt->cipso == 0) 2200 opt = rcu_dereference(inet_sk(sk)->inet_opt);
2191 return -ENOMSG; 2201 if (opt && opt->opt.cipso)
2192 2202 res = cipso_v4_getattr(opt->opt.__data +
2193 return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), 2203 opt->opt.cipso -
2194 secattr); 2204 sizeof(struct iphdr),
2205 secattr);
2206 rcu_read_unlock();
2207 return res;
2195} 2208}
2196 2209
2197/** 2210/**
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 74e35e5736e2..cfeca3c2152d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -108,8 +108,7 @@ struct icmp_bxm {
108 __be32 times[3]; 108 __be32 times[3];
109 } data; 109 } data;
110 int head_len; 110 int head_len;
111 struct ip_options replyopts; 111 struct ip_options_data replyopts;
112 unsigned char optbuf[40];
113}; 112};
114 113
115/* An array of errno for error messages from dest unreach. */ 114/* An array of errno for error messages from dest unreach. */
@@ -333,7 +332,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
333 struct inet_sock *inet; 332 struct inet_sock *inet;
334 __be32 daddr; 333 __be32 daddr;
335 334
336 if (ip_options_echo(&icmp_param->replyopts, skb)) 335 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
337 return; 336 return;
338 337
339 sk = icmp_xmit_lock(net); 338 sk = icmp_xmit_lock(net);
@@ -347,10 +346,10 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
347 daddr = ipc.addr = rt->rt_src; 346 daddr = ipc.addr = rt->rt_src;
348 ipc.opt = NULL; 347 ipc.opt = NULL;
349 ipc.tx_flags = 0; 348 ipc.tx_flags = 0;
350 if (icmp_param->replyopts.optlen) { 349 if (icmp_param->replyopts.opt.opt.optlen) {
351 ipc.opt = &icmp_param->replyopts; 350 ipc.opt = &icmp_param->replyopts.opt;
352 if (ipc.opt->srr) 351 if (ipc.opt->opt.srr)
353 daddr = icmp_param->replyopts.faddr; 352 daddr = icmp_param->replyopts.opt.opt.faddr;
354 } 353 }
355 { 354 {
356 struct flowi4 fl4 = { 355 struct flowi4 fl4 = {
@@ -379,8 +378,8 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
379 struct icmp_bxm *param) 378 struct icmp_bxm *param)
380{ 379{
381 struct flowi4 fl4 = { 380 struct flowi4 fl4 = {
382 .daddr = (param->replyopts.srr ? 381 .daddr = (param->replyopts.opt.opt.srr ?
383 param->replyopts.faddr : iph->saddr), 382 param->replyopts.opt.opt.faddr : iph->saddr),
384 .saddr = saddr, 383 .saddr = saddr,
385 .flowi4_tos = RT_TOS(tos), 384 .flowi4_tos = RT_TOS(tos),
386 .flowi4_proto = IPPROTO_ICMP, 385 .flowi4_proto = IPPROTO_ICMP,
@@ -581,7 +580,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
581 IPTOS_PREC_INTERNETCONTROL) : 580 IPTOS_PREC_INTERNETCONTROL) :
582 iph->tos; 581 iph->tos;
583 582
584 if (ip_options_echo(&icmp_param.replyopts, skb_in)) 583 if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
585 goto out_unlock; 584 goto out_unlock;
586 585
587 586
@@ -597,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
597 icmp_param.offset = skb_network_offset(skb_in); 596 icmp_param.offset = skb_network_offset(skb_in);
598 inet_sk(sk)->tos = tos; 597 inet_sk(sk)->tos = tos;
599 ipc.addr = iph->saddr; 598 ipc.addr = iph->saddr;
600 ipc.opt = &icmp_param.replyopts; 599 ipc.opt = &icmp_param.replyopts.opt;
601 ipc.tx_flags = 0; 600 ipc.tx_flags = 0;
602 601
603 rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, 602 rt = icmp_route_lookup(net, skb_in, iph, saddr, tos,
@@ -613,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
613 room = dst_mtu(&rt->dst); 612 room = dst_mtu(&rt->dst);
614 if (room > 576) 613 if (room > 576)
615 room = 576; 614 room = 576;
616 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 615 room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
617 room -= sizeof(struct icmphdr); 616 room -= sizeof(struct icmphdr);
618 617
619 icmp_param.data_len = skb_in->len - icmp_param.offset; 618 icmp_param.data_len = skb_in->len - icmp_param.offset;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8514db54a7f4..3282cb2de393 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -354,20 +354,20 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
354{ 354{
355 struct rtable *rt; 355 struct rtable *rt;
356 const struct inet_request_sock *ireq = inet_rsk(req); 356 const struct inet_request_sock *ireq = inet_rsk(req);
357 struct ip_options *opt = inet_rsk(req)->opt; 357 struct ip_options_rcu *opt = inet_rsk(req)->opt;
358 struct net *net = sock_net(sk); 358 struct net *net = sock_net(sk);
359 struct flowi4 fl4; 359 struct flowi4 fl4;
360 360
361 flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, 361 flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
362 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 362 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
363 sk->sk_protocol, inet_sk_flowi_flags(sk), 363 sk->sk_protocol, inet_sk_flowi_flags(sk),
364 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, 364 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
365 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); 365 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
366 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 366 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
367 rt = ip_route_output_flow(net, &fl4, sk); 367 rt = ip_route_output_flow(net, &fl4, sk);
368 if (IS_ERR(rt)) 368 if (IS_ERR(rt))
369 goto no_route; 369 goto no_route;
370 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 370 if (opt && opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway)
371 goto route_err; 371 goto route_err;
372 return &rt->dst; 372 return &rt->dst;
373 373
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2391b24e8251..01fc40965848 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -36,7 +36,7 @@
36 * saddr is address of outgoing interface. 36 * saddr is address of outgoing interface.
37 */ 37 */
38 38
39void ip_options_build(struct sk_buff * skb, struct ip_options * opt, 39void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
40 __be32 daddr, struct rtable *rt, int is_frag) 40 __be32 daddr, struct rtable *rt, int is_frag)
41{ 41{
42 unsigned char *iph = skb_network_header(skb); 42 unsigned char *iph = skb_network_header(skb);
@@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
83 * NOTE: dopt cannot point to skb. 83 * NOTE: dopt cannot point to skb.
84 */ 84 */
85 85
86int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) 86int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
87{ 87{
88 struct ip_options *sopt; 88 const struct ip_options *sopt;
89 unsigned char *sptr, *dptr; 89 unsigned char *sptr, *dptr;
90 int soffset, doffset; 90 int soffset, doffset;
91 int optlen; 91 int optlen;
@@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
95 95
96 sopt = &(IPCB(skb)->opt); 96 sopt = &(IPCB(skb)->opt);
97 97
98 if (sopt->optlen == 0) { 98 if (sopt->optlen == 0)
99 dopt->optlen = 0;
100 return 0; 99 return 0;
101 }
102 100
103 sptr = skb_network_header(skb); 101 sptr = skb_network_header(skb);
104 dptr = dopt->__data; 102 dptr = dopt->__data;
@@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
157 dopt->optlen += optlen; 155 dopt->optlen += optlen;
158 } 156 }
159 if (sopt->srr) { 157 if (sopt->srr) {
160 unsigned char * start = sptr+sopt->srr; 158 unsigned char *start = sptr+sopt->srr;
161 __be32 faddr; 159 __be32 faddr;
162 160
163 optlen = start[1]; 161 optlen = start[1];
@@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt)
499 } 497 }
500} 498}
501 499
502static struct ip_options *ip_options_get_alloc(const int optlen) 500static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
503{ 501{
504 return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), 502 return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
505 GFP_KERNEL); 503 GFP_KERNEL);
506} 504}
507 505
508static int ip_options_get_finish(struct net *net, struct ip_options **optp, 506static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
509 struct ip_options *opt, int optlen) 507 struct ip_options_rcu *opt, int optlen)
510{ 508{
511 while (optlen & 3) 509 while (optlen & 3)
512 opt->__data[optlen++] = IPOPT_END; 510 opt->opt.__data[optlen++] = IPOPT_END;
513 opt->optlen = optlen; 511 opt->opt.optlen = optlen;
514 if (optlen && ip_options_compile(net, opt, NULL)) { 512 if (optlen && ip_options_compile(net, &opt->opt, NULL)) {
515 kfree(opt); 513 kfree(opt);
516 return -EINVAL; 514 return -EINVAL;
517 } 515 }
@@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp,
520 return 0; 518 return 0;
521} 519}
522 520
523int ip_options_get_from_user(struct net *net, struct ip_options **optp, 521int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
524 unsigned char __user *data, int optlen) 522 unsigned char __user *data, int optlen)
525{ 523{
526 struct ip_options *opt = ip_options_get_alloc(optlen); 524 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
527 525
528 if (!opt) 526 if (!opt)
529 return -ENOMEM; 527 return -ENOMEM;
530 if (optlen && copy_from_user(opt->__data, data, optlen)) { 528 if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
531 kfree(opt); 529 kfree(opt);
532 return -EFAULT; 530 return -EFAULT;
533 } 531 }
534 return ip_options_get_finish(net, optp, opt, optlen); 532 return ip_options_get_finish(net, optp, opt, optlen);
535} 533}
536 534
537int ip_options_get(struct net *net, struct ip_options **optp, 535int ip_options_get(struct net *net, struct ip_options_rcu **optp,
538 unsigned char *data, int optlen) 536 unsigned char *data, int optlen)
539{ 537{
540 struct ip_options *opt = ip_options_get_alloc(optlen); 538 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
541 539
542 if (!opt) 540 if (!opt)
543 return -ENOMEM; 541 return -ENOMEM;
544 if (optlen) 542 if (optlen)
545 memcpy(opt->__data, data, optlen); 543 memcpy(opt->opt.__data, data, optlen);
546 return ip_options_get_finish(net, optp, opt, optlen); 544 return ip_options_get_finish(net, optp, opt, optlen);
547} 545}
548 546
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index bdad3d60aa82..362e66f7d2fb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
140 * 140 *
141 */ 141 */
142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, 142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 __be32 saddr, __be32 daddr, struct ip_options *opt) 143 __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
144{ 144{
145 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
146 struct rtable *rt = skb_rtable(skb); 146 struct rtable *rt = skb_rtable(skb);
147 struct iphdr *iph; 147 struct iphdr *iph;
148 148
149 /* Build the IP header. */ 149 /* Build the IP header. */
150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
151 skb_reset_network_header(skb); 151 skb_reset_network_header(skb);
152 iph = ip_hdr(skb); 152 iph = ip_hdr(skb);
153 iph->version = 4; 153 iph->version = 4;
@@ -163,9 +163,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
163 iph->protocol = sk->sk_protocol; 163 iph->protocol = sk->sk_protocol;
164 ip_select_ident(iph, &rt->dst, sk); 164 ip_select_ident(iph, &rt->dst, sk);
165 165
166 if (opt && opt->optlen) { 166 if (opt && opt->opt.optlen) {
167 iph->ihl += opt->optlen>>2; 167 iph->ihl += opt->opt.optlen>>2;
168 ip_options_build(skb, opt, daddr, rt, 0); 168 ip_options_build(skb, &opt->opt, daddr, rt, 0);
169 } 169 }
170 170
171 skb->priority = sk->sk_priority; 171 skb->priority = sk->sk_priority;
@@ -316,7 +316,7 @@ int ip_queue_xmit(struct sk_buff *skb)
316{ 316{
317 struct sock *sk = skb->sk; 317 struct sock *sk = skb->sk;
318 struct inet_sock *inet = inet_sk(sk); 318 struct inet_sock *inet = inet_sk(sk);
319 struct ip_options *opt = inet->opt; 319 struct ip_options_rcu *inet_opt;
320 struct rtable *rt; 320 struct rtable *rt;
321 struct iphdr *iph; 321 struct iphdr *iph;
322 int res; 322 int res;
@@ -325,6 +325,7 @@ int ip_queue_xmit(struct sk_buff *skb)
325 * f.e. by something like SCTP. 325 * f.e. by something like SCTP.
326 */ 326 */
327 rcu_read_lock(); 327 rcu_read_lock();
328 inet_opt = rcu_dereference(inet->inet_opt);
328 rt = skb_rtable(skb); 329 rt = skb_rtable(skb);
329 if (rt != NULL) 330 if (rt != NULL)
330 goto packet_routed; 331 goto packet_routed;
@@ -336,8 +337,8 @@ int ip_queue_xmit(struct sk_buff *skb)
336 337
337 /* Use correct destination address if we have options. */ 338 /* Use correct destination address if we have options. */
338 daddr = inet->inet_daddr; 339 daddr = inet->inet_daddr;
339 if(opt && opt->srr) 340 if (inet_opt && inet_opt->opt.srr)
340 daddr = opt->faddr; 341 daddr = inet_opt->opt.faddr;
341 342
342 /* If this fails, retransmit mechanism of transport layer will 343 /* If this fails, retransmit mechanism of transport layer will
343 * keep trying until route appears or the connection times 344 * keep trying until route appears or the connection times
@@ -357,11 +358,11 @@ int ip_queue_xmit(struct sk_buff *skb)
357 skb_dst_set_noref(skb, &rt->dst); 358 skb_dst_set_noref(skb, &rt->dst);
358 359
359packet_routed: 360packet_routed:
360 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 361 if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway)
361 goto no_route; 362 goto no_route;
362 363
363 /* OK, we know where to send it, allocate and build IP header. */ 364 /* OK, we know where to send it, allocate and build IP header. */
364 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 365 skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
365 skb_reset_network_header(skb); 366 skb_reset_network_header(skb);
366 iph = ip_hdr(skb); 367 iph = ip_hdr(skb);
367 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 368 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
@@ -375,9 +376,9 @@ packet_routed:
375 iph->daddr = rt->rt_dst; 376 iph->daddr = rt->rt_dst;
376 /* Transport layer set skb->h.foo itself. */ 377 /* Transport layer set skb->h.foo itself. */
377 378
378 if (opt && opt->optlen) { 379 if (inet_opt && inet_opt->opt.optlen) {
379 iph->ihl += opt->optlen >> 2; 380 iph->ihl += inet_opt->opt.optlen >> 2;
380 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 381 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
381 } 382 }
382 383
383 ip_select_ident_more(iph, &rt->dst, sk, 384 ip_select_ident_more(iph, &rt->dst, sk,
@@ -1033,7 +1034,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1033 struct ipcm_cookie *ipc, struct rtable **rtp) 1034 struct ipcm_cookie *ipc, struct rtable **rtp)
1034{ 1035{
1035 struct inet_sock *inet = inet_sk(sk); 1036 struct inet_sock *inet = inet_sk(sk);
1036 struct ip_options *opt; 1037 struct ip_options_rcu *opt;
1037 struct rtable *rt; 1038 struct rtable *rt;
1038 1039
1039 /* 1040 /*
@@ -1047,7 +1048,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1047 if (unlikely(cork->opt == NULL)) 1048 if (unlikely(cork->opt == NULL))
1048 return -ENOBUFS; 1049 return -ENOBUFS;
1049 } 1050 }
1050 memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); 1051 memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
1051 cork->flags |= IPCORK_OPT; 1052 cork->flags |= IPCORK_OPT;
1052 cork->addr = ipc->addr; 1053 cork->addr = ipc->addr;
1053 } 1054 }
@@ -1451,26 +1452,23 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1451 unsigned int len) 1452 unsigned int len)
1452{ 1453{
1453 struct inet_sock *inet = inet_sk(sk); 1454 struct inet_sock *inet = inet_sk(sk);
1454 struct { 1455 struct ip_options_data replyopts;
1455 struct ip_options opt;
1456 char data[40];
1457 } replyopts;
1458 struct ipcm_cookie ipc; 1456 struct ipcm_cookie ipc;
1459 __be32 daddr; 1457 __be32 daddr;
1460 struct rtable *rt = skb_rtable(skb); 1458 struct rtable *rt = skb_rtable(skb);
1461 1459
1462 if (ip_options_echo(&replyopts.opt, skb)) 1460 if (ip_options_echo(&replyopts.opt.opt, skb))
1463 return; 1461 return;
1464 1462
1465 daddr = ipc.addr = rt->rt_src; 1463 daddr = ipc.addr = rt->rt_src;
1466 ipc.opt = NULL; 1464 ipc.opt = NULL;
1467 ipc.tx_flags = 0; 1465 ipc.tx_flags = 0;
1468 1466
1469 if (replyopts.opt.optlen) { 1467 if (replyopts.opt.opt.optlen) {
1470 ipc.opt = &replyopts.opt; 1468 ipc.opt = &replyopts.opt;
1471 1469
1472 if (ipc.opt->srr) 1470 if (replyopts.opt.opt.srr)
1473 daddr = replyopts.opt.faddr; 1471 daddr = replyopts.opt.opt.faddr;
1474 } 1472 }
1475 1473
1476 { 1474 {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 9640900309bb..ab0c9efd1efa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -451,6 +451,11 @@ out:
451} 451}
452 452
453 453
454static void opt_kfree_rcu(struct rcu_head *head)
455{
456 kfree(container_of(head, struct ip_options_rcu, rcu));
457}
458
454/* 459/*
455 * Socket option code for IP. This is the end of the line after any 460 * Socket option code for IP. This is the end of the line after any
456 * TCP,UDP etc options on an IP socket. 461 * TCP,UDP etc options on an IP socket.
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
497 switch (optname) { 502 switch (optname) {
498 case IP_OPTIONS: 503 case IP_OPTIONS:
499 { 504 {
500 struct ip_options *opt = NULL; 505 struct ip_options_rcu *old, *opt = NULL;
506
501 if (optlen > 40) 507 if (optlen > 40)
502 goto e_inval; 508 goto e_inval;
503 err = ip_options_get_from_user(sock_net(sk), &opt, 509 err = ip_options_get_from_user(sock_net(sk), &opt,
504 optval, optlen); 510 optval, optlen);
505 if (err) 511 if (err)
506 break; 512 break;
513 old = rcu_dereference_protected(inet->inet_opt,
514 sock_owned_by_user(sk));
507 if (inet->is_icsk) { 515 if (inet->is_icsk) {
508 struct inet_connection_sock *icsk = inet_csk(sk); 516 struct inet_connection_sock *icsk = inet_csk(sk);
509#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 517#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
512 (TCPF_LISTEN | TCPF_CLOSE)) && 520 (TCPF_LISTEN | TCPF_CLOSE)) &&
513 inet->inet_daddr != LOOPBACK4_IPV6)) { 521 inet->inet_daddr != LOOPBACK4_IPV6)) {
514#endif 522#endif
515 if (inet->opt) 523 if (old)
516 icsk->icsk_ext_hdr_len -= inet->opt->optlen; 524 icsk->icsk_ext_hdr_len -= old->opt.optlen;
517 if (opt) 525 if (opt)
518 icsk->icsk_ext_hdr_len += opt->optlen; 526 icsk->icsk_ext_hdr_len += opt->opt.optlen;
519 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 527 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
520#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 528#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
521 } 529 }
522#endif 530#endif
523 } 531 }
524 opt = xchg(&inet->opt, opt); 532 rcu_assign_pointer(inet->inet_opt, opt);
525 kfree(opt); 533 if (old)
534 call_rcu(&old->rcu, opt_kfree_rcu);
526 break; 535 break;
527 } 536 }
528 case IP_PKTINFO: 537 case IP_PKTINFO:
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1081 case IP_OPTIONS: 1090 case IP_OPTIONS:
1082 { 1091 {
1083 unsigned char optbuf[sizeof(struct ip_options)+40]; 1092 unsigned char optbuf[sizeof(struct ip_options)+40];
1084 struct ip_options * opt = (struct ip_options *)optbuf; 1093 struct ip_options *opt = (struct ip_options *)optbuf;
1094 struct ip_options_rcu *inet_opt;
1095
1096 inet_opt = rcu_dereference_protected(inet->inet_opt,
1097 sock_owned_by_user(sk));
1085 opt->optlen = 0; 1098 opt->optlen = 0;
1086 if (inet->opt) 1099 if (inet_opt)
1087 memcpy(optbuf, inet->opt, 1100 memcpy(optbuf, &inet_opt->opt,
1088 sizeof(struct ip_options)+ 1101 sizeof(struct ip_options) +
1089 inet->opt->optlen); 1102 inet_opt->opt.optlen);
1090 release_sock(sk); 1103 release_sock(sk);
1091 1104
1092 if (opt->optlen == 0) 1105 if (opt->optlen == 0)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index abf14dbcb3b9..a8659e0c4a6e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -460,6 +460,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
460 __be32 saddr; 460 __be32 saddr;
461 u8 tos; 461 u8 tos;
462 int err; 462 int err;
463 struct ip_options_data opt_copy;
463 464
464 err = -EMSGSIZE; 465 err = -EMSGSIZE;
465 if (len > 0xFFFF) 466 if (len > 0xFFFF)
@@ -520,8 +521,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
520 saddr = ipc.addr; 521 saddr = ipc.addr;
521 ipc.addr = daddr; 522 ipc.addr = daddr;
522 523
523 if (!ipc.opt) 524 if (!ipc.opt) {
524 ipc.opt = inet->opt; 525 struct ip_options_rcu *inet_opt;
526
527 rcu_read_lock();
528 inet_opt = rcu_dereference(inet->inet_opt);
529 if (inet_opt) {
530 memcpy(&opt_copy, inet_opt,
531 sizeof(*inet_opt) + inet_opt->opt.optlen);
532 ipc.opt = &opt_copy.opt;
533 }
534 rcu_read_unlock();
535 }
525 536
526 if (ipc.opt) { 537 if (ipc.opt) {
527 err = -EINVAL; 538 err = -EINVAL;
@@ -530,10 +541,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
530 */ 541 */
531 if (inet->hdrincl) 542 if (inet->hdrincl)
532 goto done; 543 goto done;
533 if (ipc.opt->srr) { 544 if (ipc.opt->opt.srr) {
534 if (!daddr) 545 if (!daddr)
535 goto done; 546 goto done;
536 daddr = ipc.opt->faddr; 547 daddr = ipc.opt->opt.faddr;
537 } 548 }
538 } 549 }
539 tos = RT_CONN_FLAGS(sk); 550 tos = RT_CONN_FLAGS(sk);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 71e029691908..26461492a847 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 321 * the ACK carries the same options again (see RFC1122 4.2.3.8)
322 */ 322 */
323 if (opt && opt->optlen) { 323 if (opt && opt->optlen) {
324 int opt_size = sizeof(struct ip_options) + opt->optlen; 324 int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;
325 325
326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC); 326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
327 if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) { 327 if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
328 kfree(ireq->opt); 328 kfree(ireq->opt);
329 ireq->opt = NULL; 329 ireq->opt = NULL;
330 } 330 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 310454c2f4d1..d60732fe5f21 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -154,6 +154,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
154 struct flowi4 fl4; 154 struct flowi4 fl4;
155 struct rtable *rt; 155 struct rtable *rt;
156 int err; 156 int err;
157 struct ip_options_rcu *inet_opt;
157 158
158 if (addr_len < sizeof(struct sockaddr_in)) 159 if (addr_len < sizeof(struct sockaddr_in))
159 return -EINVAL; 160 return -EINVAL;
@@ -162,10 +163,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
162 return -EAFNOSUPPORT; 163 return -EAFNOSUPPORT;
163 164
164 nexthop = daddr = usin->sin_addr.s_addr; 165 nexthop = daddr = usin->sin_addr.s_addr;
165 if (inet->opt && inet->opt->srr) { 166 inet_opt = rcu_dereference_protected(inet->inet_opt,
167 sock_owned_by_user(sk));
168 if (inet_opt && inet_opt->opt.srr) {
166 if (!daddr) 169 if (!daddr)
167 return -EINVAL; 170 return -EINVAL;
168 nexthop = inet->opt->faddr; 171 nexthop = inet_opt->opt.faddr;
169 } 172 }
170 173
171 orig_sport = inet->inet_sport; 174 orig_sport = inet->inet_sport;
@@ -186,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
186 return -ENETUNREACH; 189 return -ENETUNREACH;
187 } 190 }
188 191
189 if (!inet->opt || !inet->opt->srr) 192 if (!inet_opt || !inet_opt->opt.srr)
190 daddr = rt->rt_dst; 193 daddr = rt->rt_dst;
191 194
192 if (!inet->inet_saddr) 195 if (!inet->inet_saddr)
@@ -222,8 +225,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
222 inet->inet_daddr = daddr; 225 inet->inet_daddr = daddr;
223 226
224 inet_csk(sk)->icsk_ext_hdr_len = 0; 227 inet_csk(sk)->icsk_ext_hdr_len = 0;
225 if (inet->opt) 228 if (inet_opt)
226 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 229 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
227 230
228 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 231 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
229 232
@@ -820,17 +823,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
820/* 823/*
821 * Save and compile IPv4 options into the request_sock if needed. 824 * Save and compile IPv4 options into the request_sock if needed.
822 */ 825 */
823static struct ip_options *tcp_v4_save_options(struct sock *sk, 826static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
824 struct sk_buff *skb) 827 struct sk_buff *skb)
825{ 828{
826 struct ip_options *opt = &(IPCB(skb)->opt); 829 const struct ip_options *opt = &(IPCB(skb)->opt);
827 struct ip_options *dopt = NULL; 830 struct ip_options_rcu *dopt = NULL;
828 831
829 if (opt && opt->optlen) { 832 if (opt && opt->optlen) {
830 int opt_size = optlength(opt); 833 int opt_size = sizeof(*dopt) + opt->optlen;
834
831 dopt = kmalloc(opt_size, GFP_ATOMIC); 835 dopt = kmalloc(opt_size, GFP_ATOMIC);
832 if (dopt) { 836 if (dopt) {
833 if (ip_options_echo(dopt, skb)) { 837 if (ip_options_echo(&dopt->opt, skb)) {
834 kfree(dopt); 838 kfree(dopt);
835 dopt = NULL; 839 dopt = NULL;
836 } 840 }
@@ -1411,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1411#ifdef CONFIG_TCP_MD5SIG 1415#ifdef CONFIG_TCP_MD5SIG
1412 struct tcp_md5sig_key *key; 1416 struct tcp_md5sig_key *key;
1413#endif 1417#endif
1418 struct ip_options_rcu *inet_opt;
1414 1419
1415 if (sk_acceptq_is_full(sk)) 1420 if (sk_acceptq_is_full(sk))
1416 goto exit_overflow; 1421 goto exit_overflow;
@@ -1431,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1431 newinet->inet_daddr = ireq->rmt_addr; 1436 newinet->inet_daddr = ireq->rmt_addr;
1432 newinet->inet_rcv_saddr = ireq->loc_addr; 1437 newinet->inet_rcv_saddr = ireq->loc_addr;
1433 newinet->inet_saddr = ireq->loc_addr; 1438 newinet->inet_saddr = ireq->loc_addr;
1434 newinet->opt = ireq->opt; 1439 inet_opt = ireq->opt;
1440 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1435 ireq->opt = NULL; 1441 ireq->opt = NULL;
1436 newinet->mc_index = inet_iif(skb); 1442 newinet->mc_index = inet_iif(skb);
1437 newinet->mc_ttl = ip_hdr(skb)->ttl; 1443 newinet->mc_ttl = ip_hdr(skb)->ttl;
1438 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1444 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1439 if (newinet->opt) 1445 if (inet_opt)
1440 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1446 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1441 newinet->inet_id = newtp->write_seq ^ jiffies; 1447 newinet->inet_id = newtp->write_seq ^ jiffies;
1442 1448
1443 tcp_mtup_init(newsk); 1449 tcp_mtup_init(newsk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bc0dab2593e0..544f435d1aff 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -804,6 +804,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
804 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 804 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
805 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 805 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
806 struct sk_buff *skb; 806 struct sk_buff *skb;
807 struct ip_options_data opt_copy;
807 808
808 if (len > 0xFFFF) 809 if (len > 0xFFFF)
809 return -EMSGSIZE; 810 return -EMSGSIZE;
@@ -877,22 +878,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
877 free = 1; 878 free = 1;
878 connected = 0; 879 connected = 0;
879 } 880 }
880 if (!ipc.opt) 881 if (!ipc.opt) {
881 ipc.opt = inet->opt; 882 struct ip_options_rcu *inet_opt;
883
884 rcu_read_lock();
885 inet_opt = rcu_dereference(inet->inet_opt);
886 if (inet_opt) {
887 memcpy(&opt_copy, inet_opt,
888 sizeof(*inet_opt) + inet_opt->opt.optlen);
889 ipc.opt = &opt_copy.opt;
890 }
891 rcu_read_unlock();
892 }
882 893
883 saddr = ipc.addr; 894 saddr = ipc.addr;
884 ipc.addr = faddr = daddr; 895 ipc.addr = faddr = daddr;
885 896
886 if (ipc.opt && ipc.opt->srr) { 897 if (ipc.opt && ipc.opt->opt.srr) {
887 if (!daddr) 898 if (!daddr)
888 return -EINVAL; 899 return -EINVAL;
889 faddr = ipc.opt->faddr; 900 faddr = ipc.opt->opt.faddr;
890 connected = 0; 901 connected = 0;
891 } 902 }
892 tos = RT_TOS(inet->tos); 903 tos = RT_TOS(inet->tos);
893 if (sock_flag(sk, SOCK_LOCALROUTE) || 904 if (sock_flag(sk, SOCK_LOCALROUTE) ||
894 (msg->msg_flags & MSG_DONTROUTE) || 905 (msg->msg_flags & MSG_DONTROUTE) ||
895 (ipc.opt && ipc.opt->is_strictroute)) { 906 (ipc.opt && ipc.opt->opt.is_strictroute)) {
896 tos |= RTO_ONLINK; 907 tos |= RTO_ONLINK;
897 connected = 0; 908 connected = 0;
898 } 909 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cb7658aceb6c..868366470b4a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1469,7 +1469,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1469 1469
1470 First: no IPv4 options. 1470 First: no IPv4 options.
1471 */ 1471 */
1472 newinet->opt = NULL; 1472 newinet->inet_opt = NULL;
1473 newnp->ipv6_fl_list = NULL; 1473 newnp->ipv6_fl_list = NULL;
1474 1474
1475 /* Clone RX bits */ 1475 /* Clone RX bits */
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index cc673677c5de..962a607b51da 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -416,7 +416,6 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
416 int rc; 416 int rc;
417 struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk); 417 struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
418 struct inet_sock *inet = inet_sk(sk); 418 struct inet_sock *inet = inet_sk(sk);
419 struct ip_options *opt = inet->opt;
420 struct rtable *rt = NULL; 419 struct rtable *rt = NULL;
421 int connected = 0; 420 int connected = 0;
422 __be32 daddr; 421 __be32 daddr;
@@ -471,9 +470,14 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
471 rt = (struct rtable *) __sk_dst_check(sk, 0); 470 rt = (struct rtable *) __sk_dst_check(sk, 0);
472 471
473 if (rt == NULL) { 472 if (rt == NULL) {
473 struct ip_options_rcu *inet_opt;
474
475 inet_opt = rcu_dereference_protected(inet->inet_opt,
476 sock_owned_by_user(sk));
477
474 /* Use correct destination address if we have options. */ 478 /* Use correct destination address if we have options. */
475 if (opt && opt->srr) 479 if (inet_opt && inet_opt->opt.srr)
476 daddr = opt->faddr; 480 daddr = inet_opt->opt.faddr;
477 481
478 /* If this fails, retransmit mechanism of transport layer will 482 /* If this fails, retransmit mechanism of transport layer will
479 * keep trying until route appears or the connection times 483 * keep trying until route appears or the connection times