diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-04-21 05:45:37 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-04-28 16:16:35 -0400 |
commit | f6d8bd051c391c1c0458a30b2a7abcd939329259 (patch) | |
tree | 1dc4daecdeb0b42c2c6b59d7d6b41e091c11db5f /net/ipv4 | |
parent | 0a14842f5a3c0e88a1e59fac5c3025db39721f74 (diff) |
inet: add RCU protection to inet->opt
We lack proper synchronization to manipulate inet->opt ip_options.
The problem is that ip_make_skb() calls ip_setup_cork(), and
ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options)
without any protection against another thread manipulating inet->opt.
Another thread can change the inet->opt pointer and free the old one under us.
Use RCU to protect inet->opt (changed to inet->inet_opt).
Instead of handling atomic refcounts, just copy ip_options when
necessary, to avoid cache line dirtying.
We can't insert an rcu_head in struct ip_options since it's included in
skb->cb[], so this patch is large because I had to introduce a new
ip_options_rcu structure.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 17 | ||||
-rw-r--r-- | net/ipv4/cipso_ipv4.c | 113 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 23 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 38 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 44 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 35 | ||||
-rw-r--r-- | net/ipv4/raw.c | 19 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 34 | ||||
-rw-r--r-- | net/ipv4/udp.c | 21 |
11 files changed, 205 insertions, 149 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0413af3e2285..963a621e75c7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk) | |||
153 | WARN_ON(sk->sk_wmem_queued); | 153 | WARN_ON(sk->sk_wmem_queued); |
154 | WARN_ON(sk->sk_forward_alloc); | 154 | WARN_ON(sk->sk_forward_alloc); |
155 | 155 | ||
156 | kfree(inet->opt); | 156 | kfree(rcu_dereference_protected(inet->inet_opt, 1)); |
157 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); | 157 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); |
158 | sk_refcnt_debug_dec(sk); | 158 | sk_refcnt_debug_dec(sk); |
159 | } | 159 | } |
@@ -1106,9 +1106,12 @@ static int inet_sk_reselect_saddr(struct sock *sk) | |||
1106 | struct flowi4 fl4; | 1106 | struct flowi4 fl4; |
1107 | struct rtable *rt; | 1107 | struct rtable *rt; |
1108 | __be32 new_saddr; | 1108 | __be32 new_saddr; |
1109 | struct ip_options_rcu *inet_opt; | ||
1109 | 1110 | ||
1110 | if (inet->opt && inet->opt->srr) | 1111 | inet_opt = rcu_dereference_protected(inet->inet_opt, |
1111 | daddr = inet->opt->faddr; | 1112 | sock_owned_by_user(sk)); |
1113 | if (inet_opt && inet_opt->opt.srr) | ||
1114 | daddr = inet_opt->opt.faddr; | ||
1112 | 1115 | ||
1113 | /* Query new route. */ | 1116 | /* Query new route. */ |
1114 | rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk), | 1117 | rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk), |
@@ -1148,6 +1151,7 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1148 | struct inet_sock *inet = inet_sk(sk); | 1151 | struct inet_sock *inet = inet_sk(sk); |
1149 | struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); | 1152 | struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); |
1150 | __be32 daddr; | 1153 | __be32 daddr; |
1154 | struct ip_options_rcu *inet_opt; | ||
1151 | int err; | 1155 | int err; |
1152 | 1156 | ||
1153 | /* Route is OK, nothing to do. */ | 1157 | /* Route is OK, nothing to do. */ |
@@ -1155,9 +1159,12 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1155 | return 0; | 1159 | return 0; |
1156 | 1160 | ||
1157 | /* Reroute. */ | 1161 | /* Reroute. */ |
1162 | rcu_read_lock(); | ||
1163 | inet_opt = rcu_dereference(inet->inet_opt); | ||
1158 | daddr = inet->inet_daddr; | 1164 | daddr = inet->inet_daddr; |
1159 | if (inet->opt && inet->opt->srr) | 1165 | if (inet_opt && inet_opt->opt.srr) |
1160 | daddr = inet->opt->faddr; | 1166 | daddr = inet_opt->opt.faddr; |
1167 | rcu_read_unlock(); | ||
1161 | rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, | 1168 | rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, |
1162 | inet->inet_dport, inet->inet_sport, | 1169 | inet->inet_dport, inet->inet_sport, |
1163 | sk->sk_protocol, RT_CONN_FLAGS(sk), | 1170 | sk->sk_protocol, RT_CONN_FLAGS(sk), |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a0af7ea87870..2b3c23c287cd 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, | |||
1857 | return CIPSO_V4_HDR_LEN + ret_val; | 1857 | return CIPSO_V4_HDR_LEN + ret_val; |
1858 | } | 1858 | } |
1859 | 1859 | ||
1860 | static void opt_kfree_rcu(struct rcu_head *head) | ||
1861 | { | ||
1862 | kfree(container_of(head, struct ip_options_rcu, rcu)); | ||
1863 | } | ||
1864 | |||
1860 | /** | 1865 | /** |
1861 | * cipso_v4_sock_setattr - Add a CIPSO option to a socket | 1866 | * cipso_v4_sock_setattr - Add a CIPSO option to a socket |
1862 | * @sk: the socket | 1867 | * @sk: the socket |
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk, | |||
1879 | unsigned char *buf = NULL; | 1884 | unsigned char *buf = NULL; |
1880 | u32 buf_len; | 1885 | u32 buf_len; |
1881 | u32 opt_len; | 1886 | u32 opt_len; |
1882 | struct ip_options *opt = NULL; | 1887 | struct ip_options_rcu *old, *opt = NULL; |
1883 | struct inet_sock *sk_inet; | 1888 | struct inet_sock *sk_inet; |
1884 | struct inet_connection_sock *sk_conn; | 1889 | struct inet_connection_sock *sk_conn; |
1885 | 1890 | ||
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk, | |||
1915 | ret_val = -ENOMEM; | 1920 | ret_val = -ENOMEM; |
1916 | goto socket_setattr_failure; | 1921 | goto socket_setattr_failure; |
1917 | } | 1922 | } |
1918 | memcpy(opt->__data, buf, buf_len); | 1923 | memcpy(opt->opt.__data, buf, buf_len); |
1919 | opt->optlen = opt_len; | 1924 | opt->opt.optlen = opt_len; |
1920 | opt->cipso = sizeof(struct iphdr); | 1925 | opt->opt.cipso = sizeof(struct iphdr); |
1921 | kfree(buf); | 1926 | kfree(buf); |
1922 | buf = NULL; | 1927 | buf = NULL; |
1923 | 1928 | ||
1924 | sk_inet = inet_sk(sk); | 1929 | sk_inet = inet_sk(sk); |
1930 | |||
1931 | old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk)); | ||
1925 | if (sk_inet->is_icsk) { | 1932 | if (sk_inet->is_icsk) { |
1926 | sk_conn = inet_csk(sk); | 1933 | sk_conn = inet_csk(sk); |
1927 | if (sk_inet->opt) | 1934 | if (old) |
1928 | sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; | 1935 | sk_conn->icsk_ext_hdr_len -= old->opt.optlen; |
1929 | sk_conn->icsk_ext_hdr_len += opt->optlen; | 1936 | sk_conn->icsk_ext_hdr_len += opt->opt.optlen; |
1930 | sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); | 1937 | sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); |
1931 | } | 1938 | } |
1932 | opt = xchg(&sk_inet->opt, opt); | 1939 | rcu_assign_pointer(sk_inet->inet_opt, opt); |
1933 | kfree(opt); | 1940 | if (old) |
1941 | call_rcu(&old->rcu, opt_kfree_rcu); | ||
1934 | 1942 | ||
1935 | return 0; | 1943 | return 0; |
1936 | 1944 | ||
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req, | |||
1960 | unsigned char *buf = NULL; | 1968 | unsigned char *buf = NULL; |
1961 | u32 buf_len; | 1969 | u32 buf_len; |
1962 | u32 opt_len; | 1970 | u32 opt_len; |
1963 | struct ip_options *opt = NULL; | 1971 | struct ip_options_rcu *opt = NULL; |
1964 | struct inet_request_sock *req_inet; | 1972 | struct inet_request_sock *req_inet; |
1965 | 1973 | ||
1966 | /* We allocate the maximum CIPSO option size here so we are probably | 1974 | /* We allocate the maximum CIPSO option size here so we are probably |
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req, | |||
1988 | ret_val = -ENOMEM; | 1996 | ret_val = -ENOMEM; |
1989 | goto req_setattr_failure; | 1997 | goto req_setattr_failure; |
1990 | } | 1998 | } |
1991 | memcpy(opt->__data, buf, buf_len); | 1999 | memcpy(opt->opt.__data, buf, buf_len); |
1992 | opt->optlen = opt_len; | 2000 | opt->opt.optlen = opt_len; |
1993 | opt->cipso = sizeof(struct iphdr); | 2001 | opt->opt.cipso = sizeof(struct iphdr); |
1994 | kfree(buf); | 2002 | kfree(buf); |
1995 | buf = NULL; | 2003 | buf = NULL; |
1996 | 2004 | ||
1997 | req_inet = inet_rsk(req); | 2005 | req_inet = inet_rsk(req); |
1998 | opt = xchg(&req_inet->opt, opt); | 2006 | opt = xchg(&req_inet->opt, opt); |
1999 | kfree(opt); | 2007 | if (opt) |
2008 | call_rcu(&opt->rcu, opt_kfree_rcu); | ||
2000 | 2009 | ||
2001 | return 0; | 2010 | return 0; |
2002 | 2011 | ||
@@ -2016,34 +2025,34 @@ req_setattr_failure: | |||
2016 | * values on failure. | 2025 | * values on failure. |
2017 | * | 2026 | * |
2018 | */ | 2027 | */ |
2019 | static int cipso_v4_delopt(struct ip_options **opt_ptr) | 2028 | static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) |
2020 | { | 2029 | { |
2021 | int hdr_delta = 0; | 2030 | int hdr_delta = 0; |
2022 | struct ip_options *opt = *opt_ptr; | 2031 | struct ip_options_rcu *opt = *opt_ptr; |
2023 | 2032 | ||
2024 | if (opt->srr || opt->rr || opt->ts || opt->router_alert) { | 2033 | if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { |
2025 | u8 cipso_len; | 2034 | u8 cipso_len; |
2026 | u8 cipso_off; | 2035 | u8 cipso_off; |
2027 | unsigned char *cipso_ptr; | 2036 | unsigned char *cipso_ptr; |
2028 | int iter; | 2037 | int iter; |
2029 | int optlen_new; | 2038 | int optlen_new; |
2030 | 2039 | ||
2031 | cipso_off = opt->cipso - sizeof(struct iphdr); | 2040 | cipso_off = opt->opt.cipso - sizeof(struct iphdr); |
2032 | cipso_ptr = &opt->__data[cipso_off]; | 2041 | cipso_ptr = &opt->opt.__data[cipso_off]; |
2033 | cipso_len = cipso_ptr[1]; | 2042 | cipso_len = cipso_ptr[1]; |
2034 | 2043 | ||
2035 | if (opt->srr > opt->cipso) | 2044 | if (opt->opt.srr > opt->opt.cipso) |
2036 | opt->srr -= cipso_len; | 2045 | opt->opt.srr -= cipso_len; |
2037 | if (opt->rr > opt->cipso) | 2046 | if (opt->opt.rr > opt->opt.cipso) |
2038 | opt->rr -= cipso_len; | 2047 | opt->opt.rr -= cipso_len; |
2039 | if (opt->ts > opt->cipso) | 2048 | if (opt->opt.ts > opt->opt.cipso) |
2040 | opt->ts -= cipso_len; | 2049 | opt->opt.ts -= cipso_len; |
2041 | if (opt->router_alert > opt->cipso) | 2050 | if (opt->opt.router_alert > opt->opt.cipso) |
2042 | opt->router_alert -= cipso_len; | 2051 | opt->opt.router_alert -= cipso_len; |
2043 | opt->cipso = 0; | 2052 | opt->opt.cipso = 0; |
2044 | 2053 | ||
2045 | memmove(cipso_ptr, cipso_ptr + cipso_len, | 2054 | memmove(cipso_ptr, cipso_ptr + cipso_len, |
2046 | opt->optlen - cipso_off - cipso_len); | 2055 | opt->opt.optlen - cipso_off - cipso_len); |
2047 | 2056 | ||
2048 | /* determining the new total option length is tricky because of | 2057 | /* determining the new total option length is tricky because of |
2049 | * the padding necessary, the only thing i can think to do at | 2058 | * the padding necessary, the only thing i can think to do at |
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) | |||
2052 | * from there we can determine the new total option length */ | 2061 | * from there we can determine the new total option length */ |
2053 | iter = 0; | 2062 | iter = 0; |
2054 | optlen_new = 0; | 2063 | optlen_new = 0; |
2055 | while (iter < opt->optlen) | 2064 | while (iter < opt->opt.optlen) |
2056 | if (opt->__data[iter] != IPOPT_NOP) { | 2065 | if (opt->opt.__data[iter] != IPOPT_NOP) { |
2057 | iter += opt->__data[iter + 1]; | 2066 | iter += opt->opt.__data[iter + 1]; |
2058 | optlen_new = iter; | 2067 | optlen_new = iter; |
2059 | } else | 2068 | } else |
2060 | iter++; | 2069 | iter++; |
2061 | hdr_delta = opt->optlen; | 2070 | hdr_delta = opt->opt.optlen; |
2062 | opt->optlen = (optlen_new + 3) & ~3; | 2071 | opt->opt.optlen = (optlen_new + 3) & ~3; |
2063 | hdr_delta -= opt->optlen; | 2072 | hdr_delta -= opt->opt.optlen; |
2064 | } else { | 2073 | } else { |
2065 | /* only the cipso option was present on the socket so we can | 2074 | /* only the cipso option was present on the socket so we can |
2066 | * remove the entire option struct */ | 2075 | * remove the entire option struct */ |
2067 | *opt_ptr = NULL; | 2076 | *opt_ptr = NULL; |
2068 | hdr_delta = opt->optlen; | 2077 | hdr_delta = opt->opt.optlen; |
2069 | kfree(opt); | 2078 | call_rcu(&opt->rcu, opt_kfree_rcu); |
2070 | } | 2079 | } |
2071 | 2080 | ||
2072 | return hdr_delta; | 2081 | return hdr_delta; |
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) | |||
2083 | void cipso_v4_sock_delattr(struct sock *sk) | 2092 | void cipso_v4_sock_delattr(struct sock *sk) |
2084 | { | 2093 | { |
2085 | int hdr_delta; | 2094 | int hdr_delta; |
2086 | struct ip_options *opt; | 2095 | struct ip_options_rcu *opt; |
2087 | struct inet_sock *sk_inet; | 2096 | struct inet_sock *sk_inet; |
2088 | 2097 | ||
2089 | sk_inet = inet_sk(sk); | 2098 | sk_inet = inet_sk(sk); |
2090 | opt = sk_inet->opt; | 2099 | opt = rcu_dereference_protected(sk_inet->inet_opt, 1); |
2091 | if (opt == NULL || opt->cipso == 0) | 2100 | if (opt == NULL || opt->opt.cipso == 0) |
2092 | return; | 2101 | return; |
2093 | 2102 | ||
2094 | hdr_delta = cipso_v4_delopt(&sk_inet->opt); | 2103 | hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); |
2095 | if (sk_inet->is_icsk && hdr_delta > 0) { | 2104 | if (sk_inet->is_icsk && hdr_delta > 0) { |
2096 | struct inet_connection_sock *sk_conn = inet_csk(sk); | 2105 | struct inet_connection_sock *sk_conn = inet_csk(sk); |
2097 | sk_conn->icsk_ext_hdr_len -= hdr_delta; | 2106 | sk_conn->icsk_ext_hdr_len -= hdr_delta; |
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk) | |||
2109 | */ | 2118 | */ |
2110 | void cipso_v4_req_delattr(struct request_sock *req) | 2119 | void cipso_v4_req_delattr(struct request_sock *req) |
2111 | { | 2120 | { |
2112 | struct ip_options *opt; | 2121 | struct ip_options_rcu *opt; |
2113 | struct inet_request_sock *req_inet; | 2122 | struct inet_request_sock *req_inet; |
2114 | 2123 | ||
2115 | req_inet = inet_rsk(req); | 2124 | req_inet = inet_rsk(req); |
2116 | opt = req_inet->opt; | 2125 | opt = req_inet->opt; |
2117 | if (opt == NULL || opt->cipso == 0) | 2126 | if (opt == NULL || opt->opt.cipso == 0) |
2118 | return; | 2127 | return; |
2119 | 2128 | ||
2120 | cipso_v4_delopt(&req_inet->opt); | 2129 | cipso_v4_delopt(&req_inet->opt); |
@@ -2184,14 +2193,18 @@ getattr_return: | |||
2184 | */ | 2193 | */ |
2185 | int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) | 2194 | int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) |
2186 | { | 2195 | { |
2187 | struct ip_options *opt; | 2196 | struct ip_options_rcu *opt; |
2197 | int res = -ENOMSG; | ||
2188 | 2198 | ||
2189 | opt = inet_sk(sk)->opt; | 2199 | rcu_read_lock(); |
2190 | if (opt == NULL || opt->cipso == 0) | 2200 | opt = rcu_dereference(inet_sk(sk)->inet_opt); |
2191 | return -ENOMSG; | 2201 | if (opt && opt->opt.cipso) |
2192 | 2202 | res = cipso_v4_getattr(opt->opt.__data + | |
2193 | return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), | 2203 | opt->opt.cipso - |
2194 | secattr); | 2204 | sizeof(struct iphdr), |
2205 | secattr); | ||
2206 | rcu_read_unlock(); | ||
2207 | return res; | ||
2195 | } | 2208 | } |
2196 | 2209 | ||
2197 | /** | 2210 | /** |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 74e35e5736e2..cfeca3c2152d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -108,8 +108,7 @@ struct icmp_bxm { | |||
108 | __be32 times[3]; | 108 | __be32 times[3]; |
109 | } data; | 109 | } data; |
110 | int head_len; | 110 | int head_len; |
111 | struct ip_options replyopts; | 111 | struct ip_options_data replyopts; |
112 | unsigned char optbuf[40]; | ||
113 | }; | 112 | }; |
114 | 113 | ||
115 | /* An array of errno for error messages from dest unreach. */ | 114 | /* An array of errno for error messages from dest unreach. */ |
@@ -333,7 +332,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
333 | struct inet_sock *inet; | 332 | struct inet_sock *inet; |
334 | __be32 daddr; | 333 | __be32 daddr; |
335 | 334 | ||
336 | if (ip_options_echo(&icmp_param->replyopts, skb)) | 335 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) |
337 | return; | 336 | return; |
338 | 337 | ||
339 | sk = icmp_xmit_lock(net); | 338 | sk = icmp_xmit_lock(net); |
@@ -347,10 +346,10 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
347 | daddr = ipc.addr = rt->rt_src; | 346 | daddr = ipc.addr = rt->rt_src; |
348 | ipc.opt = NULL; | 347 | ipc.opt = NULL; |
349 | ipc.tx_flags = 0; | 348 | ipc.tx_flags = 0; |
350 | if (icmp_param->replyopts.optlen) { | 349 | if (icmp_param->replyopts.opt.opt.optlen) { |
351 | ipc.opt = &icmp_param->replyopts; | 350 | ipc.opt = &icmp_param->replyopts.opt; |
352 | if (ipc.opt->srr) | 351 | if (ipc.opt->opt.srr) |
353 | daddr = icmp_param->replyopts.faddr; | 352 | daddr = icmp_param->replyopts.opt.opt.faddr; |
354 | } | 353 | } |
355 | { | 354 | { |
356 | struct flowi4 fl4 = { | 355 | struct flowi4 fl4 = { |
@@ -379,8 +378,8 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, | |||
379 | struct icmp_bxm *param) | 378 | struct icmp_bxm *param) |
380 | { | 379 | { |
381 | struct flowi4 fl4 = { | 380 | struct flowi4 fl4 = { |
382 | .daddr = (param->replyopts.srr ? | 381 | .daddr = (param->replyopts.opt.opt.srr ? |
383 | param->replyopts.faddr : iph->saddr), | 382 | param->replyopts.opt.opt.faddr : iph->saddr), |
384 | .saddr = saddr, | 383 | .saddr = saddr, |
385 | .flowi4_tos = RT_TOS(tos), | 384 | .flowi4_tos = RT_TOS(tos), |
386 | .flowi4_proto = IPPROTO_ICMP, | 385 | .flowi4_proto = IPPROTO_ICMP, |
@@ -581,7 +580,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
581 | IPTOS_PREC_INTERNETCONTROL) : | 580 | IPTOS_PREC_INTERNETCONTROL) : |
582 | iph->tos; | 581 | iph->tos; |
583 | 582 | ||
584 | if (ip_options_echo(&icmp_param.replyopts, skb_in)) | 583 | if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) |
585 | goto out_unlock; | 584 | goto out_unlock; |
586 | 585 | ||
587 | 586 | ||
@@ -597,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
597 | icmp_param.offset = skb_network_offset(skb_in); | 596 | icmp_param.offset = skb_network_offset(skb_in); |
598 | inet_sk(sk)->tos = tos; | 597 | inet_sk(sk)->tos = tos; |
599 | ipc.addr = iph->saddr; | 598 | ipc.addr = iph->saddr; |
600 | ipc.opt = &icmp_param.replyopts; | 599 | ipc.opt = &icmp_param.replyopts.opt; |
601 | ipc.tx_flags = 0; | 600 | ipc.tx_flags = 0; |
602 | 601 | ||
603 | rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, | 602 | rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, |
@@ -613,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
613 | room = dst_mtu(&rt->dst); | 612 | room = dst_mtu(&rt->dst); |
614 | if (room > 576) | 613 | if (room > 576) |
615 | room = 576; | 614 | room = 576; |
616 | room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; | 615 | room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; |
617 | room -= sizeof(struct icmphdr); | 616 | room -= sizeof(struct icmphdr); |
618 | 617 | ||
619 | icmp_param.data_len = skb_in->len - icmp_param.offset; | 618 | icmp_param.data_len = skb_in->len - icmp_param.offset; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8514db54a7f4..3282cb2de393 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -354,20 +354,20 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
354 | { | 354 | { |
355 | struct rtable *rt; | 355 | struct rtable *rt; |
356 | const struct inet_request_sock *ireq = inet_rsk(req); | 356 | const struct inet_request_sock *ireq = inet_rsk(req); |
357 | struct ip_options *opt = inet_rsk(req)->opt; | 357 | struct ip_options_rcu *opt = inet_rsk(req)->opt; |
358 | struct net *net = sock_net(sk); | 358 | struct net *net = sock_net(sk); |
359 | struct flowi4 fl4; | 359 | struct flowi4 fl4; |
360 | 360 | ||
361 | flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, | 361 | flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, |
362 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, | 362 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, |
363 | sk->sk_protocol, inet_sk_flowi_flags(sk), | 363 | sk->sk_protocol, inet_sk_flowi_flags(sk), |
364 | (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, | 364 | (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, |
365 | ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); | 365 | ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); |
366 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | 366 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); |
367 | rt = ip_route_output_flow(net, &fl4, sk); | 367 | rt = ip_route_output_flow(net, &fl4, sk); |
368 | if (IS_ERR(rt)) | 368 | if (IS_ERR(rt)) |
369 | goto no_route; | 369 | goto no_route; |
370 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 370 | if (opt && opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway) |
371 | goto route_err; | 371 | goto route_err; |
372 | return &rt->dst; | 372 | return &rt->dst; |
373 | 373 | ||
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 2391b24e8251..01fc40965848 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -36,7 +36,7 @@ | |||
36 | * saddr is address of outgoing interface. | 36 | * saddr is address of outgoing interface. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | 39 | void ip_options_build(struct sk_buff *skb, struct ip_options *opt, |
40 | __be32 daddr, struct rtable *rt, int is_frag) | 40 | __be32 daddr, struct rtable *rt, int is_frag) |
41 | { | 41 | { |
42 | unsigned char *iph = skb_network_header(skb); | 42 | unsigned char *iph = skb_network_header(skb); |
@@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | |||
83 | * NOTE: dopt cannot point to skb. | 83 | * NOTE: dopt cannot point to skb. |
84 | */ | 84 | */ |
85 | 85 | ||
86 | int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | 86 | int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) |
87 | { | 87 | { |
88 | struct ip_options *sopt; | 88 | const struct ip_options *sopt; |
89 | unsigned char *sptr, *dptr; | 89 | unsigned char *sptr, *dptr; |
90 | int soffset, doffset; | 90 | int soffset, doffset; |
91 | int optlen; | 91 | int optlen; |
@@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
95 | 95 | ||
96 | sopt = &(IPCB(skb)->opt); | 96 | sopt = &(IPCB(skb)->opt); |
97 | 97 | ||
98 | if (sopt->optlen == 0) { | 98 | if (sopt->optlen == 0) |
99 | dopt->optlen = 0; | ||
100 | return 0; | 99 | return 0; |
101 | } | ||
102 | 100 | ||
103 | sptr = skb_network_header(skb); | 101 | sptr = skb_network_header(skb); |
104 | dptr = dopt->__data; | 102 | dptr = dopt->__data; |
@@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
157 | dopt->optlen += optlen; | 155 | dopt->optlen += optlen; |
158 | } | 156 | } |
159 | if (sopt->srr) { | 157 | if (sopt->srr) { |
160 | unsigned char * start = sptr+sopt->srr; | 158 | unsigned char *start = sptr+sopt->srr; |
161 | __be32 faddr; | 159 | __be32 faddr; |
162 | 160 | ||
163 | optlen = start[1]; | 161 | optlen = start[1]; |
@@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt) | |||
499 | } | 497 | } |
500 | } | 498 | } |
501 | 499 | ||
502 | static struct ip_options *ip_options_get_alloc(const int optlen) | 500 | static struct ip_options_rcu *ip_options_get_alloc(const int optlen) |
503 | { | 501 | { |
504 | return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), | 502 | return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), |
505 | GFP_KERNEL); | 503 | GFP_KERNEL); |
506 | } | 504 | } |
507 | 505 | ||
508 | static int ip_options_get_finish(struct net *net, struct ip_options **optp, | 506 | static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, |
509 | struct ip_options *opt, int optlen) | 507 | struct ip_options_rcu *opt, int optlen) |
510 | { | 508 | { |
511 | while (optlen & 3) | 509 | while (optlen & 3) |
512 | opt->__data[optlen++] = IPOPT_END; | 510 | opt->opt.__data[optlen++] = IPOPT_END; |
513 | opt->optlen = optlen; | 511 | opt->opt.optlen = optlen; |
514 | if (optlen && ip_options_compile(net, opt, NULL)) { | 512 | if (optlen && ip_options_compile(net, &opt->opt, NULL)) { |
515 | kfree(opt); | 513 | kfree(opt); |
516 | return -EINVAL; | 514 | return -EINVAL; |
517 | } | 515 | } |
@@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp, | |||
520 | return 0; | 518 | return 0; |
521 | } | 519 | } |
522 | 520 | ||
523 | int ip_options_get_from_user(struct net *net, struct ip_options **optp, | 521 | int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, |
524 | unsigned char __user *data, int optlen) | 522 | unsigned char __user *data, int optlen) |
525 | { | 523 | { |
526 | struct ip_options *opt = ip_options_get_alloc(optlen); | 524 | struct ip_options_rcu *opt = ip_options_get_alloc(optlen); |
527 | 525 | ||
528 | if (!opt) | 526 | if (!opt) |
529 | return -ENOMEM; | 527 | return -ENOMEM; |
530 | if (optlen && copy_from_user(opt->__data, data, optlen)) { | 528 | if (optlen && copy_from_user(opt->opt.__data, data, optlen)) { |
531 | kfree(opt); | 529 | kfree(opt); |
532 | return -EFAULT; | 530 | return -EFAULT; |
533 | } | 531 | } |
534 | return ip_options_get_finish(net, optp, opt, optlen); | 532 | return ip_options_get_finish(net, optp, opt, optlen); |
535 | } | 533 | } |
536 | 534 | ||
537 | int ip_options_get(struct net *net, struct ip_options **optp, | 535 | int ip_options_get(struct net *net, struct ip_options_rcu **optp, |
538 | unsigned char *data, int optlen) | 536 | unsigned char *data, int optlen) |
539 | { | 537 | { |
540 | struct ip_options *opt = ip_options_get_alloc(optlen); | 538 | struct ip_options_rcu *opt = ip_options_get_alloc(optlen); |
541 | 539 | ||
542 | if (!opt) | 540 | if (!opt) |
543 | return -ENOMEM; | 541 | return -ENOMEM; |
544 | if (optlen) | 542 | if (optlen) |
545 | memcpy(opt->__data, data, optlen); | 543 | memcpy(opt->opt.__data, data, optlen); |
546 | return ip_options_get_finish(net, optp, opt, optlen); | 544 | return ip_options_get_finish(net, optp, opt, optlen); |
547 | } | 545 | } |
548 | 546 | ||
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index bdad3d60aa82..362e66f7d2fb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | |||
140 | * | 140 | * |
141 | */ | 141 | */ |
142 | int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | 142 | int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, |
143 | __be32 saddr, __be32 daddr, struct ip_options *opt) | 143 | __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) |
144 | { | 144 | { |
145 | struct inet_sock *inet = inet_sk(sk); | 145 | struct inet_sock *inet = inet_sk(sk); |
146 | struct rtable *rt = skb_rtable(skb); | 146 | struct rtable *rt = skb_rtable(skb); |
147 | struct iphdr *iph; | 147 | struct iphdr *iph; |
148 | 148 | ||
149 | /* Build the IP header. */ | 149 | /* Build the IP header. */ |
150 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 150 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); |
151 | skb_reset_network_header(skb); | 151 | skb_reset_network_header(skb); |
152 | iph = ip_hdr(skb); | 152 | iph = ip_hdr(skb); |
153 | iph->version = 4; | 153 | iph->version = 4; |
@@ -163,9 +163,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
163 | iph->protocol = sk->sk_protocol; | 163 | iph->protocol = sk->sk_protocol; |
164 | ip_select_ident(iph, &rt->dst, sk); | 164 | ip_select_ident(iph, &rt->dst, sk); |
165 | 165 | ||
166 | if (opt && opt->optlen) { | 166 | if (opt && opt->opt.optlen) { |
167 | iph->ihl += opt->optlen>>2; | 167 | iph->ihl += opt->opt.optlen>>2; |
168 | ip_options_build(skb, opt, daddr, rt, 0); | 168 | ip_options_build(skb, &opt->opt, daddr, rt, 0); |
169 | } | 169 | } |
170 | 170 | ||
171 | skb->priority = sk->sk_priority; | 171 | skb->priority = sk->sk_priority; |
@@ -316,7 +316,7 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
316 | { | 316 | { |
317 | struct sock *sk = skb->sk; | 317 | struct sock *sk = skb->sk; |
318 | struct inet_sock *inet = inet_sk(sk); | 318 | struct inet_sock *inet = inet_sk(sk); |
319 | struct ip_options *opt = inet->opt; | 319 | struct ip_options_rcu *inet_opt; |
320 | struct rtable *rt; | 320 | struct rtable *rt; |
321 | struct iphdr *iph; | 321 | struct iphdr *iph; |
322 | int res; | 322 | int res; |
@@ -325,6 +325,7 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
325 | * f.e. by something like SCTP. | 325 | * f.e. by something like SCTP. |
326 | */ | 326 | */ |
327 | rcu_read_lock(); | 327 | rcu_read_lock(); |
328 | inet_opt = rcu_dereference(inet->inet_opt); | ||
328 | rt = skb_rtable(skb); | 329 | rt = skb_rtable(skb); |
329 | if (rt != NULL) | 330 | if (rt != NULL) |
330 | goto packet_routed; | 331 | goto packet_routed; |
@@ -336,8 +337,8 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
336 | 337 | ||
337 | /* Use correct destination address if we have options. */ | 338 | /* Use correct destination address if we have options. */ |
338 | daddr = inet->inet_daddr; | 339 | daddr = inet->inet_daddr; |
339 | if(opt && opt->srr) | 340 | if (inet_opt && inet_opt->opt.srr) |
340 | daddr = opt->faddr; | 341 | daddr = inet_opt->opt.faddr; |
341 | 342 | ||
342 | /* If this fails, retransmit mechanism of transport layer will | 343 | /* If this fails, retransmit mechanism of transport layer will |
343 | * keep trying until route appears or the connection times | 344 | * keep trying until route appears or the connection times |
@@ -357,11 +358,11 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
357 | skb_dst_set_noref(skb, &rt->dst); | 358 | skb_dst_set_noref(skb, &rt->dst); |
358 | 359 | ||
359 | packet_routed: | 360 | packet_routed: |
360 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 361 | if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway) |
361 | goto no_route; | 362 | goto no_route; |
362 | 363 | ||
363 | /* OK, we know where to send it, allocate and build IP header. */ | 364 | /* OK, we know where to send it, allocate and build IP header. */ |
364 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 365 | skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0)); |
365 | skb_reset_network_header(skb); | 366 | skb_reset_network_header(skb); |
366 | iph = ip_hdr(skb); | 367 | iph = ip_hdr(skb); |
367 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 368 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
@@ -375,9 +376,9 @@ packet_routed: | |||
375 | iph->daddr = rt->rt_dst; | 376 | iph->daddr = rt->rt_dst; |
376 | /* Transport layer set skb->h.foo itself. */ | 377 | /* Transport layer set skb->h.foo itself. */ |
377 | 378 | ||
378 | if (opt && opt->optlen) { | 379 | if (inet_opt && inet_opt->opt.optlen) { |
379 | iph->ihl += opt->optlen >> 2; | 380 | iph->ihl += inet_opt->opt.optlen >> 2; |
380 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); | 381 | ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); |
381 | } | 382 | } |
382 | 383 | ||
383 | ip_select_ident_more(iph, &rt->dst, sk, | 384 | ip_select_ident_more(iph, &rt->dst, sk, |
@@ -1033,7 +1034,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | |||
1033 | struct ipcm_cookie *ipc, struct rtable **rtp) | 1034 | struct ipcm_cookie *ipc, struct rtable **rtp) |
1034 | { | 1035 | { |
1035 | struct inet_sock *inet = inet_sk(sk); | 1036 | struct inet_sock *inet = inet_sk(sk); |
1036 | struct ip_options *opt; | 1037 | struct ip_options_rcu *opt; |
1037 | struct rtable *rt; | 1038 | struct rtable *rt; |
1038 | 1039 | ||
1039 | /* | 1040 | /* |
@@ -1047,7 +1048,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | |||
1047 | if (unlikely(cork->opt == NULL)) | 1048 | if (unlikely(cork->opt == NULL)) |
1048 | return -ENOBUFS; | 1049 | return -ENOBUFS; |
1049 | } | 1050 | } |
1050 | memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); | 1051 | memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); |
1051 | cork->flags |= IPCORK_OPT; | 1052 | cork->flags |= IPCORK_OPT; |
1052 | cork->addr = ipc->addr; | 1053 | cork->addr = ipc->addr; |
1053 | } | 1054 | } |
@@ -1451,26 +1452,23 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1451 | unsigned int len) | 1452 | unsigned int len) |
1452 | { | 1453 | { |
1453 | struct inet_sock *inet = inet_sk(sk); | 1454 | struct inet_sock *inet = inet_sk(sk); |
1454 | struct { | 1455 | struct ip_options_data replyopts; |
1455 | struct ip_options opt; | ||
1456 | char data[40]; | ||
1457 | } replyopts; | ||
1458 | struct ipcm_cookie ipc; | 1456 | struct ipcm_cookie ipc; |
1459 | __be32 daddr; | 1457 | __be32 daddr; |
1460 | struct rtable *rt = skb_rtable(skb); | 1458 | struct rtable *rt = skb_rtable(skb); |
1461 | 1459 | ||
1462 | if (ip_options_echo(&replyopts.opt, skb)) | 1460 | if (ip_options_echo(&replyopts.opt.opt, skb)) |
1463 | return; | 1461 | return; |
1464 | 1462 | ||
1465 | daddr = ipc.addr = rt->rt_src; | 1463 | daddr = ipc.addr = rt->rt_src; |
1466 | ipc.opt = NULL; | 1464 | ipc.opt = NULL; |
1467 | ipc.tx_flags = 0; | 1465 | ipc.tx_flags = 0; |
1468 | 1466 | ||
1469 | if (replyopts.opt.optlen) { | 1467 | if (replyopts.opt.opt.optlen) { |
1470 | ipc.opt = &replyopts.opt; | 1468 | ipc.opt = &replyopts.opt; |
1471 | 1469 | ||
1472 | if (ipc.opt->srr) | 1470 | if (replyopts.opt.opt.srr) |
1473 | daddr = replyopts.opt.faddr; | 1471 | daddr = replyopts.opt.opt.faddr; |
1474 | } | 1472 | } |
1475 | 1473 | ||
1476 | { | 1474 | { |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 9640900309bb..ab0c9efd1efa 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -451,6 +451,11 @@ out: | |||
451 | } | 451 | } |
452 | 452 | ||
453 | 453 | ||
454 | static void opt_kfree_rcu(struct rcu_head *head) | ||
455 | { | ||
456 | kfree(container_of(head, struct ip_options_rcu, rcu)); | ||
457 | } | ||
458 | |||
454 | /* | 459 | /* |
455 | * Socket option code for IP. This is the end of the line after any | 460 | * Socket option code for IP. This is the end of the line after any |
456 | * TCP,UDP etc options on an IP socket. | 461 | * TCP,UDP etc options on an IP socket. |
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
497 | switch (optname) { | 502 | switch (optname) { |
498 | case IP_OPTIONS: | 503 | case IP_OPTIONS: |
499 | { | 504 | { |
500 | struct ip_options *opt = NULL; | 505 | struct ip_options_rcu *old, *opt = NULL; |
506 | |||
501 | if (optlen > 40) | 507 | if (optlen > 40) |
502 | goto e_inval; | 508 | goto e_inval; |
503 | err = ip_options_get_from_user(sock_net(sk), &opt, | 509 | err = ip_options_get_from_user(sock_net(sk), &opt, |
504 | optval, optlen); | 510 | optval, optlen); |
505 | if (err) | 511 | if (err) |
506 | break; | 512 | break; |
513 | old = rcu_dereference_protected(inet->inet_opt, | ||
514 | sock_owned_by_user(sk)); | ||
507 | if (inet->is_icsk) { | 515 | if (inet->is_icsk) { |
508 | struct inet_connection_sock *icsk = inet_csk(sk); | 516 | struct inet_connection_sock *icsk = inet_csk(sk); |
509 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 517 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
512 | (TCPF_LISTEN | TCPF_CLOSE)) && | 520 | (TCPF_LISTEN | TCPF_CLOSE)) && |
513 | inet->inet_daddr != LOOPBACK4_IPV6)) { | 521 | inet->inet_daddr != LOOPBACK4_IPV6)) { |
514 | #endif | 522 | #endif |
515 | if (inet->opt) | 523 | if (old) |
516 | icsk->icsk_ext_hdr_len -= inet->opt->optlen; | 524 | icsk->icsk_ext_hdr_len -= old->opt.optlen; |
517 | if (opt) | 525 | if (opt) |
518 | icsk->icsk_ext_hdr_len += opt->optlen; | 526 | icsk->icsk_ext_hdr_len += opt->opt.optlen; |
519 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); | 527 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); |
520 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 528 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
521 | } | 529 | } |
522 | #endif | 530 | #endif |
523 | } | 531 | } |
524 | opt = xchg(&inet->opt, opt); | 532 | rcu_assign_pointer(inet->inet_opt, opt); |
525 | kfree(opt); | 533 | if (old) |
534 | call_rcu(&old->rcu, opt_kfree_rcu); | ||
526 | break; | 535 | break; |
527 | } | 536 | } |
528 | case IP_PKTINFO: | 537 | case IP_PKTINFO: |
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1081 | case IP_OPTIONS: | 1090 | case IP_OPTIONS: |
1082 | { | 1091 | { |
1083 | unsigned char optbuf[sizeof(struct ip_options)+40]; | 1092 | unsigned char optbuf[sizeof(struct ip_options)+40]; |
1084 | struct ip_options * opt = (struct ip_options *)optbuf; | 1093 | struct ip_options *opt = (struct ip_options *)optbuf; |
1094 | struct ip_options_rcu *inet_opt; | ||
1095 | |||
1096 | inet_opt = rcu_dereference_protected(inet->inet_opt, | ||
1097 | sock_owned_by_user(sk)); | ||
1085 | opt->optlen = 0; | 1098 | opt->optlen = 0; |
1086 | if (inet->opt) | 1099 | if (inet_opt) |
1087 | memcpy(optbuf, inet->opt, | 1100 | memcpy(optbuf, &inet_opt->opt, |
1088 | sizeof(struct ip_options)+ | 1101 | sizeof(struct ip_options) + |
1089 | inet->opt->optlen); | 1102 | inet_opt->opt.optlen); |
1090 | release_sock(sk); | 1103 | release_sock(sk); |
1091 | 1104 | ||
1092 | if (opt->optlen == 0) | 1105 | if (opt->optlen == 0) |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index abf14dbcb3b9..a8659e0c4a6e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -460,6 +460,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
460 | __be32 saddr; | 460 | __be32 saddr; |
461 | u8 tos; | 461 | u8 tos; |
462 | int err; | 462 | int err; |
463 | struct ip_options_data opt_copy; | ||
463 | 464 | ||
464 | err = -EMSGSIZE; | 465 | err = -EMSGSIZE; |
465 | if (len > 0xFFFF) | 466 | if (len > 0xFFFF) |
@@ -520,8 +521,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
520 | saddr = ipc.addr; | 521 | saddr = ipc.addr; |
521 | ipc.addr = daddr; | 522 | ipc.addr = daddr; |
522 | 523 | ||
523 | if (!ipc.opt) | 524 | if (!ipc.opt) { |
524 | ipc.opt = inet->opt; | 525 | struct ip_options_rcu *inet_opt; |
526 | |||
527 | rcu_read_lock(); | ||
528 | inet_opt = rcu_dereference(inet->inet_opt); | ||
529 | if (inet_opt) { | ||
530 | memcpy(&opt_copy, inet_opt, | ||
531 | sizeof(*inet_opt) + inet_opt->opt.optlen); | ||
532 | ipc.opt = &opt_copy.opt; | ||
533 | } | ||
534 | rcu_read_unlock(); | ||
535 | } | ||
525 | 536 | ||
526 | if (ipc.opt) { | 537 | if (ipc.opt) { |
527 | err = -EINVAL; | 538 | err = -EINVAL; |
@@ -530,10 +541,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
530 | */ | 541 | */ |
531 | if (inet->hdrincl) | 542 | if (inet->hdrincl) |
532 | goto done; | 543 | goto done; |
533 | if (ipc.opt->srr) { | 544 | if (ipc.opt->opt.srr) { |
534 | if (!daddr) | 545 | if (!daddr) |
535 | goto done; | 546 | goto done; |
536 | daddr = ipc.opt->faddr; | 547 | daddr = ipc.opt->opt.faddr; |
537 | } | 548 | } |
538 | } | 549 | } |
539 | tos = RT_CONN_FLAGS(sk); | 550 | tos = RT_CONN_FLAGS(sk); |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 71e029691908..26461492a847 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
321 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 321 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
322 | */ | 322 | */ |
323 | if (opt && opt->optlen) { | 323 | if (opt && opt->optlen) { |
324 | int opt_size = sizeof(struct ip_options) + opt->optlen; | 324 | int opt_size = sizeof(struct ip_options_rcu) + opt->optlen; |
325 | 325 | ||
326 | ireq->opt = kmalloc(opt_size, GFP_ATOMIC); | 326 | ireq->opt = kmalloc(opt_size, GFP_ATOMIC); |
327 | if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) { | 327 | if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) { |
328 | kfree(ireq->opt); | 328 | kfree(ireq->opt); |
329 | ireq->opt = NULL; | 329 | ireq->opt = NULL; |
330 | } | 330 | } |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 310454c2f4d1..d60732fe5f21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -154,6 +154,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
154 | struct flowi4 fl4; | 154 | struct flowi4 fl4; |
155 | struct rtable *rt; | 155 | struct rtable *rt; |
156 | int err; | 156 | int err; |
157 | struct ip_options_rcu *inet_opt; | ||
157 | 158 | ||
158 | if (addr_len < sizeof(struct sockaddr_in)) | 159 | if (addr_len < sizeof(struct sockaddr_in)) |
159 | return -EINVAL; | 160 | return -EINVAL; |
@@ -162,10 +163,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
162 | return -EAFNOSUPPORT; | 163 | return -EAFNOSUPPORT; |
163 | 164 | ||
164 | nexthop = daddr = usin->sin_addr.s_addr; | 165 | nexthop = daddr = usin->sin_addr.s_addr; |
165 | if (inet->opt && inet->opt->srr) { | 166 | inet_opt = rcu_dereference_protected(inet->inet_opt, |
167 | sock_owned_by_user(sk)); | ||
168 | if (inet_opt && inet_opt->opt.srr) { | ||
166 | if (!daddr) | 169 | if (!daddr) |
167 | return -EINVAL; | 170 | return -EINVAL; |
168 | nexthop = inet->opt->faddr; | 171 | nexthop = inet_opt->opt.faddr; |
169 | } | 172 | } |
170 | 173 | ||
171 | orig_sport = inet->inet_sport; | 174 | orig_sport = inet->inet_sport; |
@@ -186,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
186 | return -ENETUNREACH; | 189 | return -ENETUNREACH; |
187 | } | 190 | } |
188 | 191 | ||
189 | if (!inet->opt || !inet->opt->srr) | 192 | if (!inet_opt || !inet_opt->opt.srr) |
190 | daddr = rt->rt_dst; | 193 | daddr = rt->rt_dst; |
191 | 194 | ||
192 | if (!inet->inet_saddr) | 195 | if (!inet->inet_saddr) |
@@ -222,8 +225,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
222 | inet->inet_daddr = daddr; | 225 | inet->inet_daddr = daddr; |
223 | 226 | ||
224 | inet_csk(sk)->icsk_ext_hdr_len = 0; | 227 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
225 | if (inet->opt) | 228 | if (inet_opt) |
226 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; | 229 | inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
227 | 230 | ||
228 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; | 231 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; |
229 | 232 | ||
@@ -820,17 +823,18 @@ static void syn_flood_warning(const struct sk_buff *skb) | |||
820 | /* | 823 | /* |
821 | * Save and compile IPv4 options into the request_sock if needed. | 824 | * Save and compile IPv4 options into the request_sock if needed. |
822 | */ | 825 | */ |
823 | static struct ip_options *tcp_v4_save_options(struct sock *sk, | 826 | static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, |
824 | struct sk_buff *skb) | 827 | struct sk_buff *skb) |
825 | { | 828 | { |
826 | struct ip_options *opt = &(IPCB(skb)->opt); | 829 | const struct ip_options *opt = &(IPCB(skb)->opt); |
827 | struct ip_options *dopt = NULL; | 830 | struct ip_options_rcu *dopt = NULL; |
828 | 831 | ||
829 | if (opt && opt->optlen) { | 832 | if (opt && opt->optlen) { |
830 | int opt_size = optlength(opt); | 833 | int opt_size = sizeof(*dopt) + opt->optlen; |
834 | |||
831 | dopt = kmalloc(opt_size, GFP_ATOMIC); | 835 | dopt = kmalloc(opt_size, GFP_ATOMIC); |
832 | if (dopt) { | 836 | if (dopt) { |
833 | if (ip_options_echo(dopt, skb)) { | 837 | if (ip_options_echo(&dopt->opt, skb)) { |
834 | kfree(dopt); | 838 | kfree(dopt); |
835 | dopt = NULL; | 839 | dopt = NULL; |
836 | } | 840 | } |
@@ -1411,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1411 | #ifdef CONFIG_TCP_MD5SIG | 1415 | #ifdef CONFIG_TCP_MD5SIG |
1412 | struct tcp_md5sig_key *key; | 1416 | struct tcp_md5sig_key *key; |
1413 | #endif | 1417 | #endif |
1418 | struct ip_options_rcu *inet_opt; | ||
1414 | 1419 | ||
1415 | if (sk_acceptq_is_full(sk)) | 1420 | if (sk_acceptq_is_full(sk)) |
1416 | goto exit_overflow; | 1421 | goto exit_overflow; |
@@ -1431,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1431 | newinet->inet_daddr = ireq->rmt_addr; | 1436 | newinet->inet_daddr = ireq->rmt_addr; |
1432 | newinet->inet_rcv_saddr = ireq->loc_addr; | 1437 | newinet->inet_rcv_saddr = ireq->loc_addr; |
1433 | newinet->inet_saddr = ireq->loc_addr; | 1438 | newinet->inet_saddr = ireq->loc_addr; |
1434 | newinet->opt = ireq->opt; | 1439 | inet_opt = ireq->opt; |
1440 | rcu_assign_pointer(newinet->inet_opt, inet_opt); | ||
1435 | ireq->opt = NULL; | 1441 | ireq->opt = NULL; |
1436 | newinet->mc_index = inet_iif(skb); | 1442 | newinet->mc_index = inet_iif(skb); |
1437 | newinet->mc_ttl = ip_hdr(skb)->ttl; | 1443 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
1438 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | 1444 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1439 | if (newinet->opt) | 1445 | if (inet_opt) |
1440 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; | 1446 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
1441 | newinet->inet_id = newtp->write_seq ^ jiffies; | 1447 | newinet->inet_id = newtp->write_seq ^ jiffies; |
1442 | 1448 | ||
1443 | tcp_mtup_init(newsk); | 1449 | tcp_mtup_init(newsk); |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bc0dab2593e0..544f435d1aff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -804,6 +804,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
804 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 804 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
805 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | 805 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); |
806 | struct sk_buff *skb; | 806 | struct sk_buff *skb; |
807 | struct ip_options_data opt_copy; | ||
807 | 808 | ||
808 | if (len > 0xFFFF) | 809 | if (len > 0xFFFF) |
809 | return -EMSGSIZE; | 810 | return -EMSGSIZE; |
@@ -877,22 +878,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
877 | free = 1; | 878 | free = 1; |
878 | connected = 0; | 879 | connected = 0; |
879 | } | 880 | } |
880 | if (!ipc.opt) | 881 | if (!ipc.opt) { |
881 | ipc.opt = inet->opt; | 882 | struct ip_options_rcu *inet_opt; |
883 | |||
884 | rcu_read_lock(); | ||
885 | inet_opt = rcu_dereference(inet->inet_opt); | ||
886 | if (inet_opt) { | ||
887 | memcpy(&opt_copy, inet_opt, | ||
888 | sizeof(*inet_opt) + inet_opt->opt.optlen); | ||
889 | ipc.opt = &opt_copy.opt; | ||
890 | } | ||
891 | rcu_read_unlock(); | ||
892 | } | ||
882 | 893 | ||
883 | saddr = ipc.addr; | 894 | saddr = ipc.addr; |
884 | ipc.addr = faddr = daddr; | 895 | ipc.addr = faddr = daddr; |
885 | 896 | ||
886 | if (ipc.opt && ipc.opt->srr) { | 897 | if (ipc.opt && ipc.opt->opt.srr) { |
887 | if (!daddr) | 898 | if (!daddr) |
888 | return -EINVAL; | 899 | return -EINVAL; |
889 | faddr = ipc.opt->faddr; | 900 | faddr = ipc.opt->opt.faddr; |
890 | connected = 0; | 901 | connected = 0; |
891 | } | 902 | } |
892 | tos = RT_TOS(inet->tos); | 903 | tos = RT_TOS(inet->tos); |
893 | if (sock_flag(sk, SOCK_LOCALROUTE) || | 904 | if (sock_flag(sk, SOCK_LOCALROUTE) || |
894 | (msg->msg_flags & MSG_DONTROUTE) || | 905 | (msg->msg_flags & MSG_DONTROUTE) || |
895 | (ipc.opt && ipc.opt->is_strictroute)) { | 906 | (ipc.opt && ipc.opt->opt.is_strictroute)) { |
896 | tos |= RTO_ONLINK; | 907 | tos |= RTO_ONLINK; |
897 | connected = 0; | 908 | connected = 0; |
898 | } | 909 | } |