aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2011-04-21 05:45:37 -0400
committer David S. Miller <davem@davemloft.net> 2011-04-28 16:16:35 -0400
commit f6d8bd051c391c1c0458a30b2a7abcd939329259 (patch)
tree 1dc4daecdeb0b42c2c6b59d7d6b41e091c11db5f /net/ipv4/tcp_ipv4.c
parent 0a14842f5a3c0e88a1e59fac5c3025db39721f74 (diff)
inet: add RCU protection to inet->opt
We lack proper synchronization to manipulate inet->opt ip_options. The problem is that ip_make_skb() calls ip_setup_cork(), and ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options) without any protection against another thread manipulating inet->opt. Another thread can change the inet->opt pointer and free the old one under us. Use RCU to protect inet->opt (changed to inet->inet_opt). Instead of handling atomic refcounts, just copy ip_options when necessary, to avoid cache line dirtying. We can't insert an rcu_head in struct ip_options since it's included in skb->cb[], so this patch is large because I had to introduce a new ip_options_rcu structure. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- net/ipv4/tcp_ipv4.c 34
1 files changed, 20 insertions, 14 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 310454c2f4d1..d60732fe5f21 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -154,6 +154,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
154 struct flowi4 fl4; 154 struct flowi4 fl4;
155 struct rtable *rt; 155 struct rtable *rt;
156 int err; 156 int err;
157 struct ip_options_rcu *inet_opt;
157 158
158 if (addr_len < sizeof(struct sockaddr_in)) 159 if (addr_len < sizeof(struct sockaddr_in))
159 return -EINVAL; 160 return -EINVAL;
@@ -162,10 +163,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
162 return -EAFNOSUPPORT; 163 return -EAFNOSUPPORT;
163 164
164 nexthop = daddr = usin->sin_addr.s_addr; 165 nexthop = daddr = usin->sin_addr.s_addr;
165 if (inet->opt && inet->opt->srr) { 166 inet_opt = rcu_dereference_protected(inet->inet_opt,
167 sock_owned_by_user(sk));
168 if (inet_opt && inet_opt->opt.srr) {
166 if (!daddr) 169 if (!daddr)
167 return -EINVAL; 170 return -EINVAL;
168 nexthop = inet->opt->faddr; 171 nexthop = inet_opt->opt.faddr;
169 } 172 }
170 173
171 orig_sport = inet->inet_sport; 174 orig_sport = inet->inet_sport;
@@ -186,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
186 return -ENETUNREACH; 189 return -ENETUNREACH;
187 } 190 }
188 191
189 if (!inet->opt || !inet->opt->srr) 192 if (!inet_opt || !inet_opt->opt.srr)
190 daddr = rt->rt_dst; 193 daddr = rt->rt_dst;
191 194
192 if (!inet->inet_saddr) 195 if (!inet->inet_saddr)
@@ -222,8 +225,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
222 inet->inet_daddr = daddr; 225 inet->inet_daddr = daddr;
223 226
224 inet_csk(sk)->icsk_ext_hdr_len = 0; 227 inet_csk(sk)->icsk_ext_hdr_len = 0;
225 if (inet->opt) 228 if (inet_opt)
226 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 229 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
227 230
228 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 231 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
229 232
@@ -820,17 +823,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
820/* 823/*
821 * Save and compile IPv4 options into the request_sock if needed. 824 * Save and compile IPv4 options into the request_sock if needed.
822 */ 825 */
823static struct ip_options *tcp_v4_save_options(struct sock *sk, 826static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
824 struct sk_buff *skb) 827 struct sk_buff *skb)
825{ 828{
826 struct ip_options *opt = &(IPCB(skb)->opt); 829 const struct ip_options *opt = &(IPCB(skb)->opt);
827 struct ip_options *dopt = NULL; 830 struct ip_options_rcu *dopt = NULL;
828 831
829 if (opt && opt->optlen) { 832 if (opt && opt->optlen) {
830 int opt_size = optlength(opt); 833 int opt_size = sizeof(*dopt) + opt->optlen;
834
831 dopt = kmalloc(opt_size, GFP_ATOMIC); 835 dopt = kmalloc(opt_size, GFP_ATOMIC);
832 if (dopt) { 836 if (dopt) {
833 if (ip_options_echo(dopt, skb)) { 837 if (ip_options_echo(&dopt->opt, skb)) {
834 kfree(dopt); 838 kfree(dopt);
835 dopt = NULL; 839 dopt = NULL;
836 } 840 }
@@ -1411,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1411#ifdef CONFIG_TCP_MD5SIG 1415#ifdef CONFIG_TCP_MD5SIG
1412 struct tcp_md5sig_key *key; 1416 struct tcp_md5sig_key *key;
1413#endif 1417#endif
1418 struct ip_options_rcu *inet_opt;
1414 1419
1415 if (sk_acceptq_is_full(sk)) 1420 if (sk_acceptq_is_full(sk))
1416 goto exit_overflow; 1421 goto exit_overflow;
@@ -1431,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1431 newinet->inet_daddr = ireq->rmt_addr; 1436 newinet->inet_daddr = ireq->rmt_addr;
1432 newinet->inet_rcv_saddr = ireq->loc_addr; 1437 newinet->inet_rcv_saddr = ireq->loc_addr;
1433 newinet->inet_saddr = ireq->loc_addr; 1438 newinet->inet_saddr = ireq->loc_addr;
1434 newinet->opt = ireq->opt; 1439 inet_opt = ireq->opt;
1440 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1435 ireq->opt = NULL; 1441 ireq->opt = NULL;
1436 newinet->mc_index = inet_iif(skb); 1442 newinet->mc_index = inet_iif(skb);
1437 newinet->mc_ttl = ip_hdr(skb)->ttl; 1443 newinet->mc_ttl = ip_hdr(skb)->ttl;
1438 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1444 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1439 if (newinet->opt) 1445 if (inet_opt)
1440 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1446 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1441 newinet->inet_id = newtp->write_seq ^ jiffies; 1447 newinet->inet_id = newtp->write_seq ^ jiffies;
1442 1448
1443 tcp_mtup_init(newsk); 1449 tcp_mtup_init(newsk);