diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-04-21 05:45:37 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-04-28 16:16:35 -0400 |
commit | f6d8bd051c391c1c0458a30b2a7abcd939329259 (patch) | |
tree | 1dc4daecdeb0b42c2c6b59d7d6b41e091c11db5f /net/ipv4/tcp_ipv4.c | |
parent | 0a14842f5a3c0e88a1e59fac5c3025db39721f74 (diff) |
inet: add RCU protection to inet->opt
We lack proper synchronization to manipulate inet->opt ip_options.
The problem is that ip_make_skb() calls ip_setup_cork(), and
ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options)
without any protection against another thread manipulating inet->opt.
Another thread can change the inet->opt pointer and free the old one under us.
Use RCU to protect inet->opt (changed to inet->inet_opt).
Instead of handling atomic refcounts, just copy ip_options when
necessary, to avoid cache line dirtying.
We can't insert an rcu_head in struct ip_options since it's included in
skb->cb[], so this patch is large because I had to introduce a new
ip_options_rcu structure.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 34 |
1 files changed, 20 insertions, 14 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 310454c2f4d1..d60732fe5f21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -154,6 +154,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
154 | struct flowi4 fl4; | 154 | struct flowi4 fl4; |
155 | struct rtable *rt; | 155 | struct rtable *rt; |
156 | int err; | 156 | int err; |
157 | struct ip_options_rcu *inet_opt; | ||
157 | 158 | ||
158 | if (addr_len < sizeof(struct sockaddr_in)) | 159 | if (addr_len < sizeof(struct sockaddr_in)) |
159 | return -EINVAL; | 160 | return -EINVAL; |
@@ -162,10 +163,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
162 | return -EAFNOSUPPORT; | 163 | return -EAFNOSUPPORT; |
163 | 164 | ||
164 | nexthop = daddr = usin->sin_addr.s_addr; | 165 | nexthop = daddr = usin->sin_addr.s_addr; |
165 | if (inet->opt && inet->opt->srr) { | 166 | inet_opt = rcu_dereference_protected(inet->inet_opt, |
167 | sock_owned_by_user(sk)); | ||
168 | if (inet_opt && inet_opt->opt.srr) { | ||
166 | if (!daddr) | 169 | if (!daddr) |
167 | return -EINVAL; | 170 | return -EINVAL; |
168 | nexthop = inet->opt->faddr; | 171 | nexthop = inet_opt->opt.faddr; |
169 | } | 172 | } |
170 | 173 | ||
171 | orig_sport = inet->inet_sport; | 174 | orig_sport = inet->inet_sport; |
@@ -186,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
186 | return -ENETUNREACH; | 189 | return -ENETUNREACH; |
187 | } | 190 | } |
188 | 191 | ||
189 | if (!inet->opt || !inet->opt->srr) | 192 | if (!inet_opt || !inet_opt->opt.srr) |
190 | daddr = rt->rt_dst; | 193 | daddr = rt->rt_dst; |
191 | 194 | ||
192 | if (!inet->inet_saddr) | 195 | if (!inet->inet_saddr) |
@@ -222,8 +225,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
222 | inet->inet_daddr = daddr; | 225 | inet->inet_daddr = daddr; |
223 | 226 | ||
224 | inet_csk(sk)->icsk_ext_hdr_len = 0; | 227 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
225 | if (inet->opt) | 228 | if (inet_opt) |
226 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; | 229 | inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
227 | 230 | ||
228 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; | 231 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; |
229 | 232 | ||
@@ -820,17 +823,18 @@ static void syn_flood_warning(const struct sk_buff *skb) | |||
820 | /* | 823 | /* |
821 | * Save and compile IPv4 options into the request_sock if needed. | 824 | * Save and compile IPv4 options into the request_sock if needed. |
822 | */ | 825 | */ |
823 | static struct ip_options *tcp_v4_save_options(struct sock *sk, | 826 | static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, |
824 | struct sk_buff *skb) | 827 | struct sk_buff *skb) |
825 | { | 828 | { |
826 | struct ip_options *opt = &(IPCB(skb)->opt); | 829 | const struct ip_options *opt = &(IPCB(skb)->opt); |
827 | struct ip_options *dopt = NULL; | 830 | struct ip_options_rcu *dopt = NULL; |
828 | 831 | ||
829 | if (opt && opt->optlen) { | 832 | if (opt && opt->optlen) { |
830 | int opt_size = optlength(opt); | 833 | int opt_size = sizeof(*dopt) + opt->optlen; |
834 | |||
831 | dopt = kmalloc(opt_size, GFP_ATOMIC); | 835 | dopt = kmalloc(opt_size, GFP_ATOMIC); |
832 | if (dopt) { | 836 | if (dopt) { |
833 | if (ip_options_echo(dopt, skb)) { | 837 | if (ip_options_echo(&dopt->opt, skb)) { |
834 | kfree(dopt); | 838 | kfree(dopt); |
835 | dopt = NULL; | 839 | dopt = NULL; |
836 | } | 840 | } |
@@ -1411,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1411 | #ifdef CONFIG_TCP_MD5SIG | 1415 | #ifdef CONFIG_TCP_MD5SIG |
1412 | struct tcp_md5sig_key *key; | 1416 | struct tcp_md5sig_key *key; |
1413 | #endif | 1417 | #endif |
1418 | struct ip_options_rcu *inet_opt; | ||
1414 | 1419 | ||
1415 | if (sk_acceptq_is_full(sk)) | 1420 | if (sk_acceptq_is_full(sk)) |
1416 | goto exit_overflow; | 1421 | goto exit_overflow; |
@@ -1431,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1431 | newinet->inet_daddr = ireq->rmt_addr; | 1436 | newinet->inet_daddr = ireq->rmt_addr; |
1432 | newinet->inet_rcv_saddr = ireq->loc_addr; | 1437 | newinet->inet_rcv_saddr = ireq->loc_addr; |
1433 | newinet->inet_saddr = ireq->loc_addr; | 1438 | newinet->inet_saddr = ireq->loc_addr; |
1434 | newinet->opt = ireq->opt; | 1439 | inet_opt = ireq->opt; |
1440 | rcu_assign_pointer(newinet->inet_opt, inet_opt); | ||
1435 | ireq->opt = NULL; | 1441 | ireq->opt = NULL; |
1436 | newinet->mc_index = inet_iif(skb); | 1442 | newinet->mc_index = inet_iif(skb); |
1437 | newinet->mc_ttl = ip_hdr(skb)->ttl; | 1443 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
1438 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | 1444 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1439 | if (newinet->opt) | 1445 | if (inet_opt) |
1440 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; | 1446 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
1441 | newinet->inet_id = newtp->write_seq ^ jiffies; | 1447 | newinet->inet_id = newtp->write_seq ^ jiffies; |
1442 | 1448 | ||
1443 | tcp_mtup_init(newsk); | 1449 | tcp_mtup_init(newsk); |