aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2011-04-21 05:45:37 -0400
committer David S. Miller <davem@davemloft.net> 2011-04-28 16:16:35 -0400
commit f6d8bd051c391c1c0458a30b2a7abcd939329259 (patch)
tree 1dc4daecdeb0b42c2c6b59d7d6b41e091c11db5f /net/ipv4/ip_output.c
parent 0a14842f5a3c0e88a1e59fac5c3025db39721f74 (diff)
inet: add RCU protection to inet->opt
We lack proper synchronization to manipulate inet->opt ip_options. The problem is that ip_make_skb() calls ip_setup_cork(), and ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options) without any protection against another thread manipulating inet->opt. Another thread can change the inet->opt pointer and free the old one under us. Use RCU to protect inet->opt (changed to inet->inet_opt). Instead of handling atomic refcounts, just copy ip_options when necessary, to avoid cache line dirtying. We can't insert an rcu_head in struct ip_options since it's included in skb->cb[], so this patch is large because I had to introduce a new ip_options_rcu structure. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r-- net/ipv4/ip_output.c 44
1 files changed, 21 insertions, 23 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index bdad3d60aa82..362e66f7d2fb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
140 * 140 *
141 */ 141 */
142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, 142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 __be32 saddr, __be32 daddr, struct ip_options *opt) 143 __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
144{ 144{
145 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
146 struct rtable *rt = skb_rtable(skb); 146 struct rtable *rt = skb_rtable(skb);
147 struct iphdr *iph; 147 struct iphdr *iph;
148 148
149 /* Build the IP header. */ 149 /* Build the IP header. */
150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
151 skb_reset_network_header(skb); 151 skb_reset_network_header(skb);
152 iph = ip_hdr(skb); 152 iph = ip_hdr(skb);
153 iph->version = 4; 153 iph->version = 4;
@@ -163,9 +163,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
163 iph->protocol = sk->sk_protocol; 163 iph->protocol = sk->sk_protocol;
164 ip_select_ident(iph, &rt->dst, sk); 164 ip_select_ident(iph, &rt->dst, sk);
165 165
166 if (opt && opt->optlen) { 166 if (opt && opt->opt.optlen) {
167 iph->ihl += opt->optlen>>2; 167 iph->ihl += opt->opt.optlen>>2;
168 ip_options_build(skb, opt, daddr, rt, 0); 168 ip_options_build(skb, &opt->opt, daddr, rt, 0);
169 } 169 }
170 170
171 skb->priority = sk->sk_priority; 171 skb->priority = sk->sk_priority;
@@ -316,7 +316,7 @@ int ip_queue_xmit(struct sk_buff *skb)
316{ 316{
317 struct sock *sk = skb->sk; 317 struct sock *sk = skb->sk;
318 struct inet_sock *inet = inet_sk(sk); 318 struct inet_sock *inet = inet_sk(sk);
319 struct ip_options *opt = inet->opt; 319 struct ip_options_rcu *inet_opt;
320 struct rtable *rt; 320 struct rtable *rt;
321 struct iphdr *iph; 321 struct iphdr *iph;
322 int res; 322 int res;
@@ -325,6 +325,7 @@ int ip_queue_xmit(struct sk_buff *skb)
325 * f.e. by something like SCTP. 325 * f.e. by something like SCTP.
326 */ 326 */
327 rcu_read_lock(); 327 rcu_read_lock();
328 inet_opt = rcu_dereference(inet->inet_opt);
328 rt = skb_rtable(skb); 329 rt = skb_rtable(skb);
329 if (rt != NULL) 330 if (rt != NULL)
330 goto packet_routed; 331 goto packet_routed;
@@ -336,8 +337,8 @@ int ip_queue_xmit(struct sk_buff *skb)
336 337
337 /* Use correct destination address if we have options. */ 338 /* Use correct destination address if we have options. */
338 daddr = inet->inet_daddr; 339 daddr = inet->inet_daddr;
339 if(opt && opt->srr) 340 if (inet_opt && inet_opt->opt.srr)
340 daddr = opt->faddr; 341 daddr = inet_opt->opt.faddr;
341 342
342 /* If this fails, retransmit mechanism of transport layer will 343 /* If this fails, retransmit mechanism of transport layer will
343 * keep trying until route appears or the connection times 344 * keep trying until route appears or the connection times
@@ -357,11 +358,11 @@ int ip_queue_xmit(struct sk_buff *skb)
357 skb_dst_set_noref(skb, &rt->dst); 358 skb_dst_set_noref(skb, &rt->dst);
358 359
359packet_routed: 360packet_routed:
360 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 361 if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway)
361 goto no_route; 362 goto no_route;
362 363
363 /* OK, we know where to send it, allocate and build IP header. */ 364 /* OK, we know where to send it, allocate and build IP header. */
364 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 365 skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
365 skb_reset_network_header(skb); 366 skb_reset_network_header(skb);
366 iph = ip_hdr(skb); 367 iph = ip_hdr(skb);
367 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 368 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
@@ -375,9 +376,9 @@ packet_routed:
375 iph->daddr = rt->rt_dst; 376 iph->daddr = rt->rt_dst;
376 /* Transport layer set skb->h.foo itself. */ 377 /* Transport layer set skb->h.foo itself. */
377 378
378 if (opt && opt->optlen) { 379 if (inet_opt && inet_opt->opt.optlen) {
379 iph->ihl += opt->optlen >> 2; 380 iph->ihl += inet_opt->opt.optlen >> 2;
380 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 381 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
381 } 382 }
382 383
383 ip_select_ident_more(iph, &rt->dst, sk, 384 ip_select_ident_more(iph, &rt->dst, sk,
@@ -1033,7 +1034,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1033 struct ipcm_cookie *ipc, struct rtable **rtp) 1034 struct ipcm_cookie *ipc, struct rtable **rtp)
1034{ 1035{
1035 struct inet_sock *inet = inet_sk(sk); 1036 struct inet_sock *inet = inet_sk(sk);
1036 struct ip_options *opt; 1037 struct ip_options_rcu *opt;
1037 struct rtable *rt; 1038 struct rtable *rt;
1038 1039
1039 /* 1040 /*
@@ -1047,7 +1048,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1047 if (unlikely(cork->opt == NULL)) 1048 if (unlikely(cork->opt == NULL))
1048 return -ENOBUFS; 1049 return -ENOBUFS;
1049 } 1050 }
1050 memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); 1051 memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
1051 cork->flags |= IPCORK_OPT; 1052 cork->flags |= IPCORK_OPT;
1052 cork->addr = ipc->addr; 1053 cork->addr = ipc->addr;
1053 } 1054 }
@@ -1451,26 +1452,23 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1451 unsigned int len) 1452 unsigned int len)
1452{ 1453{
1453 struct inet_sock *inet = inet_sk(sk); 1454 struct inet_sock *inet = inet_sk(sk);
1454 struct { 1455 struct ip_options_data replyopts;
1455 struct ip_options opt;
1456 char data[40];
1457 } replyopts;
1458 struct ipcm_cookie ipc; 1456 struct ipcm_cookie ipc;
1459 __be32 daddr; 1457 __be32 daddr;
1460 struct rtable *rt = skb_rtable(skb); 1458 struct rtable *rt = skb_rtable(skb);
1461 1459
1462 if (ip_options_echo(&replyopts.opt, skb)) 1460 if (ip_options_echo(&replyopts.opt.opt, skb))
1463 return; 1461 return;
1464 1462
1465 daddr = ipc.addr = rt->rt_src; 1463 daddr = ipc.addr = rt->rt_src;
1466 ipc.opt = NULL; 1464 ipc.opt = NULL;
1467 ipc.tx_flags = 0; 1465 ipc.tx_flags = 0;
1468 1466
1469 if (replyopts.opt.optlen) { 1467 if (replyopts.opt.opt.optlen) {
1470 ipc.opt = &replyopts.opt; 1468 ipc.opt = &replyopts.opt;
1471 1469
1472 if (ipc.opt->srr) 1470 if (replyopts.opt.opt.srr)
1473 daddr = replyopts.opt.faddr; 1471 daddr = replyopts.opt.opt.faddr;
1474 } 1472 }
1475 1473
1476 { 1474 {