From 462fb2af9788a82a534f8184abfde31574e1cfa0 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Sun, 19 Sep 2010 09:34:33 +0000 Subject: bridge : Sanitize skb before it enters the IP stack Related dicussion here : http://lkml.org/lkml/2010/9/3/16 Introduce a function br_parse_ip_options that will audit the skb and possibly refill IP options before a packet enters the IP stack. If no options are present, the function will zero out the skb cb area so that it is not misinterpreted as options by some unsuspecting IP layer routine. If packet consistency fails, drop it. Signed-off-by: Bandan Das Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ba9836c488ed..1906fa35860c 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -466,7 +466,7 @@ error: } return -EINVAL; } - +EXPORT_SYMBOL(ip_options_compile); /* * Undo all the changes done by ip_options_compile(). @@ -646,3 +646,4 @@ int ip_options_rcv_srr(struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL(ip_options_rcv_srr); -- cgit v1.2.2 From 8628bd8af7c4c14f40f5183f80f5744c4e682439 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Thu, 24 Mar 2011 07:44:22 +0000 Subject: ipv4: Fix IP timestamp option (IPOPT_TS_PRESPEC) handling in ip_options_echo() The current handling of echoed IP timestamp options with prespecified addresses is rather broken since the 2.2.x kernels. As far as i understand it, it should behave like when originating packets. Currently it will only timestamp the next free slot if: - there is space for *two* timestamps - some random data from the echoed packet taken as an IP is *not* a local IP This first is caused by an off-by-one error. 'soffset' points to the next free slot and so we only need to have 'soffset + 7 <= optlen'. The second bug is using sptr as the start of the option, when it really is set to 'skb_network_header(skb)'. I just use dptr instead which points to the timestamp option. Finally it would only timestamp for non-local IPs, which we shouldn't do. So instead we exclude all unicast destinations, similar to what we do in ip_options_compile(). Signed-off-by: Jan Luebbe Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 1906fa35860c..28a736f3442f 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) } else { dopt->ts_needtime = 0; - if (soffset + 8 <= optlen) { + if (soffset + 7 <= optlen) { __be32 addr; - memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) { + memcpy(&addr, dptr+soffset-1, 4); + if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) { dopt->ts_needtime = 1; soffset += 8; } -- cgit v1.2.2 From c65353daf137dd41f3ede3baf62d561fca076228 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Apr 2011 05:55:37 +0000 Subject: ip: ip_options_compile() resilient to NULL skb route Scot Doyle demonstrated ip_options_compile() could be called with an skb without an attached route, using a setup involving a bridge, netfilter, and forged IP packets. Let's make ip_options_compile() and ip_options_rcv_srr() a bit more robust, instead of changing bridge/netfilter code. With help from Hiroaki SHIMODA. Reported-by: Scot Doyle Tested-by: Scot Doyle Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Acked-by: Hiroaki SHIMODA Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 28a736f3442f..2391b24e8251 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -329,7 +329,7 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - if (skb) { + if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); opt->is_changed = 1; } @@ -371,7 +371,7 @@ int ip_options_compile(struct net *net, goto error; } opt->ts = optptr - iph; - if (skb) { + if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); timeptr = (__be32*)&optptr[optptr[2]+3]; } @@ -603,7 +603,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned long orefdst; int err; - if (!opt->srr) + if (!opt->srr || !rt) return 0; if (skb->pkt_type != PACKET_HOST) -- cgit v1.2.2 From f6d8bd051c391c1c0458a30b2a7abcd939329259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Apr 2011 09:45:37 +0000 Subject: inet: add RCU protection to inet->opt We lack proper synchronization to manipulate inet->opt ip_options Problem is ip_make_skb() calls ip_setup_cork() and ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options), without any protection against another thread manipulating inet->opt. Another thread can change inet->opt pointer and free old one under us. Use RCU to protect inet->opt (changed to inet->inet_opt). Instead of handling atomic refcounts, just copy ip_options when necessary, to avoid cache line dirtying. We cant insert an rcu_head in struct ip_options since its included in skb->cb[], so this patch is large because I had to introduce a new ip_options_rcu structure. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 2391b24e8251..01fc40965848 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -36,7 +36,7 @@ * saddr is address of outgoing interface. */ -void ip_options_build(struct sk_buff * skb, struct ip_options * opt, +void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag) { unsigned char *iph = skb_network_header(skb); @@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, * NOTE: dopt cannot point to skb. */ -int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) +int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) { - struct ip_options *sopt; + const struct ip_options *sopt; unsigned char *sptr, *dptr; int soffset, doffset; int optlen; @@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) sopt = &(IPCB(skb)->opt); - if (sopt->optlen == 0) { - dopt->optlen = 0; + if (sopt->optlen == 0) return 0; - } sptr = skb_network_header(skb); dptr = dopt->__data; @@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->optlen += optlen; } if (sopt->srr) { - unsigned char * start = sptr+sopt->srr; + unsigned char *start = sptr+sopt->srr; __be32 faddr; optlen = start[1]; @@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt) } } -static struct ip_options *ip_options_get_alloc(const int optlen) +static struct ip_options_rcu *ip_options_get_alloc(const int optlen) { - return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), + return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), GFP_KERNEL); } -static int ip_options_get_finish(struct net *net, struct ip_options **optp, - struct ip_options *opt, int optlen) +static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, + struct ip_options_rcu *opt, int optlen) { while (optlen & 3) - opt->__data[optlen++] = IPOPT_END; - opt->optlen = optlen; - if (optlen && ip_options_compile(net, opt, NULL)) { + opt->opt.__data[optlen++] = IPOPT_END; + opt->opt.optlen = optlen; + if (optlen && ip_options_compile(net, &opt->opt, NULL)) { kfree(opt); return -EINVAL; } @@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp, return 0; } -int ip_options_get_from_user(struct net *net, struct ip_options **optp, +int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; - if (optlen && copy_from_user(opt->__data, data, optlen)) { + if (optlen && copy_from_user(opt->opt.__data, data, optlen)) { kfree(opt); return -EFAULT; } return ip_options_get_finish(net, optp, opt, optlen); } -int ip_options_get(struct net *net, struct ip_options **optp, +int ip_options_get(struct net *net, struct ip_options_rcu **optp, unsigned char *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; if (optlen) - memcpy(opt->__data, data, optlen); + memcpy(opt->opt.__data, data, optlen); return ip_options_get_finish(net, optp, opt, optlen); } -- cgit v1.2.2 From 10949550bd1e50cc91c0f5085f7080a44b0871fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 19:26:57 -0400 Subject: ipv4: Kill spurious opt->srr check in ip_options_rcv_srr(). All call sites conditionalize the call to ip_options_rcv_srr() with a check of opt->srr, so no need to check it again there. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 01fc40965848..e82c304806bb 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -601,7 +601,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned long orefdst; int err; - if (!opt->srr || !rt) + if (!rt) return 0; if (skb->pkt_type != PACKET_HOST) -- cgit v1.2.2 From c30883bdff0b3544900a5c4aba18b8985436878f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 19:30:58 -0400 Subject: ipv4: Simplify iph->daddr overwrite in ip_options_rcv_srr(). We already copy the 4-byte nexthop from the options block into local variable "nexthop" for the route lookup. Re-use that variable instead of memcpy()'ing again when assigning to iph->daddr after the route lookup succeeds. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e82c304806bb..c5c26192b057 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -635,7 +635,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) if (rt2->rt_type != RTN_LOCAL) break; /* Superfast 8) loopback forward */ - memcpy(&iph->daddr, &optptr[srrptr-1], 4); + iph->daddr = nexthop; opt->is_changed = 1; } if (srrptr <= srrspace) { -- cgit v1.2.2 From 0374d9ceb02eb12fcd65be9dd5df9c911ef93424 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 May 2011 17:15:50 -0400 Subject: ipv4: Kill spurious write to iph->daddr in ip_forward_options(). This code block executes when opt->srr_is_hit is set. It will be set only by ip_options_rcv_srr(). ip_options_rcv_srr() walks until it hits a matching nexthop in the SRR option addresses, and when it matches one 1) looks up the route for that nexthop and 2) on route lookup success it writes that nexthop value into iph->daddr. ip_forward_options() runs later, and again walks the SRR option addresses looking for the option matching the destination of the route stored in skb_rtable(). This route will be the same exact one looked up for the nexthop by ip_options_rcv_srr(). Therefore "rt->rt_dst == iph->daddr" must be true. All it really needs to do is record the route's source address in the matching SRR option adddress. It need not write iph->daddr again, since that has already been done by ip_options_rcv_srr() as detailed above. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c5c26192b057..c6474cd1cbfa 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -573,7 +573,6 @@ void ip_forward_options(struct sk_buff *skb) if (srrptr + 3 <= srrspace) { opt->is_changed = 1; ip_rt_get_source(&optptr[srrptr-1], rt); - ip_hdr(skb)->daddr = rt->rt_dst; optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); -- cgit v1.2.2 From 8e36360ae876995e92d3a7538dda70548e64e685 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 May 2011 17:29:41 -0400 Subject: ipv4: Remove route key identity dependencies in ip_rt_get_source(). Pass in the sk_buff so that we can fetch the necessary keys from the packet header when working with input routes. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c6474cd1cbfa..89268baabc87 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -37,7 +37,7 @@ */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, - __be32 daddr, struct rtable *rt, int is_frag) + __be32 daddr, struct rtable *rt, int is_frag) { unsigned char *iph = skb_network_header(skb); @@ -50,9 +50,9 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, if (!is_frag) { if (opt->rr_needaddr) - ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); + ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt); if (opt->ts_needaddr) - ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); + ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt); if (opt->ts_needtime) { struct timespec tv; __be32 midtime; @@ -553,7 +553,7 @@ void ip_forward_options(struct sk_buff *skb) if (opt->rr_needaddr) { optptr = (unsigned char *)raw + opt->rr; - ip_rt_get_source(&optptr[optptr[2]-5], rt); + ip_rt_get_source(&optptr[optptr[2]-5], skb, rt); opt->is_changed = 1; } if (opt->srr_is_hit) { @@ -572,13 +572,13 @@ void ip_forward_options(struct sk_buff *skb) } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; - ip_rt_get_source(&optptr[srrptr-1], rt); + ip_rt_get_source(&optptr[srrptr-1], skb, rt); optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); if (opt->ts_needaddr) { optptr = raw + opt->ts; - ip_rt_get_source(&optptr[optptr[2]-9], rt); + ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); opt->is_changed = 1; } } -- cgit v1.2.2 From 7be799a70ba3dd90a59e8d2c72bbe06020005b3f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 May 2011 17:31:02 -0400 Subject: ipv4: Remove rt->rt_dst reference from ip_forward_options(). At this point iph->daddr equals what rt->rt_dst would hold. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 89268baabc87..c3118e1cd3bb 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -567,7 +567,7 @@ void ip_forward_options(struct sk_buff *skb) ) { if (srrptr + 3 > srrspace) break; - if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) + if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0) break; } if (srrptr + 3 <= srrspace) { -- cgit v1.2.2 From 48bdf072c3f1f8f739f76d19c74f4c79605cac46 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sun, 29 May 2011 10:55:44 +0000 Subject: ip_options_compile: properly handle unaligned pointer The current code takes an unaligned pointer and does htonl() on it to make it big-endian, then does a memcpy(). The problem is that the compiler decides that since the pointer is to a __be32, it is legal to optimize the copy into a processor word store. However, on an architecture that does not handled unaligned writes in kernel space, this produces an unaligned exception fault. The solution is to track the pointer as a "char *" (which removes a bunch of unpleasant casts in any case), and then just use put_unaligned_be32() to write the value to memory. Signed-off-by: Chris Metcalf Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c3118e1cd3bb..ec93335901dd 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -350,7 +351,7 @@ int ip_options_compile(struct net *net, goto error; } if (optptr[2] <= optlen) { - __be32 *timeptr = NULL; + unsigned char *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -359,7 +360,7 @@ int ip_options_compile(struct net *net, case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) - timeptr = (__be32*)&optptr[optptr[2]-1]; + timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; @@ -371,7 +372,7 @@ int ip_options_compile(struct net *net, opt->ts = optptr - iph; if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; @@ -389,7 +390,7 @@ int ip_options_compile(struct net *net, if (inet_addr_type(net, addr) == RTN_UNICAST) break; if (skb) - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; @@ -403,10 +404,10 @@ int ip_options_compile(struct net *net, } if (timeptr) { struct timespec tv; - __be32 midtime; + u32 midtime; getnstimeofday(&tv); - midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); - memcpy(timeptr, &midtime, sizeof(__be32)); + midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC; + put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } } else { -- cgit v1.2.2