From c6153b5b77650879d78dec76414213c76dd8d574 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 Aug 2008 13:44:31 -0700 Subject: ipv4: Disable route secret interval on zero interval Let me first state that disabling the route cache hash rebuild should not be done without extensive analysis on the risk profile and careful deliberation. However, there are times when this can be done safely or for testing. For example, when you have mechanisms for ensuring that offending parties do not exist in your network. This patch lets the user disable the rebuild if the interval is set to zero. This also incidentally fixes a divide-by-zero error with name-spaces. In addition, this patch makes the effect of an interval change immediate rather than it taking effect at the next rebuild as is currently the case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/route.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 16fc6f454a31..cca921ea8550 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2914,6 +2914,68 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, return 0; } +static void rt_secret_reschedule(int old) +{ + struct net *net; + int new = ip_rt_secret_interval; + int diff = new - old; + + if (!diff) + return; + + rtnl_lock(); + for_each_net(net) { + int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + + if (!new) + continue; + + if (deleted) { + long time = net->ipv4.rt_secret_timer.expires - jiffies; + + if (time <= 0 || (time += diff) <= 0) + time = 0; + + net->ipv4.rt_secret_timer.expires = time; + } else + net->ipv4.rt_secret_timer.expires = new; + + net->ipv4.rt_secret_timer.expires += jiffies; + add_timer(&net->ipv4.rt_secret_timer); + } + rtnl_unlock(); +} + +static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, + struct file *filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old = ip_rt_secret_interval; + int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + + rt_secret_reschedule(old); + + return ret; +} + +static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, + int __user *name, + int nlen, + void __user *oldval, + size_t __user *oldlenp, + void __user *newval, + size_t newlen) +{ + int old = ip_rt_secret_interval; + int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, + newlen); + + rt_secret_reschedule(old); + + return ret; +} + static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, @@ -3048,8 +3110,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = &ipv4_sysctl_rt_secret_interval, + .strategy = &ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; @@ -3126,10 +3188,12 @@ static __net_init int rt_secret_timer_init(struct net *net) net->ipv4.rt_secret_timer.data = (unsigned long)net; init_timer_deferrable(&net->ipv4.rt_secret_timer); - net->ipv4.rt_secret_timer.expires = - jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } return 0; } -- cgit v1.2.2 From 46faec9858e8943226464dac50e205bf210d9174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Grafstr=C3=B6m?= Date: Mon, 18 Aug 2008 21:29:57 -0700 Subject: netfilter: ipt_addrtype: Fix matching of inverted destination address type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes matching of inverted destination address type. Signed-off-by: Anders Grafström Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_addrtype.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 49587a497229..462a22c97877 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) ret &= match_type(dev, iph->daddr, info->dest) ^ - (info->flags & IPT_ADDRTYPE_INVERT_DEST); + !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); return ret; } -- cgit v1.2.2 From 9f593653742d1dd816c4e94c6e5154a57ccba6d1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 18 Aug 2008 21:32:32 -0700 Subject: nf_nat: use secure_ipv4_port_ephemeral() for NAT port randomization Use incoming network tuple as seed for NAT port randomization. This avoids concerns of leaking net_random() bits, and also gives better port distribution. Don't have NAT server, compile tested only. Signed-off-by: Stephen Hemminger [ added missing EXPORT_SYMBOL_GPL ] Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_proto_common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 91537f11273f..6c4f11f51446 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -73,9 +73,13 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.all) - min + 1; } - off = *rover; if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) - off = net_random(); + off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, + maniptype == IP_NAT_MANIP_SRC + ? tuple->dst.u.all + : tuple->src.u.all); + else + off = *rover; for (i = 0; i < range_size; i++, off++) { *portptr = htons(min + off % range_size); -- cgit v1.2.2 From fdc0bde90a689b9145f2b6f271c03f4c99d09667 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Sat, 23 Aug 2008 04:43:33 -0700 Subject: icmp: icmp_sk() should not use smp_processor_id() in preemptible code Pass namespace into icmp_xmit_lock, obtain socket inside and return it as a result for caller. Thanks Alexey Dobryan for this report: Steps to reproduce: CONFIG_PREEMPT=y CONFIG_DEBUG_PREEMPT=y tracepath BUG: using smp_processor_id() in preemptible [00000000] code: tracepath/3205 caller is icmp_sk+0x15/0x30 Pid: 3205, comm: tracepath Not tainted 2.6.27-rc4 #1 Call Trace: [] debug_smp_processor_id+0xe4/0xf0 [] icmp_sk+0x15/0x30 [] icmp_send+0x4b/0x3f0 [] ? trace_hardirqs_on_caller+0xd5/0x160 [] ? trace_hardirqs_on+0xd/0x10 [] ? local_bh_enable_ip+0x95/0x110 [] ? _spin_unlock_bh+0x39/0x40 [] ? mark_held_locks+0x4c/0x90 [] ? trace_hardirqs_on+0xd/0x10 [] ? trace_hardirqs_on_caller+0xd5/0x160 [] ip_fragment+0x8d4/0x900 [] ? ip_finish_output2+0x0/0x290 [] ? ip_finish_output+0x0/0x60 [] ? dst_output+0x0/0x10 [] ip_finish_output+0x4c/0x60 [] ip_output+0xa3/0xf0 [] ip_local_out+0x20/0x30 [] ip_push_pending_frames+0x27f/0x400 [] udp_push_pending_frames+0x233/0x3d0 [] udp_sendmsg+0x321/0x6f0 [] inet_sendmsg+0x45/0x80 [] sock_sendmsg+0xdf/0x110 [] ? autoremove_wake_function+0x0/0x40 [] ? validate_chain+0x415/0x1010 [] ? __do_fault+0x140/0x450 [] ? __lock_acquire+0x260/0x590 [] ? sockfd_lookup_light+0x45/0x80 [] sys_sendto+0xea/0x120 [] ? _spin_unlock_irqrestore+0x42/0x80 [] ? __up_read+0x4c/0xb0 [] ? up_read+0x26/0x30 [] system_call_fastpath+0x16/0x1b icmp6_sk() is similar. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 860558633b2c..55c355e63234 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -204,18 +204,22 @@ static struct sock *icmp_sk(struct net *net) return net->ipv4.icmp_sk[smp_processor_id()]; } -static inline int icmp_xmit_lock(struct sock *sk) +static inline struct sock *icmp_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmp_sk(net); + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static inline void icmp_xmit_unlock(struct sock *sk) @@ -354,15 +358,17 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb->rtable; struct net *net = dev_net(rt->u.dst.dev); - struct sock *sk = icmp_sk(net); - struct inet_sock *inet = inet_sk(sk); + struct sock *sk; + struct inet_sock *inet; __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; + inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; @@ -419,7 +425,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; net = dev_net(rt->u.dst.dev); - sk = icmp_sk(net); /* * Find the original header. It is expected to be valid, of course. @@ -483,7 +488,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; /* -- cgit v1.2.2 From 2f4520d35d89ca6c5cd129c38e3b11f0283b7d1b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Aug 2008 15:17:44 -0700 Subject: ipv4: sysctl fixes net.ipv4.neigh should be a part of skeleton to avoid ordering problems Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/ipv4/route.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cca921ea8550..e91bafeb32f4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3116,14 +3116,23 @@ static ctl_table ipv4_route_table[] = { { .ctl_name = 0 } }; -static __net_initdata struct ctl_path ipv4_route_path[] = { +static struct ctl_table empty[1]; + +static struct ctl_table ipv4_skeleton[] = +{ + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + .child = ipv4_route_table}, + { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + .child = empty}, + { } +}; + +static __net_initdata struct ctl_path ipv4_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, { }, }; - static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = NET_IPV4_ROUTE_FLUSH, @@ -3136,6 +3145,13 @@ static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = 0 }, }; +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; @@ -3287,7 +3303,7 @@ int __init ip_rt_init(void) */ void __init ip_static_sysctl_init(void) { - register_sysctl_paths(ipv4_route_path, ipv4_route_table); + register_sysctl_paths(ipv4_path, ipv4_skeleton); } #endif -- cgit v1.2.2 From 7982d5e1b350acb96aa156916c44c25ef87bb809 Mon Sep 17 00:00:00 2001 From: Philip Love Date: Wed, 27 Aug 2008 02:33:50 -0700 Subject: tcp: fix tcp header size miscalculation when window scale is unused The size of the TCP header is miscalculated when the window scale ends up being 0. Additionally, this can be induced by sending a SYN to a passive open port with a window scale option with value 0. Signed-off-by: Philip Love Signed-off-by: Adam Langley Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a00532de2a8c..8165f5aa8c71 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -468,7 +468,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; @@ -509,7 +510,8 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; -- cgit v1.2.2 From d994af0d50efc96b2077978fe9f066992639d525 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 27 Aug 2008 02:35:18 -0700 Subject: ipv4: mode 0555 in ipv4_skeleton vpnc on today's kernel says Cannot open "/proc/sys/net/ipv4/route/flush": d--------- 0 root root 0 2008-08-26 11:32 /proc/sys/net/ipv4/route d--------- 0 root root 0 2008-08-26 19:16 /proc/sys/net/ipv4/neigh Signed-off-by: Hugh Dickins Acked-by: Al Viro Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e91bafeb32f4..6ee5354c9aa1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3121,9 +3121,9 @@ static struct ctl_table empty[1]; static struct ctl_table ipv4_skeleton[] = { { .procname = "route", .ctl_name = NET_IPV4_ROUTE, - .child = ipv4_route_table}, + .mode = 0555, .child = ipv4_route_table}, { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, - .child = empty}, + .mode = 0555, .child = empty}, { } }; -- cgit v1.2.2