From 48b28b8db9a74cc5c43e76485dc397e22bea2984 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 19 Nov 2011 13:23:32 +0100 Subject: Bluetooth: cmtp: Fix module reference We cannot call module_put(THIS_MODULE) if this is our last reference. Otherwise, this call may cleanup our module before it returns. Gladly, the kthread API provides a simple wrapper for us. So lets use module_put_and_exit() to avoid a race condition with the module cleanup code. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 7d00ddf9e9dc..5a6e634f7fca 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -67,14 +67,12 @@ static struct cmtp_session *__cmtp_get_session(bdaddr_t *bdaddr) static void __cmtp_link_session(struct cmtp_session *session) { - __module_get(THIS_MODULE); list_add(&session->list, &cmtp_session_list); } static void __cmtp_unlink_session(struct cmtp_session *session) { list_del(&session->list); - module_put(THIS_MODULE); } static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci) @@ -327,6 +325,7 @@ static int cmtp_session(void *arg) up_write(&cmtp_session_sem); kfree(session); + module_put_and_exit(0); return 0; } @@ -376,9 +375,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) __cmtp_link_session(session); + __module_get(THIS_MODULE); session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d", session->num); if (IS_ERR(session->task)) { + module_put(THIS_MODULE); err = PTR_ERR(session->task); goto unlink; } -- cgit v1.2.2 From 9b338c3dd12918f7f7df2b882f63f71e9efbcb41 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 19 Nov 2011 13:23:33 +0100 Subject: Bluetooth: bnep: Fix module reference We cannot call module_put(THIS_MODULE) if this is our last reference. Otherwise, this call may cleanup our module before it returns. Gladly, the kthread API provides a simple wrapper for us. So lets use module_put_and_exit() to avoid a race condition with the module cleanup code. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 91bcd3a961ec..1eea8208b2cc 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -79,17 +79,12 @@ static struct bnep_session *__bnep_get_session(u8 *dst) static void __bnep_link_session(struct bnep_session *s) { - /* It's safe to call __module_get() here because sessions are added - by the socket layer which has to hold the reference to this module. - */ - __module_get(THIS_MODULE); list_add(&s->list, &bnep_session_list); } static void __bnep_unlink_session(struct bnep_session *s) { list_del(&s->list); - module_put(THIS_MODULE); } static int bnep_send(struct bnep_session *s, void *data, size_t len) @@ -530,6 +525,7 @@ static int bnep_session(void *arg) up_write(&bnep_session_sem); free_netdev(dev); + module_put_and_exit(0); return 0; } @@ -616,9 +612,11 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) __bnep_link_session(s); + __module_get(THIS_MODULE); s->task = kthread_run(bnep_session, s, "kbnepd %s", dev->name); if (IS_ERR(s->task)) { /* Session thread start failed, gotta cleanup. */ + module_put(THIS_MODULE); unregister_netdev(dev); __bnep_unlink_session(s); err = PTR_ERR(s->task); -- cgit v1.2.2 From 648ae8e53d58ed1b667db173a2d4ff2132a3b529 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 9 Nov 2011 19:37:35 +0100 Subject: netfilter: ipset: suppress compile-time warnings in ip_set_hash_ipport*.c warning: 'ip_to' may be used uninitialized in this function Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6ee10f5d59bd..37d667e3f6f8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -158,7 +158,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem data = { }; - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fb90e344e907..e69e2718fbe1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -162,7 +162,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem data = { }; - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index deb3e3dfa5fc..64199b4e93c9 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -184,7 +184,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; u32 ip2_from = 0, ip2_to, ip2_last, ip2; u32 timeout = h->timeout; bool with_ports = false; -- cgit v1.2.2 From 5e2afba4ecd7931ea06e6fa116ab28e6943dbd42 Mon Sep 17 00:00:00 2001 From: Paul Guo Date: Mon, 14 Nov 2011 19:00:54 +0800 Subject: netfilter: possible unaligned packet header in ip_route_me_harder This patch tries to fix the following issue in netfilter: In ip_route_me_harder(), we invoke pskb_expand_head() that rellocates new header with additional head room which can break the alignment of the original packet header. In one of my NAT test case, the NIC port for internal hosts is configured with vlan and the port for external hosts is with general configuration. If we ping an external "unknown" hosts from an internal host, an icmp packet will be sent. We find that in icmp_send()->...->ip_route_me_harder()->pskb_expand_head(), hh_len=18 and current headroom (skb_headroom(skb)) of the packet is 16. After calling pskb_expand_head() the packet header becomes to be unaligned and then our system (arch/tile) panics immediately. Signed-off-by: Paul Guo Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 9899619ab9b8..4f47e064e262 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -64,7 +64,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* Change in oif may mean change in hh_len. */ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), + 0, GFP_ATOMIC)) return -1; return 0; -- cgit v1.2.2 From 904603f9b78f94d5a5fe29cf2f9694ef7f6f4420 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 17 Nov 2011 14:53:36 -0800 Subject: mac80211: Fix AMSDU rate printout in debugfs. It was flipped. See section 7.3.2.56 of the 802.11n spec for details. Signed-off-by: Ben Greear Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/debugfs_sta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index c5f341798c16..3110cbdc501b 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -274,9 +274,9 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, PRINT_HT_CAP((htc->cap & BIT(10)), "HT Delayed Block Ack"); - PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " - "3839 bytes"); PRINT_HT_CAP(!(htc->cap & BIT(11)), "Max AMSDU length: " + "3839 bytes"); + PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " "7935 bytes"); /* -- cgit v1.2.2 From 9c8f2c42c93f415771d6d7b87a0881ba0bb72824 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 18 Nov 2011 17:02:16 +0100 Subject: mac80211: Fix endian bug in radiotap header generation I intoduced this bug in commit a2fe81667410723d941a688e1958a49d67ca3346 "mac80211: Build TX radiotap header dynamically" Signed-off-by: Helmut Schaa Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/status.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index df643cedf9b9..5533a74e9bb3 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -259,7 +259,7 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_radiotap_header *rthdr; unsigned char *pos; - __le16 txflags; + u16 txflags; rthdr = (struct ieee80211_radiotap_header *) skb_push(skb, rtap_len); @@ -289,13 +289,13 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band txflags = 0; if (!(info->flags & IEEE80211_TX_STAT_ACK) && !is_multicast_ether_addr(hdr->addr1)) - txflags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); + txflags |= IEEE80211_RADIOTAP_F_TX_FAIL; if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) - txflags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); + txflags |= IEEE80211_RADIOTAP_F_TX_CTS; else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) - txflags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); + txflags |= IEEE80211_RADIOTAP_F_TX_RTS; put_unaligned_le16(txflags, pos); pos += 2; -- cgit v1.2.2 From de3584bd62d87b4c250129fbc46ca52c80330add Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 21 Nov 2011 10:44:00 +0100 Subject: cfg80211: fix regulatory NULL dereference By the time userspace returns with a response to the regulatory domain request, the wiphy causing the request might have gone away. If this is so, reject the update but mark the request as having been processed anyway. Cc: Luis R. Rodriguez Signed-off-by: Johannes Berg Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- net/wireless/reg.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index bc1ec2c26fd0..186b7f2a27b6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2035,6 +2035,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) } request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (!request_wiphy) { + reg_set_request_processed(); + return -ENODEV; + } if (!last_request->intersect) { int r; -- cgit v1.2.2 From 525c6465d449cdec04b3c91733e46027a4d08422 Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Mon, 21 Nov 2011 16:45:26 -0500 Subject: dccp: fix error propagation in dccp_v4_connect The errcode is not updated when ip_route_newports() fails. Signed-off-by: RongQing.Li Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 90a919afbed7..3f4e5414c8e5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -111,6 +111,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { + err = PTR_ERR(rt); rt = NULL; goto failure; } -- cgit v1.2.2 From f23aa62545c18728eb2b9434aa258be27e07dd49 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 21 Nov 2011 16:46:24 -0500 Subject: caif: fix endian conversion in cffrml_transmit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "tmp" variable here is used to store the result of cpu_to_le16() so it should be an __le16 instead of an int. We want the high bits set and the current code works on little endian systems but not on big endian systems. Signed-off-by: Dan Carpenter Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cffrml.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index f39921171d0d..d3ca87bf23b7 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -136,20 +136,21 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) { - int tmp; u16 chks; u16 len; + __le16 data; + struct cffrml *this = container_obj(layr); if (this->dofcs) { chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); - tmp = cpu_to_le16(chks); - cfpkt_add_trail(pkt, &tmp, 2); + data = cpu_to_le16(chks); + cfpkt_add_trail(pkt, &data, 2); } else { cfpkt_pad_trail(pkt, 2); } len = cfpkt_getlen(pkt); - tmp = cpu_to_le16(len); - cfpkt_add_head(pkt, &tmp, 2); + data = cpu_to_le16(len); + cfpkt_add_head(pkt, &data, 2); cfpkt_info(pkt)->hdr_len += 2; if (cfpkt_erroneous(pkt)) { pr_err("Packet is erroneous!\n"); -- cgit v1.2.2 From 70e9942f17a6193e9172a804e6569a8806633d6b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Nov 2011 00:16:51 +0100 Subject: netfilter: nf_conntrack: make event callback registration per-netns This patch fixes an oops that can be triggered following this recipe: 0) make sure nf_conntrack_netlink and nf_conntrack_ipv4 are loaded. 1) container is started. 2) connect to it via lxc-console. 3) generate some traffic with the container to create some conntrack entries in its table. 4) stop the container: you hit one oops because the conntrack table cleanup tries to report the destroy event to user-space but the per-netns nfnetlink socket has already gone (as the nfnetlink socket is per-netns but event callback registration is global). To fix this situation, we make the ctnl_notifier per-netns so the callback is registered/unregistered if the container is created/destroyed. Alex Bligh and Alexey Dobriyan originally proposed one small patch to check if the nfnetlink socket is gone in nfnetlink_has_listeners, but this is a very visited path for events, thus, it may reduce performance and it looks a bit hackish to check for the nfnetlink socket only to workaround this situation. As a result, I decided to follow the bigger path choice, which seems to look nicer to me. Cc: Alexey Dobriyan Reported-by: Alex Bligh Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ecache.c | 37 +++++++++--------- net/netfilter/nf_conntrack_netlink.c | 73 +++++++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 6b368be937c6..b62c4148b921 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -27,22 +27,17 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); - -struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; -EXPORT_SYMBOL_GPL(nf_expect_event_cb); - /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned long events; struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; rcu_read_lock(); - notify = rcu_dereference(nf_conntrack_event_cb); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (notify == NULL) goto out_unlock; @@ -83,19 +78,20 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); -int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) +int nf_conntrack_register_notifier(struct net *net, + struct nf_ct_event_notifier *new) { int ret = 0; struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_conntrack_event_cb, + notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); if (notify != NULL) { ret = -EBUSY; goto out_unlock; } - RCU_INIT_POINTER(nf_conntrack_event_cb, new); + RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -105,32 +101,34 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) +void nf_conntrack_unregister_notifier(struct net *net, + struct nf_ct_event_notifier *new) { struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_conntrack_event_cb, + notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - RCU_INIT_POINTER(nf_conntrack_event_cb, NULL); + RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) +int nf_ct_expect_register_notifier(struct net *net, + struct nf_exp_event_notifier *new) { int ret = 0; struct nf_exp_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_expect_event_cb, + notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); if (notify != NULL) { ret = -EBUSY; goto out_unlock; } - RCU_INIT_POINTER(nf_expect_event_cb, new); + RCU_INIT_POINTER(net->ct.nf_expect_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -140,15 +138,16 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) +void nf_ct_expect_unregister_notifier(struct net *net, + struct nf_exp_event_notifier *new) { struct nf_exp_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_expect_event_cb, + notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - RCU_INIT_POINTER(nf_expect_event_cb, NULL); + RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e58aa9b1fe8a..ef21b221f036 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist * (C) 2002-2006 by Harald Welte * (C) 2003 by Patrick Mchardy - * (C) 2005-2008 by Pablo Neira Ayuso + * (C) 2005-2011 by Pablo Neira Ayuso * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -2163,6 +2163,54 @@ MODULE_ALIAS("ip_conntrack_netlink"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); +static int __net_init ctnetlink_net_init(struct net *net) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int ret; + + ret = nf_conntrack_register_notifier(net, &ctnl_notifier); + if (ret < 0) { + pr_err("ctnetlink_init: cannot register notifier.\n"); + goto err_out; + } + + ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); + if (ret < 0) { + pr_err("ctnetlink_init: cannot expect register notifier.\n"); + goto err_unreg_notifier; + } +#endif + return 0; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +err_unreg_notifier: + nf_conntrack_unregister_notifier(net, &ctnl_notifier); +err_out: + return ret; +#endif +} + +static void ctnetlink_net_exit(struct net *net) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); + nf_conntrack_unregister_notifier(net, &ctnl_notifier); +#endif +} + +static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) +{ + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) + ctnetlink_net_exit(net); +} + +static struct pernet_operations ctnetlink_net_ops = { + .init = ctnetlink_net_init, + .exit_batch = ctnetlink_net_exit_batch, +}; + static int __init ctnetlink_init(void) { int ret; @@ -2180,28 +2228,15 @@ static int __init ctnetlink_init(void) goto err_unreg_subsys; } -#ifdef CONFIG_NF_CONNTRACK_EVENTS - ret = nf_conntrack_register_notifier(&ctnl_notifier); - if (ret < 0) { - pr_err("ctnetlink_init: cannot register notifier.\n"); + if (register_pernet_subsys(&ctnetlink_net_ops)) { + pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } - ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp); - if (ret < 0) { - pr_err("ctnetlink_init: cannot expect register notifier.\n"); - goto err_unreg_notifier; - } -#endif - return 0; -#ifdef CONFIG_NF_CONNTRACK_EVENTS -err_unreg_notifier: - nf_conntrack_unregister_notifier(&ctnl_notifier); err_unreg_exp_subsys: nfnetlink_subsys_unregister(&ctnl_exp_subsys); -#endif err_unreg_subsys: nfnetlink_subsys_unregister(&ctnl_subsys); err_out: @@ -2213,11 +2248,7 @@ static void __exit ctnetlink_exit(void) pr_info("ctnetlink: unregistering from nfnetlink.\n"); nf_ct_remove_userspace_expectations(); -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); - nf_conntrack_unregister_notifier(&ctnl_notifier); -#endif - + unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); } -- cgit v1.2.2 From 717b6d83664646963c71d014c71babaa802333b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 22 Nov 2011 16:03:10 -0500 Subject: net-netlink: fix diag to export IPv4 tos for dual-stack IPv6 sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 68e8ac514383..ccee270a9b65 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -108,9 +108,6 @@ static int inet_csk_diag_fill(struct sock *sk, icsk->icsk_ca_ops->name); } - if ((ext & (1 << (INET_DIAG_TOS - 1))) && (sk->sk_family != AF_INET6)) - RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); - r->idiag_family = sk->sk_family; r->idiag_state = sk->sk_state; r->idiag_timer = 0; @@ -125,16 +122,23 @@ static int inet_csk_diag_fill(struct sock *sk, r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, + * hence this needs to be included regardless of socket family. + */ + if (ext & (1 << (INET_DIAG_TOS - 1))) + RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); + #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); + if (ext & (1 << (INET_DIAG_TCLASS - 1))) + RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &np->rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, &np->daddr); - if (ext & (1 << (INET_DIAG_TCLASS - 1))) - RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); } #endif -- cgit v1.2.2 From 20e994a05b33b186a22a3b9e922df4cce644daac Mon Sep 17 00:00:00 2001 From: Feng King Date: Mon, 21 Nov 2011 01:47:11 +0000 Subject: net: correct comments of skb_shift when skb_shift, we want to shift paged data from skb to tgt frag area. Original comments revert the shift order Signed-off-by: Feng King Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 18a3cebb753d..3c30ee4a5710 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2230,7 +2230,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb) * @shiftlen: shift up to this many bytes * * Attempts to shift up to shiftlen worth of bytes, which may be less than - * the length of the skb, from tgt to skb. Returns number bytes shifted. + * the length of the skb, from skb to tgt. Returns number bytes shifted. * It's up to caller to free skb if everything was shifted. * * If @tgt runs out of frags, the whole operation is aborted. -- cgit v1.2.2 From 24ca9a847791fd53d9b217330b15f3c285827a18 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Nov 2011 14:44:28 +0200 Subject: SUNRPC: Ensure we return EAGAIN in xs_nospace if congestion is cleared By returning '0' instead of 'EAGAIN' when the tests in xs_nospace() fail to find evidence of socket congestion, we are making the RPC engine believe that the message was incorrectly sent and so it disconnects the socket instead of just retrying. The bug appears to have been introduced by commit 5e3771ce2d6a69e10fcc870cdf226d121d868491 (SUNRPC: Ensure that xs_nospace return values are propagated). Reported-by: Andrew Cooper Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 2.6.30] Tested-by: Andrew Cooper --- net/sunrpc/xprtsock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2d78d95955ab..55472c48825e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -496,7 +496,7 @@ static int xs_nospace(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - int ret = 0; + int ret = -EAGAIN; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, @@ -508,7 +508,6 @@ static int xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { - ret = -EAGAIN; /* * Notify TCP that we're limited by the application * window size -- cgit v1.2.2 From 4d65a2465f6f2694de67777a8aedb1272f473979 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 23 Nov 2011 03:51:54 -0500 Subject: ipv6: fix a bug in ndisc_send_redirect Release skb when transmit rate limit _not_ allow Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 44e5b7f2a6c1..0cb78d7ddaf5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1571,7 +1571,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, } if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); - if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) goto release; if (dev->addr_len) { -- cgit v1.2.2 From c16a98ed91597b40b22b540c6517103497ef8e74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Nov 2011 15:49:31 -0500 Subject: ipv6: tcp: fix panic in SYN processing commit 72a3effaf633bc ([NET]: Size listen hash tables using backlog hint) added a bug allowing inet6_synq_hash() to return an out of bound array index, because of u16 overflow. Bug can happen if system admins set net.core.somaxconn & net.ipv4.tcp_max_syn_backlog sysctls to values greater than 65536 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index fee46d5a2f12..1567fb120392 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -85,7 +85,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, * request_sock (formerly open request) hash tables. */ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, - const u32 rnd, const u16 synq_hsize) + const u32 rnd, const u32 synq_hsize) { u32 c; -- cgit v1.2.2 From 46a246c4dff9f248913e791b69f2336cd8d4ec41 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Nov 2011 16:07:00 -0500 Subject: netfilter: Remove NOTRACK/RAW dependency on NETFILTER_ADVANCED. Distributions are using this in their default scripts, so don't hide them behind the advanced setting. Reported-by: Linus Torvalds Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 1 - net/ipv6/netfilter/Kconfig | 1 - net/netfilter/Kconfig | 1 - 3 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1dfc18a03fd4..f19f2182894c 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -325,7 +325,6 @@ config IP_NF_TARGET_TTL # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' - depends on NETFILTER_ADVANCED help This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 448464844a25..f792b34cbe9c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -186,7 +186,6 @@ config IP6_NF_MANGLE config IP6_NF_RAW tristate 'raw table support (required for TRACE)' - depends on NETFILTER_ADVANCED help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8260b13d93c9..e8f379692294 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -542,7 +542,6 @@ config NETFILTER_XT_TARGET_NOTRACK tristate '"NOTRACK" target support' depends on IP_NF_RAW || IP6_NF_RAW depends on NF_CONNTRACK - depends on NETFILTER_ADVANCED help The NOTRACK target allows a select rule to specify which packets *not* to enter the conntrack/NAT -- cgit v1.2.2 From 4d0fe50c75a547088e4304e5eb5f521514dfae46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Nov 2011 17:29:23 -0500 Subject: ipv6: tcp: fix tcp_v6_conn_request() Since linux 2.6.26 (commit c6aefafb7ec6 : Add IPv6 support to TCP SYN cookies), we can drop a SYN packet reusing a TIME_WAIT socket. (As a matter of fact we fail to send the SYNACK answer) As the client resends its SYN packet after a one second timeout, we accept it, because first packet removed the TIME_WAIT socket before being dropped. This probably explains why nobody ever noticed or complained. Reported-by: Jesse Young Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 36131d122a6f..2dea4bb7b54a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1255,6 +1255,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); + treq->iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); + if (!isn) { struct inet_peer *peer = NULL; @@ -1264,12 +1271,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) atomic_inc(&skb->users); treq->pktopts = skb; } - treq->iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); -- cgit v1.2.2 From 685f94e6db8496399c881218018166515445a914 Mon Sep 17 00:00:00 2001 From: Jun Zhao Date: Tue, 22 Nov 2011 17:19:03 +0000 Subject: ipv4 : igmp : fix error handle in ip_mc_add_src() When add sources to interface failure, need to roll back the sfcount[MODE] to before state. We need to match it corresponding. Acked-by: David L Stevens Acked-by: Eric Dumazet Signed-off-by: Jun Zhao Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c7472eff2d51..b2ca095cb9da 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1716,7 +1716,8 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (err) { int j; - pmc->sfcount[sfmode]--; + if (!delta) + pmc->sfcount[sfmode]--; for (j=0; jsfcount[MCAST_EXCLUDE] != 0)) { -- cgit v1.2.2 From ac8a48106be49c422575ddc7531b776f8eb49610 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 22 Nov 2011 23:33:10 +0000 Subject: ipv4: Save nexthop address of LSRR/SSRR option to IPCB. We can not update iph->daddr in ip_options_rcv_srr(), It is too early. When some exception ocurred later (eg. in ip_forward() when goto sr_failed) we need the ip header be identical to the original one as ICMP need it. Add a field 'nexthop' in struct ip_options to save nexthop of LSRR or SSRR option. Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_options.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3b34d1c86270..29a07b6c7168 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway) + if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 05d20cca9d66..1e60f7679075 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -568,12 +568,13 @@ void ip_forward_options(struct sk_buff *skb) ) { if (srrptr + 3 > srrspace) break; - if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0) + if (memcmp(&opt->nexthop, &optptr[srrptr-1], 4) == 0) break; } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; ip_rt_get_source(&optptr[srrptr-1], skb, rt); + ip_hdr(skb)->daddr = opt->nexthop; optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); @@ -640,7 +641,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) } if (srrptr <= srrspace) { opt->srr_is_hit = 1; - iph->daddr = nexthop; + opt->nexthop = nexthop; opt->is_changed = 1; } return 0; -- cgit v1.2.2 From 42ca0203fd59aa9be7b241be1fbc3bef1f903f9c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Nov 2011 21:18:20 +0000 Subject: net/netlabel: copy and paste bug in netlbl_cfg_unlbl_map_add() This was copy and pasted from the IPv4 code. We're calling the ip4 version of that function and map4 is NULL. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/netlabel/netlabel_kapi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 9c24de10a657..7b372a763c60 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -162,8 +162,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map6->list.addr.s6_addr32[3] &= mask6->s6_addr32[3]; ipv6_addr_copy(&map6->list.mask, mask6); map6->list.valid = 1; - ret_val = netlbl_af4list_add(&map4->list, - &addrmap->list4); + ret_val = netlbl_af6list_add(&map6->list, + &addrmap->list6); if (ret_val != 0) goto cfg_unlbl_map_add_failure; break; -- cgit v1.2.2 From df07a94cf50eb73d09bf2350c3fe2598e4cbeee1 Mon Sep 17 00:00:00 2001 From: "Jorge Boncompte [DTI2]" Date: Fri, 25 Nov 2011 13:24:49 -0500 Subject: netns: fix proxy ARP entries listing on a netns Skip entries from foreign network namespaces. Signed-off-by: Jorge Boncompte [DTI2] Signed-off-by: David S. Miller --- net/core/neighbour.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 039d51e6c284..5ac07d31fbc9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2397,7 +2397,10 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; - pn = pn->next; + do { + pn = pn->next; + } while (pn && !net_eq(pneigh_net(pn), net)); + while (!pn) { if (++state->bucket > PNEIGH_HASHMASK) break; -- cgit v1.2.2 From 6b600b26c0215bf9ed04062ecfacf0bc20e2588c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Nov 2011 02:12:13 +0000 Subject: route: Use the device mtu as the default for blackhole routes As it is, we return null as the default mtu of blackhole routes. This may lead to a propagation of a bogus pmtu if the default_mtu method of a blackhole route is invoked. So return dst->dev->mtu as the default mtu instead. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- net/ipv6/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0c74da8a0473..5b17bf124a33 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2757,7 +2757,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) { - return 0; + return dst->dev->mtu; } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8473016bba4a..d8fbd18c9467 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -157,7 +157,7 @@ static struct dst_ops ip6_dst_ops_template = { static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) { - return 0; + return dst->dev->mtu; } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) -- cgit v1.2.2 From ebb762f27fed083cb993a0816393aba4615f6544 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Nov 2011 02:12:51 +0000 Subject: net: Rename the dst_opt default_mtu method to mtu We plan to invoke the dst_opt->default_mtu() method unconditioally from dst_mtu(). So rename the method to dst_opt->mtu() to match the name with the new meaning. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 6 +++--- net/ipv4/route.c | 10 +++++----- net/ipv6/route.c | 10 +++++----- net/xfrm/xfrm_policy.c | 6 +++--- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a77d16158eb6..db4867963247 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -112,7 +112,7 @@ static unsigned long dn_rt_deadline; static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static unsigned int dn_dst_mtu(const struct dst_entry *dst); static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); @@ -135,7 +135,7 @@ static struct dst_ops dn_dst_ops = { .gc = dn_dst_gc, .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, - .default_mtu = dn_dst_default_mtu, + .mtu = dn_dst_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, @@ -825,7 +825,7 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); } -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst) +static unsigned int dn_dst_mtu(const struct dst_entry *dst) { return dst->dev->mtu; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b17bf124a33..f1ac3efc5524 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -138,7 +138,7 @@ static int rt_chain_length_max __read_mostly = 20; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); -static unsigned int ipv4_default_mtu(const struct dst_entry *dst); +static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); @@ -193,7 +193,7 @@ static struct dst_ops ipv4_dst_ops = { .gc = rt_garbage_collect, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, - .default_mtu = ipv4_default_mtu, + .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, @@ -1814,7 +1814,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) return advmss; } -static unsigned int ipv4_default_mtu(const struct dst_entry *dst) +static unsigned int ipv4_mtu(const struct dst_entry *dst) { unsigned int mtu = dst->dev->mtu; @@ -2755,7 +2755,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo return NULL; } -static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) +static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) { return dst->dev->mtu; } @@ -2775,7 +2775,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, - .default_mtu = ipv4_blackhole_default_mtu, + .mtu = ipv4_blackhole_mtu, .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .cow_metrics = ipv4_rt_blackhole_cow_metrics, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d8fbd18c9467..76645d7077ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -77,7 +77,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); -static unsigned int ip6_default_mtu(const struct dst_entry *dst); +static unsigned int ip6_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -144,7 +144,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc_thresh = 1024, .check = ip6_dst_check, .default_advmss = ip6_default_advmss, - .default_mtu = ip6_default_mtu, + .mtu = ip6_mtu, .cow_metrics = ipv6_cow_metrics, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, @@ -155,7 +155,7 @@ static struct dst_ops ip6_dst_ops_template = { .neigh_lookup = ip6_neigh_lookup, }; -static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) { return dst->dev->mtu; } @@ -175,7 +175,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, - .default_mtu = ip6_blackhole_default_mtu, + .mtu = ip6_blackhole_mtu, .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .cow_metrics = ip6_rt_blackhole_cow_metrics, @@ -1041,7 +1041,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) return mtu; } -static unsigned int ip6_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_mtu(const struct dst_entry *dst) { unsigned int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 552df27dcf53..b8be51eb7e29 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2382,7 +2382,7 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst) return dst_metric_advmss(dst->path); } -static unsigned int xfrm_default_mtu(const struct dst_entry *dst) +static unsigned int xfrm_mtu(const struct dst_entry *dst) { return dst_mtu(dst->path); } @@ -2411,8 +2411,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->check = xfrm_dst_check; if (likely(dst_ops->default_advmss == NULL)) dst_ops->default_advmss = xfrm_default_advmss; - if (likely(dst_ops->default_mtu == NULL)) - dst_ops->default_mtu = xfrm_default_mtu; + if (likely(dst_ops->mtu == NULL)) + dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) -- cgit v1.2.2 From 618f9bc74a039da76fa027ac2600c5b785b964c5 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Nov 2011 02:13:31 +0000 Subject: net: Move mtu handling down to the protocol depended handlers We move all mtu handling from dst_mtu() down to the protocol layer. So each protocol can implement the mtu handling in a different manner. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 4 +++- net/ipv4/route.c | 11 +++++++++-- net/ipv6/route.c | 11 +++++++++-- net/xfrm/xfrm_policy.c | 4 +++- 4 files changed, 24 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index db4867963247..94f4ec036669 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -827,7 +827,9 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) static unsigned int dn_dst_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f1ac3efc5524..11d1b2080a16 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1816,7 +1816,12 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) static unsigned int ipv4_mtu(const struct dst_entry *dst) { - unsigned int mtu = dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu) + return mtu; + + mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { const struct rtable *rt = (const struct rtable *) dst; @@ -2757,7 +2762,9 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 76645d7077ff..3399dd326287 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -157,7 +157,9 @@ static struct dst_ops ip6_dst_ops_template = { static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -1043,8 +1045,13 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { - unsigned int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu) + return mtu; + + mtu = IPV6_MIN_MTU; rcu_read_lock(); idev = __in6_dev_get(dst->dev); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b8be51eb7e29..2118d6446630 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2384,7 +2384,9 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst) static unsigned int xfrm_mtu(const struct dst_entry *dst) { - return dst_mtu(dst->path); + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst_mtu(dst->path); } static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr) -- cgit v1.2.2 From 261663b0ee2ee8e3947f4c11c1a08be18cd2cea1 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Nov 2011 02:14:50 +0000 Subject: ipv4: Don't use the cached pmtu informations for input routes The pmtu informations on the inetpeer are visible for output and input routes. On packet forwarding, we might propagate a learned pmtu to the sender. As we update the pmtu informations of the inetpeer on demand, the original sender of the forwarded packets might never notice when the pmtu to that inetpeer increases. So use the mtu of the outgoing device on packet forwarding instead of the pmtu to the final destination. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11d1b2080a16..fb47c8f0cd86 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1816,15 +1816,15 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) static unsigned int ipv4_mtu(const struct dst_entry *dst) { + const struct rtable *rt = (const struct rtable *) dst; unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu) + if (mtu && rt_is_output_route(rt)) return mtu; mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - const struct rtable *rt = (const struct rtable *) dst; if (rt->rt_gateway != rt->rt_dst && mtu > 576) mtu = 576; -- cgit v1.2.2 From 8a6e77d5209e459a9ec5c268c39800c06cd1dc86 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Nov 2011 00:21:55 +0000 Subject: decnet: proper socket refcounting Better use sk_reset_timer() / sk_stop_timer() helpers to make sure we dont access already freed/reused memory later. Reported-by: Sasha Levin Signed-off-by: Eric Dumazet Tested-by: Sasha Levin Signed-off-by: David S. Miller --- net/decnet/dn_timer.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c index 67f691bd4acf..d9c150cc59a9 100644 --- a/net/decnet/dn_timer.c +++ b/net/decnet/dn_timer.c @@ -36,16 +36,13 @@ static void dn_slow_timer(unsigned long arg); void dn_start_slow_timer(struct sock *sk) { - sk->sk_timer.expires = jiffies + SLOW_INTERVAL; - sk->sk_timer.function = dn_slow_timer; - sk->sk_timer.data = (unsigned long)sk; - - add_timer(&sk->sk_timer); + setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk); + sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); } void dn_stop_slow_timer(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } static void dn_slow_timer(unsigned long arg) @@ -53,12 +50,10 @@ static void dn_slow_timer(unsigned long arg) struct sock *sk = (struct sock *)arg; struct dn_scp *scp = DN_SK(sk); - sock_hold(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk->sk_timer.expires = jiffies + HZ / 10; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10); goto out; } @@ -100,9 +95,7 @@ static void dn_slow_timer(unsigned long arg) scp->keepalive_fxn(sk); } - sk->sk_timer.expires = jiffies + SLOW_INTERVAL; - - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); out: bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.2 From 71b1391a41289735676be02e35239e5aa9fe6ba6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 25 Nov 2011 06:47:16 +0000 Subject: l2tp: ensure sk->dst is still valid When using l2tp over ipsec, the tunnel will hang when rekeying occurs. Reason is that the transformer bundle attached to the dst entry is now in STATE_DEAD and thus xfrm_output_one() drops all packets (XfrmOutStateExpired increases). Fix this by calling __sk_dst_check (which drops the stale dst if xfrm dst->check callback finds that the bundle is no longer valid). Cc: James Chapman Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index cf0f308abf5e..89ff8c67943e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1072,7 +1072,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Get routing info from the tunnel socket */ skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(__sk_dst_get(sk))); + skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); inet = inet_sk(sk); fl = &inet->cork.fl; -- cgit v1.2.2 From 0884d7aa24e15e72b3c07f7da910a13bb7df3592 Mon Sep 17 00:00:00 2001 From: Alexey Moiseytsev Date: Mon, 21 Nov 2011 13:35:25 +0000 Subject: AF_UNIX: Fix poll blocking problem when reading from a stream socket poll() call may be blocked by concurrent reading from the same stream socket. Signed-off-by: Alexey Moiseytsev Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 466fbcc5cf77..b595a3d8679f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1957,6 +1957,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if ((UNIXCB(skb).pid != siocb->scm->pid) || (UNIXCB(skb).cred != siocb->scm->cred)) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } } else { @@ -1974,6 +1975,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, chunk = min_t(unsigned int, skb->len, size); if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); if (copied == 0) copied = -EFAULT; break; @@ -1991,6 +1993,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, /* put the skb back if we didn't use it up.. */ if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } @@ -2006,6 +2009,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, /* put message back and return */ skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } } while (size); -- cgit v1.2.2 From de68dca1816660b0d3ac89fa59ffb410007a143f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 26 Nov 2011 12:13:44 +0000 Subject: inet: add a redirect generation id in inetpeer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now inetpeer is the place where we cache redirect information for ipv4 destinations, we must be able to invalidate informations when a route is added/removed on host. As inetpeer is not yet namespace aware, this patch adds a shared redirect_genid, and a per inetpeer redirect_genid. This might be changed later if inetpeer becomes ns aware. Cache information for one inerpeer is valid as long as its redirect_genid has the same value than global redirect_genid. Reported-by: Arkadiusz Miśkiewicz Tested-by: Arkadiusz Miśkiewicz Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fb47c8f0cd86..5c2847247f51 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -131,6 +131,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; +static int redirect_genid; /* * Interface to generic destination cache. @@ -837,6 +838,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); + redirect_genid++; } /* @@ -1391,8 +1393,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, peer = rt->peer; if (peer) { - if (peer->redirect_learned.a4 != new_gw) { + if (peer->redirect_learned.a4 != new_gw || + peer->redirect_genid != redirect_genid) { peer->redirect_learned.a4 = new_gw; + peer->redirect_genid = redirect_genid; atomic_inc(&__rt_peer_genid); } check_peer_redir(&rt->dst, peer); @@ -1701,6 +1705,8 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) if (peer) { check_peer_pmtu(dst, peer); + if (peer->redirect_genid != redirect_genid) + peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { if (check_peer_redir(dst, peer)) @@ -1857,6 +1863,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, peer->metrics, false); check_peer_pmtu(&rt->dst, peer); + if (peer->redirect_genid != redirect_genid) + peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; -- cgit v1.2.2 From e007b857e88097c96c45620bf3b04a4e309053d1 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Thu, 24 Nov 2011 18:13:56 +0200 Subject: nl80211: fix MAC address validation MAC addresses have a fixed length. The current policy allows passing < ETH_ALEN bytes, which might result in reading beyond the buffer. Cc: stable@vger.kernel.org Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b3a476fe8272..ffafda5022c2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -89,8 +89,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, - [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN }, - [NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [NL80211_ATTR_MAC] = { .len = ETH_ALEN }, + [NL80211_ATTR_PREV_BSSID] = { .len = ETH_ALEN }, [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, -- cgit v1.2.2 From 24f50a9d165745fd0701c6e089d35f58a229ea69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 24 Nov 2011 20:06:14 +0100 Subject: mac80211: don't stop a single aggregation session twice Nikolay noticed (by code review) that mac80211 can attempt to stop an aggregation session while it is already being stopped. So to fix it, check whether stop is already being done and bail out if so. Also move setting the STOPPING state into the lock so things are properly atomic. Cc: stable@vger.kernel.org Reported-by: Nikolay Martynov Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 2ac033989e01..674b345ade81 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -160,6 +160,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -ENOENT; } + /* if we're already stopping ignore any new requests to stop */ + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { + spin_unlock_bh(&sta->lock); + return -EALREADY; + } + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { /* not even started yet! */ ieee80211_assign_tid_tx(sta, tid, NULL); @@ -168,6 +174,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return 0; } + set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -175,8 +183,6 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); - del_timer_sync(&tid_tx->addba_resp_timer); /* -- cgit v1.2.2 From d305a6557b2c4dca0110f05ffe745b1ef94adb80 Mon Sep 17 00:00:00 2001 From: Nikolay Martynov Date: Mon, 28 Nov 2011 09:18:00 +0100 Subject: mac80211: fix race condition caused by late addBA response If addBA responses comes in just after addba_resp_timer has expired mac80211 will still accept it and try to open the aggregation session. This causes drivers to be confused and in some cases even crash. This patch fixes the race condition and makes sure that if addba_resp_timer has expired addBA response is not longer accepted and we do not try to open half-closed session. Cc: stable@vger.kernel.org Signed-off-by: Nikolay Martynov [some adjustments] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 674b345ade81..eea6e5c8d168 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -762,11 +762,27 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; } - del_timer(&tid_tx->addba_resp_timer); + del_timer_sync(&tid_tx->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid); #endif + + /* + * addba_resp_timer may have fired before we got here, and + * caused WANT_STOP to be set. If the stop then was already + * processed further, STOPPING might be set. + */ + if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || + test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG + "got addBA resp for tid %d but we already gave up\n", + tid); +#endif + goto out; + } + /* * IEEE 802.11-2007 7.3.1.14: * In an ADDBA Response frame, when the Status Code field -- cgit v1.2.2 From 2a1e0fd175dcfd72096ba9291d31e3b1b5342e60 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 27 Nov 2011 15:29:44 +0200 Subject: mac80211: fix race between the AGG SM and the Tx data path When a packet is supposed to sent be as an a-MPDU, mac80211 sets IEEE80211_TX_CTL_AMPDU to let the driver know. On the other hand, mac80211 configures the driver for aggregration with the ampdu_action callback. There is race between these two mechanisms since the following scenario can occur when the BA agreement is torn down: Tx softIRQ drv configuration ========== ================= check OPERATIONAL bit Set the TX_CTL_AMPDU bit in the packet clear OPERATIONAL bit stop Tx AGG Pass Tx packet to the driver. In that case the driver would get a packet with TX_CTL_AMPDU set although it has already been notified that the BA session has been torn down. To fix this, we need to synchronize all the Qdisc activity after we cleared the OPERATIONAL bit. After that step, all the following packets will be buffered until the driver reports it is ready to get new packets for this RA / TID. This buffering allows not to run into another race that would send packets with TX_CTL_AMPDU unset while the driver hasn't been requested to tear down the BA session yet. This race occurs in practice and iwlwifi complains with a WARN_ON when it happens. Cc: stable@kernel.org Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index eea6e5c8d168..331472ce038c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -192,6 +192,20 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); + /* + * There might be a few packets being processed right now (on + * another CPU) that have already gotten past the aggregation + * check when it was still OPERATIONAL and consequently have + * IEEE80211_TX_CTL_AMPDU set. In that case, this code might + * call into the driver at the same time or even before the + * TX paths calls into it, which could confuse the driver. + * + * Wait for all currently running TX paths to finish before + * telling the driver. New packets will not go through since + * the aggregation session is no longer OPERATIONAL. + */ + synchronize_net(); + tid_tx->stop_initiator = initiator; tid_tx->tx_stop = tx; -- cgit v1.2.2 From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 27 Nov 2011 21:14:46 +0000 Subject: net: Fix corruption in /proc/*/net/dev_mcast I just hit this during my testing. Isn't there another bug lurking? BUG kmalloc-8: Redzone overwritten INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896 .__kmalloc+0x1e0/0x2d0 .__seq_open_private+0x30/0xa0 .seq_open_net+0x60/0xe0 .dev_mc_seq_open+0x4c/0x70 .proc_reg_open+0xd8/0x260 .__dentry_open.clone.11+0x2b8/0x400 .do_last+0xf4/0x950 .path_openat+0xf8/0x480 .do_filp_open+0x48/0xc0 .do_sys_open+0x140/0x250 syscall_exit+0x0/0x40 dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates sizeof(struct seq_net_private) of private data, whereas it expects sizeof(struct dev_iter_state): struct dev_iter_state { struct seq_net_private p; unsigned int pos; /* bucket << BUCKET_SPACE + offset */ }; Create dev_seq_open_ops and use it so we don't have to expose struct dev_iter_state. [ Problem added by commit f04565ddf52e4 (dev: use name hash for dev_seq_ops) -Eric ] Signed-off-by: Anton Blanchard Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ net/core/dev_addr_lists.c | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6ba50a1e404c..1482eea0bbf0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4282,6 +4282,12 @@ static int dev_seq_open(struct inode *inode, struct file *file) sizeof(struct dev_iter_state)); } +int dev_seq_open_ops(struct inode *inode, struct file *file, + const struct seq_operations *ops) +{ + return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state)); +} + static const struct file_operations dev_seq_fops = { .owner = THIS_MODULE, .open = dev_seq_open, diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 277faef9148d..febba516db62 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); + return dev_seq_open_ops(inode, file, &dev_mc_seq_ops); } static const struct file_operations dev_mc_seq_fops = { -- cgit v1.2.2 From 2a38e6d5aed24bb7f0211e0819fac8c32c2b5791 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Sun, 27 Nov 2011 21:33:34 +0000 Subject: ipv6: Set mcast_hops to IPV6_DEFAULT_MCASTHOPS when -1 was given. We need to set np->mcast_hops to it's default value at this moment otherwise when we use it and found it's value is -1, the logic to get default hop limit doesn't take multicast into account and will return wrong hop limit(IPV6_DEFAULT_HOPLIMIT) which is for unicast. Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c99e3ee9781f..26cb08c84b74 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -503,7 +503,7 @@ done: goto e_inval; if (val > 255 || val < -1) goto e_inval; - np->mcast_hops = val; + np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); retv = 0; break; -- cgit v1.2.2 From c89304b8ea34ab48ba6ae10e06a8b1b8c8212307 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 29 Nov 2011 09:26:30 +0000 Subject: sctp: better integer overflow check in sctp_auth_create_key() The check from commit 30c2235c is incomplete and cannot prevent cases like key_len = 0x80000000 (INT_MAX + 1). In that case, the left-hand side of the check (INT_MAX - key_len), which is unsigned, becomes 0xffffffff (UINT_MAX) and bypasses the check. However this shouldn't be a security issue. The function is called from the following two code paths: 1) setsockopt() 2) sctp_auth_asoc_set_secret() In case (1), sca_keylength is never going to exceed 65535 since it's bounded by a u16 from the user API. As such, the key length will never overflow. In case (2), sca_keylength is computed based on the user key (1 short) and 2 * key_vector (3 shorts) for a total of 7 * USHRT_MAX, which still will not overflow. In other words, this overflow check is not really necessary. Just make it more correct. Signed-off-by: Xi Wang Cc: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 865e68fef21c..bf812048cf6f 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -82,7 +82,7 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) struct sctp_auth_bytes *key; /* Verify that we are not going to overflow INT_MAX */ - if ((INT_MAX - key_len) < sizeof(struct sctp_auth_bytes)) + if (key_len > (INT_MAX - sizeof(struct sctp_auth_bytes))) return NULL; /* Allocate the shared key */ -- cgit v1.2.2 From 1281bc256543eb610b14fa95ce1967397931a120 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2011 10:10:54 +0000 Subject: netlabel: Fix build problems when IPv6 is not enabled A recent fix to the the NetLabel code caused build problem with configurations that did not have IPv6 enabled; see below: netlabel_kapi.c: In function 'netlbl_cfg_unlbl_map_add': netlabel_kapi.c:165:4: error: implicit declaration of function 'netlbl_af6list_add' This patch fixes this problem by making the IPv6 specific code conditional on the IPv6 configuration flags as we done in the rest of NetLabel and the network stack as a whole. We have to move some variable declarations around as a result so things may not be quite as pretty, but at least it builds cleanly now. Some additional IPv6 conditionals were added to the NetLabel code as well for the sake of consistency. Reported-by: Randy Dunlap Signed-off-by: Paul Moore Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- net/netlabel/netlabel_kapi.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7b372a763c60..824f184f7a9b 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -111,8 +111,6 @@ int netlbl_cfg_unlbl_map_add(const char *domain, struct netlbl_domaddr_map *addrmap = NULL; struct netlbl_domaddr4_map *map4 = NULL; struct netlbl_domaddr6_map *map6 = NULL; - const struct in_addr *addr4, *mask4; - const struct in6_addr *addr6, *mask6; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) @@ -133,9 +131,9 @@ int netlbl_cfg_unlbl_map_add(const char *domain, INIT_LIST_HEAD(&addrmap->list6); switch (family) { - case AF_INET: - addr4 = addr; - mask4 = mask; + case AF_INET: { + const struct in_addr *addr4 = addr; + const struct in_addr *mask4 = mask; map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); if (map4 == NULL) goto cfg_unlbl_map_add_failure; @@ -148,9 +146,11 @@ int netlbl_cfg_unlbl_map_add(const char *domain, if (ret_val != 0) goto cfg_unlbl_map_add_failure; break; - case AF_INET6: - addr6 = addr; - mask6 = mask; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: { + const struct in6_addr *addr6 = addr; + const struct in6_addr *mask6 = mask; map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); if (map6 == NULL) goto cfg_unlbl_map_add_failure; @@ -167,6 +167,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain, if (ret_val != 0) goto cfg_unlbl_map_add_failure; break; + } +#endif /* IPv6 */ default: goto cfg_unlbl_map_add_failure; break; @@ -225,9 +227,11 @@ int netlbl_cfg_unlbl_static_add(struct net *net, case AF_INET: addr_len = sizeof(struct in_addr); break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: addr_len = sizeof(struct in6_addr); break; +#endif /* IPv6 */ default: return -EPFNOSUPPORT; } @@ -266,9 +270,11 @@ int netlbl_cfg_unlbl_static_del(struct net *net, case AF_INET: addr_len = sizeof(struct in_addr); break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: addr_len = sizeof(struct in6_addr); break; +#endif /* IPv6 */ default: return -EPFNOSUPPORT; } -- cgit v1.2.2 From a042994dd377d86bff9446ee76151ceb6267c9ba Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 28 Nov 2011 16:47:15 -0500 Subject: cfg80211: fix race on init and driver registration There is a theoretical race that if hit will trigger a crash. The race is between when we issue the first regulatory hint, regulatory_hint_core(), gets processed by the workqueue and between when the first device gets registered to the wireless core. This is not easy to reproduce but it was easy to do so through the regulatory simulator I have been working on. This is a port of the fix I implemented there [1]. [1] https://github.com/mcgrof/regsim/commit/a246ccf81f059cb662eee288aa13100f631e4cc8 Cc: stable@vger.kernel.org Cc: Johannes Berg Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 186b7f2a27b6..0ec40715a67b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -55,8 +55,17 @@ #define REG_DBG_PRINT(args...) #endif +static struct regulatory_request core_request_world = { + .initiator = NL80211_REGDOM_SET_BY_CORE, + .alpha2[0] = '0', + .alpha2[1] = '0', + .intersect = false, + .processed = true, + .country_ie_env = ENVIRON_ANY, +}; + /* Receipt of information from last regulatory request */ -static struct regulatory_request *last_request; +static struct regulatory_request *last_request = &core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -148,7 +157,7 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reset_regdomains(void) +static void reset_regdomains(bool full_reset) { /* avoid freeing static information or freeing something twice */ if (cfg80211_regdomain == cfg80211_world_regdom) @@ -163,6 +172,13 @@ static void reset_regdomains(void) cfg80211_world_regdom = &world_regdom; cfg80211_regdomain = NULL; + + if (!full_reset) + return; + + if (last_request != &core_request_world) + kfree(last_request); + last_request = &core_request_world; } /* @@ -173,7 +189,7 @@ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { BUG_ON(!last_request); - reset_regdomains(); + reset_regdomains(false); cfg80211_world_regdom = rd; cfg80211_regdomain = rd; @@ -1405,7 +1421,8 @@ static int __regulatory_hint(struct wiphy *wiphy, } new_request: - kfree(last_request); + if (last_request != &core_request_world) + kfree(last_request); last_request = pending_request; last_request->intersect = intersect; @@ -1575,9 +1592,6 @@ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - kfree(last_request); - last_request = NULL; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1775,7 +1789,7 @@ static void restore_regulatory_settings(bool reset_user) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(); + reset_regdomains(true); restore_alpha2(alpha2, reset_user); /* @@ -2044,7 +2058,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) int r; if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = rd; return 0; } @@ -2065,7 +2079,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (r) return r; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = rd; return 0; } @@ -2090,7 +2104,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) rd = NULL; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = intersected_rd; return 0; @@ -2110,7 +2124,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) kfree(rd); rd = NULL; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = intersected_rd; return 0; @@ -2263,11 +2277,8 @@ void /* __init_or_exit */ regulatory_exit(void) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(); - - kfree(last_request); + reset_regdomains(true); - last_request = NULL; dev_set_uevent_suppress(®_pdev->dev, true); platform_device_unregister(reg_pdev); -- cgit v1.2.2 From 0bac71af6e66dc798bf07d0c0dd14ee5503362f9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 28 Nov 2011 16:47:16 -0500 Subject: cfg80211: amend regulatory NULL dereference fix Johannes' patch for "cfg80211: fix regulatory NULL dereference" broke user regulaotry hints and it did not address the fact that last_request was left populated even if the previous regulatory hint was stale due to the wiphy disappearing. Fix user reguluatory hints by only bailing out if for those regulatory hints where a request_wiphy is expected. The stale last_request considerations are addressed through the previous fixes on last_request where we reset the last_request to a static world regdom request upon reset_regdomains(). In this case though we further enhance the effect by simply restoring reguluatory settings completely. Cc: stable@vger.kernel.org Cc: Johannes Berg Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0ec40715a67b..074c1122ce42 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2049,8 +2049,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) } request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - if (!request_wiphy) { - reg_set_request_processed(); + if (!request_wiphy && + (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || + last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + schedule_delayed_work(®_timeout, 0); return -ENODEV; } -- cgit v1.2.2 From c72e8d335e2c6a309b6281f2abcf491f37b8b92b Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 30 Nov 2011 16:56:30 +0100 Subject: mac80211: fill rate filter for internal scan requests The rates bitmap for internal scan requests shoud be filled, otherwise there will be probe requests with zero rates supported. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- net/mac80211/main.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d999bf3b84e1..cae443563ec9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -757,6 +757,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->int_scan_req) return -ENOMEM; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!local->hw.wiphy->bands[band]) + continue; + local->int_scan_req->rates[band] = (u32) -1; + } + /* if low-level driver supports AP, we also support VLAN */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) { hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); -- cgit v1.2.2 From f7e57044eeb1841847c24aa06766c8290c202583 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Nov 2011 04:08:58 +0000 Subject: sch_teql: fix lockdep splat We need rcu_read_lock() protection before using dst_get_neighbour(), and we must cache its value (pass it to __teql_resolve()) teql_master_xmit() is called under rcu_read_lock_bh() protection, its not enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_teql.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index a3b7120fcc74..4f4c52c0eeb3 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int -__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, + struct net_device *dev, struct netdev_queue *txq, + struct neighbour *mn) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); - struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); - struct neighbour *mn = dst_get_neighbour(skb_dst(skb)); + struct teql_sched_data *q = qdisc_priv(txq->qdisc); struct neighbour *n = q->ncache; if (mn->tbl == NULL) @@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } static inline int teql_resolve(struct sk_buff *skb, - struct sk_buff *skb_res, struct net_device *dev) + struct sk_buff *skb_res, + struct net_device *dev, + struct netdev_queue *txq) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *mn; + int res; + if (txq->qdisc == &noop_qdisc) return -ENODEV; - if (dev->header_ops == NULL || - skb_dst(skb) == NULL || - dst_get_neighbour(skb_dst(skb)) == NULL) + if (!dev->header_ops || !dst) return 0; - return __teql_resolve(skb, skb_res, dev); + + rcu_read_lock(); + mn = dst_get_neighbour(dst); + res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + rcu_read_unlock(); + + return res; } static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) @@ -307,7 +316,7 @@ restart: continue; } - switch (teql_resolve(skb, skb_res, slave)) { + switch (teql_resolve(skb, skb_res, slave, slave_txq)) { case 0: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); -- cgit v1.2.2 From 218fa90f072e4aeff9003d57e390857f4f35513e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Nov 2011 20:05:55 +0000 Subject: ipv4: fix lockdep splat in rt_cache_seq_show After commit f2c31e32b378 (fix NULL dereferences in check_peer_redir()), dst_get_neighbour() should be guarded by rcu_read_lock() / rcu_read_unlock() section. Reported-by: Miles Lane Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5c2847247f51..57e01bc60947 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -417,9 +417,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) else { struct rtable *r = v; struct neighbour *n; - int len; + int len, HHUptod; + rcu_read_lock(); n = dst_get_neighbour(&r->dst); + HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; + rcu_read_unlock(); + seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->dst.dev ? r->dst.dev->name : "*", @@ -433,7 +437,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, -1, - (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, + HHUptod, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); -- cgit v1.2.2 From 8f891489866ec62a87494eff3ed17c88152c32d4 Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Wed, 30 Nov 2011 23:43:07 -0500 Subject: net/core: fix rollback handler in register_netdevice_notifier Within nested statements, the break statement terminates only the do, for, switch, or while statement that immediately encloses it, So replace the break with goto. Signed-off-by: RongQing.Li Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1482eea0bbf0..5a13edfc9f73 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1396,7 +1396,7 @@ rollback: for_each_net(net) { for_each_netdev(net, dev) { if (dev == last) - break; + goto outroll; if (dev->flags & IFF_UP) { nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); @@ -1407,6 +1407,7 @@ rollback: } } +outroll: raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } -- cgit v1.2.2 From 03360c5a405999d605ffc7373a7b90f3388db92e Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 1 Dec 2011 10:44:17 -0500 Subject: Revert "mac80211: clear sta.drv_priv on reconfiguration" This reverts commit f785d83a19bca326f79d127a413e35769afc0105. This was provoking WARNINGs from the iwlegacy drivers. Signed-off-by: John W. Linville --- net/mac80211/util.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0c9490722aa5..6719bce4a081 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1038,7 +1038,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_sub_if_data, u.ap); - memset(&sta->sta.drv_priv, 0, hw->sta_data_size); WARN_ON(drv_sta_add(local, sdata, &sta->sta)); } } -- cgit v1.2.2 From efbc368dcc6426d5430b9b8eeda944cf2cb74b8c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Dec 2011 13:38:59 -0500 Subject: ipv4: Perform peer validation on cached route lookup. Otherwise we won't notice the peer GENID change. Reported-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 57e01bc60947..ca5e237df029 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1693,12 +1693,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } -static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +static struct rtable *ipv4_validate_peer(struct rtable *rt) { - struct rtable *rt = (struct rtable *) dst; - - if (rt_is_expired(rt)) - return NULL; if (rt->rt_peer_genid != rt_peer_genid()) { struct inet_peer *peer; @@ -1707,19 +1703,29 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) peer = rt->peer; if (peer) { - check_peer_pmtu(dst, peer); + check_peer_pmtu(&rt->dst, peer); if (peer->redirect_genid != redirect_genid) peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) + if (check_peer_redir(&rt->dst, peer)) return NULL; } } rt->rt_peer_genid = rt_peer_genid(); } + return rt; +} + +static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +{ + struct rtable *rt = (struct rtable *) dst; + + if (rt_is_expired(rt)) + return NULL; + dst = (struct dst_entry *) ipv4_validate_peer(rt); return dst; } @@ -2374,6 +2380,9 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { + rth = ipv4_validate_peer(rth); + if (!rth) + continue; if (noref) { dst_use_noref(&rth->dst, jiffies); skb_dst_set_noref(skb, &rth->dst); @@ -2749,6 +2758,9 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { + rth = ipv4_validate_peer(rth); + if (!rth) + continue; dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); -- cgit v1.2.2 From 7fdcf13b292e8b2e38e42de24be2503e37b2cf97 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Dec 2011 14:00:15 -0500 Subject: SUNRPC: Fix the execution time statistics in the face of RPC restarts If the rpc_task gets restarted, then we want to ensure that we don't double-count the execution time statistics, timeout data, etc. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d12ffa545811..00a1a2acd587 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -590,6 +590,27 @@ void rpc_prepare_task(struct rpc_task *task) task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } +static void +rpc_init_task_statistics(struct rpc_task *task) +{ + /* Initialize retry counters */ + task->tk_garb_retry = 2; + task->tk_cred_retry = 2; + task->tk_rebind_retry = 2; + + /* starting timestamp */ + task->tk_start = ktime_get(); +} + +static void +rpc_reset_task_statistics(struct rpc_task *task) +{ + task->tk_timeouts = 0; + task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT); + + rpc_init_task_statistics(task); +} + /* * Helper that calls task->tk_ops->rpc_call_done if it exists */ @@ -602,6 +623,7 @@ void rpc_exit_task(struct rpc_task *task) WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ xprt_release(task); + rpc_reset_task_statistics(task); } } } @@ -804,11 +826,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_calldata = task_setup_data->callback_data; INIT_LIST_HEAD(&task->tk_task); - /* Initialize retry counters */ - task->tk_garb_retry = 2; - task->tk_cred_retry = 2; - task->tk_rebind_retry = 2; - task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; task->tk_owner = current->tgid; @@ -818,8 +835,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; - /* starting timestamp */ - task->tk_start = ktime_get(); + rpc_init_task_statistics(task); dprintk("RPC: new task initialized, procpid %u\n", task_pid_nr(current)); -- cgit v1.2.2 From b03b6dd58cef7d15b7c46a6729b83dd535ef08ab Mon Sep 17 00:00:00 2001 From: Vitalii Demianets Date: Fri, 25 Nov 2011 00:16:37 +0000 Subject: bridge: master device stuck in no-carrier state forever when in user-stp mode When in user-stp mode, bridge master do not follow state of its slaves, so after the following sequence of events it can stuck forever in no-carrier state: 1) turn stp off 2) put all slaves down - master device will follow their state and also go in no-carrier state 3) turn stp on with bridge-stp script returning 0 (go to the user-stp mode) Now bridge master won't follow slaves' state and will never reach running state. This patch solves the problem by making user-stp and kernel-stp behavior similar regarding master following slaves' states. Signed-off-by: Vitalii Demianets Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 6 ++++++ net/bridge/br_stp.c | 29 ++++++++++++++--------------- 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e5f9ece3c9a0..a1daf8227ed1 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -18,6 +18,7 @@ #include #include "br_private.h" +#include "br_private_stp.h" static inline size_t br_nlmsg_size(void) { @@ -188,6 +189,11 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) p->state = new_state; br_log_state(p); + + spin_lock_bh(&p->br->lock); + br_port_state_selection(p->br); + spin_unlock_bh(&p->br->lock); + br_ifinfo_notify(RTM_NEWLINK, p); return 0; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index ad0a3f7cf6cc..dd147d78a588 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -399,25 +399,24 @@ void br_port_state_selection(struct net_bridge *br) struct net_bridge_port *p; unsigned int liveports = 0; - /* Don't change port states if userspace is handling STP */ - if (br->stp_enabled == BR_USER_STP) - return; - list_for_each_entry(p, &br->port_list, list) { if (p->state == BR_STATE_DISABLED) continue; - if (p->port_no == br->root_port) { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_forwarding(p); - } else if (br_is_designated_port(p)) { - del_timer(&p->message_age_timer); - br_make_forwarding(p); - } else { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_blocking(p); + /* Don't change port states if userspace is handling STP */ + if (br->stp_enabled != BR_USER_STP) { + if (p->port_no == br->root_port) { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_forwarding(p); + } else if (br_is_designated_port(p)) { + del_timer(&p->message_age_timer); + br_make_forwarding(p); + } else { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_blocking(p); + } } if (p->state == BR_STATE_FORWARDING) -- cgit v1.2.2 From 59c2cdae2791c0b2ee13d148edc6b771e7e7953f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Dec 2011 14:12:55 -0500 Subject: Revert "udp: remove redundant variable" This reverts commit 81d54ec8479a2c695760da81f05b5a9fb2dbe40a. If we take the "try_again" goto, due to a checksum error, the 'len' has already been truncated. So we won't compute the same values as the original code did. Reported-by: paul bilke Signed-off-by: David S. Miller --- net/ipv4/udp.c | 15 ++++++++------- net/ipv6/udp.c | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ab0966df1e2a..5a65eeac1d29 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1164,7 +1164,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; - unsigned int ulen; + unsigned int ulen, copied; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -1186,9 +1186,10 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; /* @@ -1197,14 +1198,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, len); + msg->msg_iov, copied); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), @@ -1233,7 +1234,7 @@ try_again: if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 846f4757eb8d..8c2541915183 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -340,7 +340,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - unsigned int ulen; + unsigned int ulen, copied; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -363,9 +363,10 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); @@ -376,14 +377,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov,len); + msg->msg_iov, copied ); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) @@ -432,7 +433,7 @@ try_again: datagram_recv_ctl(sk, msg, skb); } - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; -- cgit v1.2.2 From c25573b5134294c0be82bfaecc6d08136835b271 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Dec 2011 14:16:17 -0500 Subject: SUNRPC: Ensure we always bump the backlog queue in xprt_free_slot Whenever we free a slot, we know that the resulting xprt->num_reqs will be less than xprt->max_reqs, so we know that we can release at least one backlogged rpc_task. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>=3.1] --- net/sunrpc/xprt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f4385e45a5fc..c64c0ef519b5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -995,13 +995,11 @@ out_init_req: static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { - if (xprt_dynamic_free_slot(xprt, req)) - return; - - memset(req, 0, sizeof(*req)); /* mark unused */ - spin_lock(&xprt->reserve_lock); - list_add(&req->rq_list, &xprt->free); + if (!xprt_dynamic_free_slot(xprt, req)) { + memset(req, 0, sizeof(*req)); /* mark unused */ + list_add(&req->rq_list, &xprt->free); + } rpc_wake_up_next(&xprt->backlog); spin_unlock(&xprt->reserve_lock); } -- cgit v1.2.2 From 1ee5fa1e9970a16036e37c7b9d5ce81c778252fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Dec 2011 11:06:34 +0000 Subject: sch_red: fix red_change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le mercredi 30 novembre 2011 à 14:36 -0800, Stephen Hemminger a écrit : > (Almost) nobody uses RED because they can't figure it out. > According to Wikipedia, VJ says that: > "there are not one, but two bugs in classic RED." RED is useful for high throughput routers, I doubt many linux machines act as such devices. I was considering adding Adaptative RED (Sally Floyd, Ramakrishna Gummadi, Scott Shender), August 2001 In this version, maxp is dynamic (from 1% to 50%), and user only have to setup min_th (target average queue size) (max_th and wq (burst in linux RED) are automatically setup) By the way it seems we have a small bug in red_change() if (skb_queue_empty(&sch->q)) red_end_of_idle_period(&q->parms); First, if queue is empty, we should call red_start_of_idle_period(&q->parms); Second, since we dont use anymore sch->q, but q->qdisc, the test is meaningless. Oh well... [PATCH] sch_red: fix red_change() Now RED is classful, we must check q->qdisc->q.qlen, and if queue is empty, we start an idle period, not end it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_red.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 6649463da1b6..d617161f8dd3 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -209,8 +209,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) ctl->Plog, ctl->Scell_log, nla_data(tb[TCA_RED_STAB])); - if (skb_queue_empty(&sch->q)) - red_end_of_idle_period(&q->parms); + if (!q->qdisc->q.qlen) + red_start_of_idle_period(&q->parms); sch_tree_unlock(sch); return 0; -- cgit v1.2.2 From d01ff0a049f749e0bf10a35bb23edd012718c8c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?= Date: Thu, 1 Dec 2011 15:47:06 +0000 Subject: ipv4: flush route cache after change accept_local After reset ipv4_devconf->data[IPV4_DEVCONF_ACCEPT_LOCAL] to 0, we should flush route cache, or it will continue receive packets with local source address, which should be dropped. Signed-off-by: Weiping Pan Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c6b5092f29a1..65f01dc47565 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1490,7 +1490,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1501,6 +1503,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); + if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) + if ((new_value == 0) && (old_value != 0)) + rt_cache_flush(net, 0); } return ret; -- cgit v1.2.2 From 3ced1be5490f5c415d51a1e5918beeb9239d546b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Dec 2011 22:19:01 -0500 Subject: netfilter: Remove ADVANCED dependency from NF_CONNTRACK_NETBIOS_NS firewalld in Fedora 16 needs this. Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e8f379692294..d5597b759ba3 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -201,7 +201,6 @@ config NF_CONNTRACK_BROADCAST config NF_CONNTRACK_NETBIOS_NS tristate "NetBIOS name service protocol support" - depends on NETFILTER_ADVANCED select NF_CONNTRACK_BROADCAST help NetBIOS name service requests are sent as broadcast messages from an -- cgit v1.2.2 From 33cb722c22f28964a501a56cc76397834c221c7a Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 23 Nov 2011 11:23:34 +0200 Subject: Bluetooth: Correct version check in hci_setup Check for hci_ver instead of lmp_ver Signed-off-by: Andrei Emeltchenko Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d7d96b6b1f0d..643a41b76e2e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -545,7 +545,7 @@ static void hci_setup(struct hci_dev *hdev) { hci_setup_event_mask(hdev); - if (hdev->lmp_ver > 1) + if (hdev->hci_ver > 1) hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (hdev->features[6] & LMP_SIMPLE_PAIR) { -- cgit v1.2.2 From f61759e6b831a55b89e584b198c3da325e2bc379 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 2 Dec 2011 11:39:42 +0000 Subject: ipv4: make sure RTO_ONLINK is saved in routing cache __mkroute_output fails to work with the original tos and uses value with stripped RTO_ONLINK bit. Make sure we put the original TOS bits into rt_key_tos because it used to match cached route. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ca5e237df029..588d971a3ba1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,7 +112,7 @@ #include #define RT_FL_TOS(oldflp4) \ - ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) + ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) #define IP_MAX_MTU 0xFFF0 @@ -2441,11 +2441,11 @@ EXPORT_SYMBOL(ip_route_input_common); static struct rtable *__mkroute_output(const struct fib_result *res, const struct flowi4 *fl4, __be32 orig_daddr, __be32 orig_saddr, - int orig_oif, struct net_device *dev_out, + int orig_oif, __u8 orig_rtos, + struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; - u32 tos = RT_FL_TOS(fl4); struct in_device *in_dev; u16 type = res->type; struct rtable *rth; @@ -2496,7 +2496,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_key_tos = tos; + rth->rt_key_tos = orig_rtos; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; @@ -2546,7 +2546,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) { struct net_device *dev_out = NULL; - u32 tos = RT_FL_TOS(fl4); + __u8 tos = RT_FL_TOS(fl4); unsigned int flags = 0; struct fib_result res; struct rtable *rth; @@ -2722,7 +2722,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) make_route: rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, - dev_out, flags); + tos, dev_out, flags); if (!IS_ERR(rth)) { unsigned int hash; -- cgit v1.2.2 From de398fb8b92eba3447298053a483727bdd5fe37e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Dec 2011 13:21:42 -0500 Subject: ipv4: Fix peer validation on cached lookup. If ipv4_valdiate_peer() fails during a cached entry lookup, we'll NULL derer since the loop iterator assumes rth is not NULL. Letting this be handled as a failure is just bogus, so just make it not fail. If we have trouble getting a non-NULL neighbour for the redirected gateway, just restore the original gateway and continue. The very next use of this cached route will try again. Reported-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv4/route.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 588d971a3ba1..46af62363b8c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1310,7 +1310,7 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; @@ -1321,21 +1321,19 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) rt->rt_gateway = peer->redirect_learned.a4; n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); - if (IS_ERR(n)) - return PTR_ERR(n); + if (IS_ERR(n)) { + rt->rt_gateway = orig_gw; + return; + } old_n = xchg(&rt->dst._neighbour, n); if (old_n) neigh_release(old_n); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); - rt->rt_gateway = orig_gw; - return -EAGAIN; + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); } else { rt->rt_flags |= RTCF_REDIRECTED; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } - return 0; } /* called in rcu_read_lock() section */ @@ -1693,7 +1691,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } -static struct rtable *ipv4_validate_peer(struct rtable *rt) +static void ipv4_validate_peer(struct rtable *rt) { if (rt->rt_peer_genid != rt_peer_genid()) { struct inet_peer *peer; @@ -1708,15 +1706,12 @@ static struct rtable *ipv4_validate_peer(struct rtable *rt) if (peer->redirect_genid != redirect_genid) peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(&rt->dst, peer)) - return NULL; - } + peer->redirect_learned.a4 != rt->rt_gateway) + check_peer_redir(&rt->dst, peer); } rt->rt_peer_genid = rt_peer_genid(); } - return rt; } static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) @@ -1725,7 +1720,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) if (rt_is_expired(rt)) return NULL; - dst = (struct dst_entry *) ipv4_validate_peer(rt); + ipv4_validate_peer(rt); return dst; } @@ -2380,9 +2375,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - rth = ipv4_validate_peer(rth); - if (!rth) - continue; + ipv4_validate_peer(rth); if (noref) { dst_use_noref(&rth->dst, jiffies); skb_dst_set_noref(skb, &rth->dst); @@ -2758,9 +2751,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - rth = ipv4_validate_peer(rth); - if (!rth) - continue; + ipv4_validate_peer(rth); dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); -- cgit v1.2.2 From 99b53bdd810611cc178e1a86bc112d8f4f56a1e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?= Date: Mon, 5 Dec 2011 21:39:41 +0000 Subject: ipv4:correct description for tcp_max_syn_backlog Since commit c5ed63d66f24(tcp: fix three tcp sysctls tuning), sysctl_max_syn_backlog is determined by tcp_hashinfo->ehash_mask, and the minimal value is 128, and it will increase in proportion to the memory of machine. The original description for tcp_max_syn_backlog and sysctl_max_syn_backlog are out of date. Changelog: V2: update description for sysctl_max_syn_backlog Signed-off-by: Weiping Pan Reviewed-by: Shan Wei Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/request_sock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 182236b2510a..9b570a6a33c5 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -26,10 +26,11 @@ * but then some measure against one socket starving all other sockets * would be needed. * - * It was 128 by default. Experiments with real servers show, that + * The minimum value of it is 128. Experiments with real servers show that * it is absolutely not enough even at 100conn/sec. 256 cures most - * of problems. This value is adjusted to 128 for very small machines - * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). + * of problems. + * This value is adjusted to 128 for low memory machines, + * and it will increase in proportion to the memory of machine. * Note : Dont forget somaxconn that may limit backlog too. */ int sysctl_max_syn_backlog = 256; -- cgit v1.2.2 From 681090902eeb459a829f6f93d378a82011af3c89 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 6 Dec 2011 08:04:40 +0000 Subject: net: Silence seq_scale() unused warning On a CONFIG_NET=y build net/core/secure_seq.c:22: warning: 'seq_scale' defined but not used Signed-off-by: Stephen Boyd Signed-off-by: David S. Miller --- net/core/secure_seq.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 025233de25f9..925991ae6f52 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -19,6 +19,7 @@ static int __init net_secret_init(void) } late_initcall(net_secret_init); +#ifdef CONFIG_INET static u32 seq_scale(u32 seq) { /* @@ -33,6 +34,7 @@ static u32 seq_scale(u32 seq) */ return seq + (ktime_to_ns(ktime_get_real()) >> 6); } +#endif #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, -- cgit v1.2.2 From 03fc3070457dc0e6a717a2e732af93ef1cb2ae51 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 4 Dec 2011 12:26:50 +0100 Subject: batman-adv: in case of roaming mark the client with TT_CLIENT_ROAM In case of a client roaming from node A to node B, the latter have to mark the corresponding global entry with TT_CLIENT_ROAM (instead of TT_CLIENT_PENDING). Marking a global entry with TT_CLIENT_PENDING will end up in keeping such entry forever (because this flag is only meant to be used with local entries and it is never checked on global ones). In the worst case (all the clients roaming to the same node A) the local and the global table will contain exactly the same clients. Batman-adv will continue to work, but the memory usage is duplicated. Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c7aafc7c5ed4..c46b1407c885 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -245,9 +245,11 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (tt_global_entry) { /* This node is probably going to update its tt table */ tt_global_entry->orig_node->tt_poss_change = true; - /* The global entry has to be marked as PENDING and has to be + /* The global entry has to be marked as ROAMING and has to be * kept for consistency purpose */ - tt_global_entry->flags |= TT_CLIENT_PENDING; + tt_global_entry->flags |= TT_CLIENT_ROAM; + tt_global_entry->roam_at = jiffies; + send_roam_adv(bat_priv, tt_global_entry->addr, tt_global_entry->orig_node); } -- cgit v1.2.2 From 797399b415b78dacdbcaffdb89e46e369ec88b98 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 4 Dec 2011 22:38:27 +0100 Subject: batman-adv: delete global entry in case of roaming When receiving a DEL change for a client due to a roaming event (change is marked with TT_CLIENT_ROAM), each node has to check if the client roamed to itself or somewhere else. In the latter case the global entry is kept to avoid having no route at all otherwise we can safely delete the global entry Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c46b1407c885..5f09a578d49d 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -696,6 +696,7 @@ void tt_global_del(struct bat_priv *bat_priv, const char *message, bool roaming) { struct tt_global_entry *tt_global_entry = NULL; + struct tt_local_entry *tt_local_entry = NULL; tt_global_entry = tt_global_hash_find(bat_priv, addr); if (!tt_global_entry) @@ -703,15 +704,29 @@ void tt_global_del(struct bat_priv *bat_priv, if (tt_global_entry->orig_node == orig_node) { if (roaming) { - tt_global_entry->flags |= TT_CLIENT_ROAM; - tt_global_entry->roam_at = jiffies; - goto out; + /* if we are deleting a global entry due to a roam + * event, there are two possibilities: + * 1) the client roamed from node A to node B => we mark + * it with TT_CLIENT_ROAM, we start a timer and we + * wait for node B to claim it. In case of timeout + * the entry is purged. + * 2) the client roamed to us => we can directly delete + * the global entry, since it is useless now. */ + tt_local_entry = tt_local_hash_find(bat_priv, + tt_global_entry->addr); + if (!tt_local_entry) { + tt_global_entry->flags |= TT_CLIENT_ROAM; + tt_global_entry->roam_at = jiffies; + goto out; + } } _tt_global_del(bat_priv, tt_global_entry, message); } out: if (tt_global_entry) tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); } void tt_global_del_orig(struct bat_priv *bat_priv, -- cgit v1.2.2 From 15062e6a8524f5977f2cbdf6e3eb2f144262f74e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Dec 2011 09:02:21 +0100 Subject: mac80211: fix another race in aggregation start Emmanuel noticed that when mac80211 stops the queues for aggregation that can leave a packet pending. This packet will be given to the driver after the AMPDU callback, but as a non-aggregated packet which messes up the sequence number etc. I also noticed by looking at the code that if packets are being processed while we clear the WANT_START bit, they might see it cleared already and queue up on tid_tx->pending. If the driver then rejects the new aggregation session we leak the packet. Fix both of these issues by changing this code to not stop the queues at all. Instead, let packets queue up on the tid_tx->pending queue instead of letting them get to the driver, and add code to recover properly in case the driver rejects the session. (The patch looks large because it has to move two functions to before their new use.) Cc: stable@vger.kernel.org Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 86 ++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 331472ce038c..d448f7a3dbc0 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -302,6 +302,38 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) __release(agg_queue); } +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + int queue = ieee80211_ac_from_tid(tid); + unsigned long flags; + + ieee80211_stop_queue_agg(local, tid); + + if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" + " from the pending queue\n", tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) +{ + ieee80211_wake_queue_agg(local, tid); +} + void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -313,19 +345,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* - * While we're asking the driver about the aggregation, - * stop the AC queue so that we don't have to worry - * about frames that came in while we were doing that, - * which would require us to put them to the AC pending - * afterwards which just makes the code more complex. + * Start queuing up packets for this aggregation session. + * We're going to release them once the driver is OK with + * that. */ - ieee80211_stop_queue_agg(local, tid); - clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* - * make sure no packets are being processed to get - * valid starting sequence number + * Make sure no packets are being processed. This ensures that + * we have a valid starting sequence number and that in-flight + * packets have been flushed out and no packets for this TID + * will go into the driver during the ampdu_action call. */ synchronize_net(); @@ -339,17 +369,15 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) " tid %d\n", tid); #endif spin_lock_bh(&sta->lock); + ieee80211_agg_splice_packets(local, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); + ieee80211_agg_splice_finish(local, tid); spin_unlock_bh(&sta->lock); - ieee80211_wake_queue_agg(local, tid); kfree_rcu(tid_tx, rcu_head); return; } - /* we can take packets again now */ - ieee80211_wake_queue_agg(local, tid); - /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -465,38 +493,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - int queue = ieee80211_ac_from_tid(tid); - unsigned long flags; - - ieee80211_stop_queue_agg(local, tid); - - if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" - " from the pending queue\n", tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) -{ - ieee80211_wake_queue_agg(local, tid); -} - static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { -- cgit v1.2.2 From f1932fc1a6d899c754676b1dd8b17de93b052d43 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 7 Dec 2011 09:10:26 -0800 Subject: crush: fix mapping calculation when force argument doesn't exist If the force argument isn't valid, we should continue calculating a mapping as if it weren't specified. Signed-off-by: Sage Weil --- net/ceph/crush/mapper.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 42599e31dcad..3a94eae7abe9 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -477,7 +477,6 @@ int crush_do_rule(struct crush_map *map, int i, j; int numrep; int firstn; - int rc = -1; BUG_ON(ruleno >= map->max_rules); @@ -491,23 +490,18 @@ int crush_do_rule(struct crush_map *map, * that this may or may not correspond to the specific types * referenced by the crush rule. */ - if (force >= 0) { - if (force >= map->max_devices || - map->device_parents[force] == 0) { - /*dprintk("CRUSH: forcefed device dne\n");*/ - rc = -1; /* force fed device dne */ - goto out; - } - if (!is_out(map, weight, force, x)) { - while (1) { - force_context[++force_pos] = force; - if (force >= 0) - force = map->device_parents[force]; - else - force = map->bucket_parents[-1-force]; - if (force == 0) - break; - } + if (force >= 0 && + force < map->max_devices && + map->device_parents[force] != 0 && + !is_out(map, weight, force, x)) { + while (1) { + force_context[++force_pos] = force; + if (force >= 0) + force = map->device_parents[force]; + else + force = map->bucket_parents[-1-force]; + if (force == 0) + break; } } @@ -600,10 +594,7 @@ int crush_do_rule(struct crush_map *map, BUG_ON(1); } } - rc = result_len; - -out: - return rc; + return result_len; } -- cgit v1.2.2 From 4af04aba93f47699e7ac33e7cfd4da22550e6114 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 6 Dec 2011 21:23:45 +0000 Subject: ipv6: Fix for adding multicast route for loopback device automatically. There is no obvious reason to add a default multicast route for loopback devices, otherwise there would be a route entry whose dst.error set to -ENETUNREACH that would blocking all multicast packets. ==================== [ more detailed explanation ] The problem is that the resulting routing table depends on the sequence of interface's initialization and in some situation, that would block all muticast packets. Suppose there are two interfaces on my computer (lo and eth0), if we initailize 'lo' before 'eth0', the resuting routing table(for multicast) would be # ip -6 route show | grep ff00:: unreachable ff00::/8 dev lo metric 256 error -101 ff00::/8 dev eth0 metric 256 When sending multicasting packets, routing subsystem will return the first route entry which with a error set to -101(ENETUNREACH). I know the kernel will set the default ipv6 address for 'lo' when it is up and won't set the default multicast route for it, but there is no reason to stop 'init' program from setting address for 'lo', and that is exactly what systemd did. I am sure there is something wrong with kernel or systemd, currently I preferred kernel caused this problem. ==================== Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cf88df82e2c2..36806def8cfd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1805,7 +1805,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return ERR_PTR(-EACCES); /* Add default multicast route */ - addrconf_add_mroute(dev); + if (!(dev->flags & IFF_LOOPBACK)) + addrconf_add_mroute(dev); /* Add link local route */ addrconf_add_lroute(dev); -- cgit v1.2.2 From 72b36015ba43a3cca5303f5534d2c3e1899eae29 Mon Sep 17 00:00:00 2001 From: Ted Feng Date: Thu, 8 Dec 2011 00:46:21 +0000 Subject: ipip, sit: copy parms.name after register_netdevice Same fix as 731abb9cb2 for ipip and sit tunnel. Commit 1c5cae815d removed an explicit call to dev_alloc_name in ipip_tunnel_locate and ipip6_tunnel_locate, because register_netdevice will now create a valid name, however the tunnel keeps a copy of the name in the private parms structure. Fix this by copying the name back after register_netdevice has successfully returned. This shows up if you do a simple tunnel add, followed by a tunnel show: $ sudo ip tunnel add mode ipip remote 10.2.20.211 $ ip tunnel tunl0: ip/ip remote any local any ttl inherit nopmtudisc tunl%d: ip/ip remote 10.2.20.211 local any ttl inherit $ sudo ip tunnel add mode sit remote 10.2.20.212 $ ip tunnel sit0: ipv6/ip remote any local any ttl 64 nopmtudisc 6rd-prefix 2002::/16 sit%d: ioctl 89f8 failed: No such device sit%d: ipv6/ip remote 10.2.20.212 local any ttl inherit Cc: stable@vger.kernel.org Signed-off-by: Ted Feng Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 7 ++++++- net/ipv6/sit.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 065effd8349a..0b2e7329abda 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip_tunnel_link(ipn, nt); return nt; @@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) static int __net_init ipip_init_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); + struct ip_tunnel *t; int err; ipn->tunnels[0] = ipn->tunnels_wc; @@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net) if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(ipn->fb_tunnel_dev); + + strcpy(t->parms.name, ipn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a7a18602a046..96f3623618e3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -263,6 +263,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip6_tunnel_link(sitn, nt); @@ -1144,7 +1146,6 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1207,6 +1208,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *t; int err; sitn->tunnels[0] = sitn->tunnels_wc; @@ -1231,6 +1233,9 @@ static int __net_init sit_init_net(struct net *net) if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(sitn->fb_tunnel_dev); + + strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: -- cgit v1.2.2 From 3f1e6d3fd37bd4f25e5b19f1c7ca21850426c33f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Dec 2011 23:42:53 +0000 Subject: sch_gred: should not use GFP_KERNEL while holding a spinlock gred_change_vq() is called under sch_tree_lock(sch). This means a spinlock is held, and we are not allowed to sleep in this context. We might pre-allocate memory using GFP_KERNEL before taking spinlock, but this is not suitable for stable material. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index b9493a09a870..6cd8ddfb512d 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -385,7 +385,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched_data *q; if (table->tab[dp] == NULL) { - table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL); + table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC); if (table->tab[dp] == NULL) return -ENOMEM; } -- cgit v1.2.2 From bb3c36863e8001fc21a88bebfdead4da4c23e848 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Dec 2011 17:35:06 -0500 Subject: ipv6: Check dest prefix length on original route not copied one in rt6_alloc_cow(). After commit 8e2ec639173f325977818c45011ee176ef2b11f6 ("ipv6: don't use inetpeer to store metrics for routes.") the test in rt6_alloc_cow() for setting the ANYCAST flag is now wrong. 'rt' will always now have a plen of 128, because it is set explicitly to 128 by ip6_rt_copy. So to restore the semantics of the test, check the destination prefix length of 'ort'. Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3399dd326287..b582a0a0f1c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -728,7 +728,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, int attempts = !in_softirq(); if (!(rt->rt6i_flags&RTF_GATEWAY)) { - if (rt->rt6i_dst.plen != 128 && + if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; ipv6_addr_copy(&rt->rt6i_gateway, daddr); -- cgit v1.2.2