From bf32fecdc1851ad9ca960f56771b798d17c26cf1 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 2 Apr 2012 14:26:27 -0700 Subject: openvswitch: Add length check when retrieving TCP flags. When collecting TCP flags we check that the IP header indicates that a TCP header is present but not that the packet is actually long enough to contain the header. This adds a check to prevent reading off the end of the packet. In practice, this is only likely to result in reading of bad data and not a crash due to the presence of struct skb_shared_info at the end of the packet. Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1252c3081ef1..2a11ec2383ee 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -183,7 +183,8 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) u8 tcp_flags = 0; if (flow->key.eth.type == htons(ETH_P_IP) && - flow->key.ip.proto == IPPROTO_TCP) { + flow->key.ip.proto == IPPROTO_TCP && + likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { u8 *tcp = (u8 *)tcp_hdr(skb); tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; } -- cgit v1.2.2 From 745c0ce35f904aeff8e1ea325c259a14a00ff1b7 Mon Sep 17 00:00:00 2001 From: Vishal Agarwal Date: Fri, 13 Apr 2012 17:43:22 +0530 Subject: Bluetooth: hci_persistent_key should return bool This patch changes the return type of function hci_persistent_key from int to bool because it makes more sense to return information whether a key is persistent or not as a bool. Signed-off-by: Vishal Agarwal Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 21 +++++++++++---------- net/bluetooth/mgmt.c | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2054c1321c87..c2251e4c3b72 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1216,40 +1216,40 @@ struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) return NULL; } -static int hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn, +static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn, u8 key_type, u8 old_key_type) { /* Legacy key */ if (key_type < 0x03) - return 1; + return true; /* Debug keys are insecure so don't store them persistently */ if (key_type == HCI_LK_DEBUG_COMBINATION) - return 0; + return false; /* Changed combination key and there's no previous one */ if (key_type == HCI_LK_CHANGED_COMBINATION && old_key_type == 0xff) - return 0; + return false; /* Security mode 3 case */ if (!conn) - return 1; + return true; /* Neither local nor remote side had no-bonding as requirement */ if (conn->auth_type > 0x01 && conn->remote_auth > 0x01) - return 1; + return true; /* Local side had dedicated bonding as requirement */ if (conn->auth_type == 0x02 || conn->auth_type == 0x03) - return 1; + return true; /* Remote side had dedicated bonding as requirement */ if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) - return 1; + return true; /* If none of the above criteria match, then don't store the key * persistently */ - return 0; + return false; } struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, u8 rand[8]) @@ -1286,7 +1286,8 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len) { struct link_key *key, *old_key; - u8 old_key_type, persistent; + u8 old_key_type; + bool persistent; old_key = hci_find_link_key(hdev, bdaddr); if (old_key) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4ef275c69675..4bb03b111122 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2884,7 +2884,7 @@ int mgmt_write_scan_failed(struct hci_dev *hdev, u8 scan, u8 status) return 0; } -int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, u8 persistent) +int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent) { struct mgmt_ev_new_link_key ev; -- cgit v1.2.2 From 6ec5bcadc21e13ceba8c144e4731eccac01d04f7 Mon Sep 17 00:00:00 2001 From: Vishal Agarwal Date: Mon, 16 Apr 2012 14:44:44 +0530 Subject: Bluetooth: Temporary keys should be retained during connection If a key is non persistent then it should not be used in future connections but it should be kept for current connection. And it should be removed when connecion is removed. Signed-off-by: Vishal Agarwal Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 6 ++---- net/bluetooth/hci_event.c | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c2251e4c3b72..a7607e4be347 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1330,10 +1330,8 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, mgmt_new_link_key(hdev, key, persistent); - if (!persistent) { - list_del(&key->list); - kfree(key); - } + if (conn) + conn->flush_key = !persistent; return 0; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index badb7851d116..6a72eaea70ee 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1902,6 +1902,8 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff } if (ev->status == 0) { + if (conn->type == ACL_LINK && conn->flush_key) + hci_remove_link_key(hdev, &conn->dst); hci_proto_disconn_cfm(conn, ev->reason); hci_conn_del(conn); } -- cgit v1.2.2 From a881e963c7fe1f226e991ee9bbe8907acda93294 Mon Sep 17 00:00:00 2001 From: "Peter Huang (Peng)" Date: Thu, 19 Apr 2012 20:12:51 +0000 Subject: set fake_rtable's dst to NULL to avoid kernel Oops bridge: set fake_rtable's dst to NULL to avoid kernel Oops when bridge is deleted before tap/vif device's delete, kernel may encounter an oops because of NULL reference to fake_rtable's dst. Set fake_rtable's dst to NULL before sending packets out can solve this problem. v4 reformat, change br_drop_fake_rtable(skb) to {} v3 enrich commit header v2 introducing new flag DST_FAKE_RTABLE to dst_entry struct. [ Use "do { } while (0)" for nop br_drop_fake_rtable() implementation -DaveM ] Acked-by: Eric Dumazet Signed-off-by: Peter Huang Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 1 + net/bridge/br_netfilter.c | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 61f65344e711..a2098e3de500 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -47,6 +47,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); dev_queue_xmit(skb); } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index dec4f3817133..d7f49b63ab0f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -156,7 +156,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->dst.dev = br->dev; rt->dst.path = &rt->dst; dst_init_metrics(&rt->dst, br_dst_default_metrics, true); - rt->dst.flags = DST_NOXFRM | DST_NOPEER; + rt->dst.flags = DST_NOXFRM | DST_NOPEER | DST_FAKE_RTABLE; rt->dst.ops = &fake_dst_ops; } @@ -694,11 +694,7 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct rtable *rt = skb_rtable(skb); - - if (rt && rt == bridge_parent_rtable(in)) - skb_dst_drop(skb); - + br_drop_fake_rtable(skb); return NF_ACCEPT; } -- cgit v1.2.2 From 16cde9931bcd8d8ca968ef1cded02684ea040374 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 13 Apr 2012 12:32:42 +0200 Subject: Bluetooth: Fix missing break in hci_cmd_complete_evt Command complete event for HCI_OP_USER_PASSKEY_NEG_REPLY would result in calling handler function also for HCI_OP_LE_SET_SCAN_PARAM. This could result in undefined behaviour. Signed-off-by: Szymon Janc Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6a72eaea70ee..7f87a70b8618 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2314,6 +2314,7 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk case HCI_OP_USER_PASSKEY_NEG_REPLY: hci_cc_user_passkey_neg_reply(hdev, skb); + break; case HCI_OP_LE_SET_SCAN_PARAM: hci_cc_le_set_scan_param(hdev, skb); -- cgit v1.2.2 From afa762f6871a8cb05fbef5d0f83fac14304aa816 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 23 Apr 2012 14:45:15 +0300 Subject: mac80211: call ieee80211_mgd_stop() on interface stop ieee80211_mgd_teardown() is called on netdev removal, which occurs after the vif was already removed from the low-level driver, resulting in the following warning: [ 4809.014734] ------------[ cut here ]------------ [ 4809.019861] WARNING: at net/mac80211/driver-ops.h:12 ieee80211_bss_info_change_notify+0x200/0x2c8 [mac80211]() [ 4809.030388] wlan0: Failed check-sdata-in-driver check, flags: 0x4 [ 4809.036862] Modules linked in: wlcore_sdio(-) wl12xx wlcore mac80211 cfg80211 [last unloaded: cfg80211] [ 4809.046849] [] (unwind_backtrace+0x0/0x12c) [ 4809.055937] [] (dump_stack+0x20/0x24) [ 4809.065385] [] (warn_slowpath_common+0x5c/0x74) [ 4809.075589] [] (warn_slowpath_fmt+0x40/0x48) [ 4809.088291] [] (ieee80211_bss_info_change_notify+0x200/0x2c8 [mac80211]) [ 4809.102844] [] (ieee80211_destroy_auth_data+0x80/0xa4 [mac80211]) [ 4809.116276] [] (ieee80211_mgd_teardown+0x5c/0x74 [mac80211]) [ 4809.129331] [] (ieee80211_teardown_sdata+0xb0/0xd8 [mac80211]) [ 4809.141595] [] (rollback_registered_many+0x228/0x2f0) [ 4809.153056] [] (unregister_netdevice_many+0x28/0x50) [ 4809.165696] [] (ieee80211_remove_interfaces+0xb4/0xdc [mac80211]) [ 4809.179151] [] (ieee80211_unregister_hw+0x50/0xf0 [mac80211]) [ 4809.191043] [] (wlcore_remove+0x5c/0x7c [wlcore]) [ 4809.201491] [] (platform_drv_remove+0x24/0x28) [ 4809.212029] [] (__device_release_driver+0x8c/0xcc) [ 4809.222738] [] (device_release_driver+0x30/0x3c) [ 4809.233099] [] (bus_remove_device+0x10c/0x128) [ 4809.242620] [] (device_del+0x11c/0x17c) [ 4809.252150] [] (platform_device_del+0x28/0x68) [ 4809.263051] [] (wl1271_remove+0x3c/0x50 [wlcore_sdio]) [ 4809.273590] [] (sdio_bus_remove+0x48/0xf8) [ 4809.283754] [] (__device_release_driver+0x8c/0xcc) [ 4809.293729] [] (driver_detach+0x9c/0xc4) [ 4809.303163] [] (bus_remove_driver+0xc4/0xf4) [ 4809.312973] [] (driver_unregister+0x70/0x7c) [ 4809.323220] [] (sdio_unregister_driver+0x24/0x2c) [ 4809.334213] [] (wl1271_exit+0x14/0x1c [wlcore_sdio]) [ 4809.344930] [] (sys_delete_module+0x228/0x2a8) [ 4809.354734] ---[ end trace 515290ccf5feb522 ]--- Rename ieee80211_mgd_teardown() to ieee80211_mgd_stop(), and call it on ieee80211_do_stop(). Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 4 ++-- net/mac80211/mlme.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d9798a307f20..db8fae51714c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1210,7 +1210,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); -void ieee80211_mgd_teardown(struct ieee80211_sub_if_data *sdata); +void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 401c01f0731e..c20051b7ffcd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -486,6 +486,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps_bc_buf); skb_queue_purge(&sdata->u.ap.ps_bc_buf); + } else if (sdata->vif.type == NL80211_IFTYPE_STATION) { + ieee80211_mgd_stop(sdata); } if (going_down) @@ -644,8 +646,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - else if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_mgd_teardown(sdata); flushed = sta_info_flush(local, sdata); WARN_ON(flushed); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f76da5b3f5c5..20c680bfc3ae 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3497,7 +3497,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return 0; } -void ieee80211_mgd_teardown(struct ieee80211_sub_if_data *sdata) +void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; -- cgit v1.2.2 From 7118c07a844d367560ee91adb2071bde2fabcdbf Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 14 Apr 2012 12:37:46 -0400 Subject: ipvs: Verify that IP_VS protocol has been registered The registration of a protocol might fail, there were no checks and all registrations were assumed to be correct. This lead to NULL ptr dereferences when apps tried registering. For example: [ 1293.226051] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 [ 1293.227038] IP: [] tcp_register_app+0x60/0xb0 [ 1293.227038] PGD 391de067 PUD 6c20b067 PMD 0 [ 1293.227038] Oops: 0000 [#1] PREEMPT SMP [ 1293.227038] CPU 1 [ 1293.227038] Pid: 19609, comm: trinity Tainted: G W 3.4.0-rc1-next-20120405-sasha-dirty #57 [ 1293.227038] RIP: 0010:[] [] tcp_register_app+0x60/0xb0 [ 1293.227038] RSP: 0018:ffff880038c1dd18 EFLAGS: 00010286 [ 1293.227038] RAX: ffffffffffffffc0 RBX: 0000000000001500 RCX: 0000000000010000 [ 1293.227038] RDX: 0000000000000000 RSI: ffff88003a2d5888 RDI: 0000000000000282 [ 1293.227038] RBP: ffff880038c1dd48 R08: 0000000000000000 R09: 0000000000000000 [ 1293.227038] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a2d5668 [ 1293.227038] R13: ffff88003a2d5988 R14: ffff8800696a8ff8 R15: 0000000000000000 [ 1293.227038] FS: 00007f01930d9700(0000) GS:ffff88007ce00000(0000) knlGS:0000000000000000 [ 1293.227038] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1293.227038] CR2: 0000000000000018 CR3: 0000000065dfc000 CR4: 00000000000406e0 [ 1293.227038] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1293.227038] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 1293.227038] Process trinity (pid: 19609, threadinfo ffff880038c1c000, task ffff88002dc73000) [ 1293.227038] Stack: [ 1293.227038] ffff880038c1dd48 00000000fffffff4 ffff8800696aada0 ffff8800694f5580 [ 1293.227038] ffffffff8369f1e0 0000000000001500 ffff880038c1dd98 ffffffff822a716b [ 1293.227038] 0000000000000000 ffff8800696a8ff8 0000000000000015 ffff8800694f5580 [ 1293.227038] Call Trace: [ 1293.227038] [] ip_vs_app_inc_new+0xdb/0x180 [ 1293.227038] [] register_ip_vs_app_inc+0x48/0x70 [ 1293.227038] [] __ip_vs_ftp_init+0xba/0x140 [ 1293.227038] [] ops_init+0x80/0x90 [ 1293.227038] [] setup_net+0x5b/0xe0 [ 1293.227038] [] copy_net_ns+0x76/0x100 [ 1293.227038] [] create_new_namespaces+0xfb/0x190 [ 1293.227038] [] unshare_nsproxy_namespaces+0x61/0x80 [ 1293.227038] [] sys_unshare+0xff/0x290 [ 1293.227038] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 1293.227038] [] system_call_fastpath+0x16/0x1b [ 1293.227038] Code: 89 c7 e8 34 91 3b 00 89 de 66 c1 ee 04 31 de 83 e6 0f 48 83 c6 22 48 c1 e6 04 4a 8b 14 26 49 8d 34 34 48 8d 42 c0 48 39 d6 74 13 <66> 39 58 58 74 22 48 8b 48 40 48 8d 41 c0 48 39 ce 75 ed 49 8d [ 1293.227038] RIP [] tcp_register_app+0x60/0xb0 [ 1293.227038] RSP [ 1293.227038] CR2: 0000000000000018 [ 1293.379284] ---[ end trace 364ab40c7011a009 ]--- [ 1293.381182] Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Sasha Levin Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_proto.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index f843a8833250..a62360e29037 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -59,9 +59,6 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) return 0; } -#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \ - defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \ - defined(CONFIG_IP_VS_PROTO_ESP) /* * register an ipvs protocols netns related data */ @@ -86,7 +83,6 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) return 0; } -#endif /* * unregister an ipvs protocol @@ -316,22 +312,35 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, */ int __net_init ip_vs_protocol_net_init(struct net *net) { + int i, ret; + static struct ip_vs_protocol *protos[] = { #ifdef CONFIG_IP_VS_PROTO_TCP - register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); + &ip_vs_protocol_tcp, #endif #ifdef CONFIG_IP_VS_PROTO_UDP - register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); + &ip_vs_protocol_udp, #endif #ifdef CONFIG_IP_VS_PROTO_SCTP - register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); + &ip_vs_protocol_sctp, #endif #ifdef CONFIG_IP_VS_PROTO_AH - register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); + &ip_vs_protocol_ah, #endif #ifdef CONFIG_IP_VS_PROTO_ESP - register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); + &ip_vs_protocol_esp, #endif + }; + + for (i = 0; i < ARRAY_SIZE(protos); i++) { + ret = register_ip_vs_proto_netns(net, protos[i]); + if (ret < 0) + goto cleanup; + } return 0; + +cleanup: + ip_vs_protocol_net_cleanup(net); + return ret; } void __net_exit ip_vs_protocol_net_cleanup(struct net *net) -- cgit v1.2.2 From 8f9b9a2fad47af27e14b037395e03cd8278d96d7 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 13 Apr 2012 18:08:43 +0300 Subject: ipvs: fix crash in ip_vs_control_net_cleanup on unload commit 14e405461e664b777e2a5636e10b2ebf36a686ec (2.6.39) ("Add __ip_vs_control_{init,cleanup}_sysctl()") introduced regression due to wrong __net_init for __ip_vs_control_cleanup_sysctl. This leads to crash when the ip_vs module is unloaded. Fix it by changing __net_init to __net_exit for the function that is already renamed to ip_vs_control_net_cleanup_sysctl. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b3afe189af61..376d2b12d581 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3680,7 +3680,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) +void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3692,7 +3692,7 @@ void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) #else int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { } +void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif -- cgit v1.2.2 From 62ad6fcd743792bf294f2a7ba26ab8f462065150 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 24 Apr 2012 18:15:41 +0000 Subject: udp_diag: implement idiag_get_info for udp/udplite to get queue information When we use netlink to monitor queue information for udp socket, idiag_rqueue and idiag_wqueue of inet_diag_msg are returned with 0. Keep consistent with netstat, just return back allocated rmem/wmem size. Signed-off-by: Shan Wei Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 2 +- net/ipv4/udp_diag.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8d25a1c557eb..8f8db724bfaf 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -141,7 +141,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto rtattr_failure; if (icsk == NULL) { - r->idiag_rqueue = r->idiag_wqueue = 0; + handler->idiag_get_info(sk, r, NULL); goto out; } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 8a949f19deb6..a7f86a3cd502 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -146,9 +146,17 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return udp_dump_one(&udp_table, in_skb, nlh, req); } +static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + r->idiag_rqueue = sk_rmem_alloc_get(sk); + r->idiag_wqueue = sk_wmem_alloc_get(sk); +} + static const struct inet_diag_handler udp_diag_handler = { .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, + .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, }; @@ -167,6 +175,7 @@ static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr * static const struct inet_diag_handler udplite_diag_handler = { .dump = udplite_diag_dump, .dump_one = udplite_diag_dump_one, + .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, }; -- cgit v1.2.2 From 8d08d71ce59438a6ef06be5db07966e0c144b74e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 25 Apr 2012 00:29:59 +0300 Subject: ipvs: add check in ftp for initialized core Avoid crash when registering ip_vs_ftp after the IPVS core initialization for netns fails. Do this by checking for present core (net->ipvs). Signed-off-by: Julian Anastasov Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ftp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 538d74ee4f68..e39f693dd3e4 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -439,6 +439,8 @@ static int __net_init __ip_vs_ftp_init(struct net *net) struct ip_vs_app *app; struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); if (!app) return -ENOMEM; -- cgit v1.2.2 From 39f618b4fd95ae243d940ec64c961009c74e3333 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 25 Apr 2012 00:29:58 +0300 Subject: ipvs: reset ipvs pointer in netns Make sure net->ipvs is reset on netns cleanup or failed initialization. It is needed for IPVS applications to know that IPVS core is not loaded in netns. Signed-off-by: Julian Anastasov Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2555816e7788..260b9ef88775 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1924,6 +1924,7 @@ protocol_fail: control_fail: ip_vs_estimator_net_cleanup(net); estimator_fail: + net->ipvs = NULL; return -ENOMEM; } @@ -1936,6 +1937,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net) ip_vs_control_net_cleanup(net); ip_vs_estimator_net_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); + net->ipvs = NULL; } static void __net_exit __ip_vs_dev_cleanup(struct net *net) -- cgit v1.2.2 From 0848e4043014631d792a66266d6d7d64a7f21da5 Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Wed, 25 Apr 2012 23:24:56 +0000 Subject: 6lowpan: fix segmentation fault caused by mlme request Add nescesary mlme callbacks to satisfy "iz list" request from user space. Due to 6lowpan device doesn't have its own phy, mlme implemented as a pipe to a real phy to which 6lowpan is attached. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 368515885368..f6b169439b5b 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1044,6 +1044,24 @@ static void lowpan_dev_free(struct net_device *dev) free_netdev(dev); } +static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_phy(real_dev); +} + +static u16 lowpan_get_pan_id(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); +} + +static u16 lowpan_get_short_addr(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); +} + static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -1053,6 +1071,12 @@ static const struct net_device_ops lowpan_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; +static struct ieee802154_mlme_ops lowpan_mlme = { + .get_pan_id = lowpan_get_pan_id, + .get_phy = lowpan_get_phy, + .get_short_addr = lowpan_get_short_addr, +}; + static void lowpan_setup(struct net_device *dev) { pr_debug("(%s)\n", __func__); @@ -1070,6 +1094,7 @@ static void lowpan_setup(struct net_device *dev) dev->netdev_ops = &lowpan_netdev_ops; dev->header_ops = &lowpan_header_ops; + dev->ml_priv = &lowpan_mlme; dev->destructor = lowpan_dev_free; } -- cgit v1.2.2 From 8deff4af8745561efd2be34ee30cc57a6f0107c6 Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Wed, 25 Apr 2012 23:24:57 +0000 Subject: 6lowpan: clean up fragments list if module unloaded Clean all the pending fragments and relative timers if 6lowpan link is going to be deleted. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index f6b169439b5b..b3021f765204 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1177,11 +1177,20 @@ static void lowpan_dellink(struct net_device *dev, struct list_head *head) { struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry; - struct lowpan_dev_record *tmp; + struct lowpan_dev_record *entry, *tmp; + struct lowpan_fragment *frame, *tframe; ASSERT_RTNL(); + spin_lock(&flist_lock); + list_for_each_entry_safe(frame, tframe, &lowpan_fragments, list) { + del_timer(&frame->timer); + list_del(&frame->list); + dev_kfree_skb(frame->skb); + kfree(frame); + } + spin_unlock(&flist_lock); + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { if (entry->ldev == dev) { -- cgit v1.2.2 From 768f7c7c121e80f458a9d013b2e8b169e5dfb1e5 Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Wed, 25 Apr 2012 23:24:58 +0000 Subject: 6lowpan: add missing spin_lock_init() Add missing spin_lock_init() for frames list lock. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index b3021f765204..840821b90bcd 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1168,6 +1168,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, list_add_tail(&entry->list, &lowpan_devices); mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + spin_lock_init(&flist_lock); + register_netdevice(dev); return 0; -- cgit v1.2.2 From 651913ce9de2bbcedef608c5d6cf39c244248509 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 27 Apr 2012 11:29:37 -0400 Subject: tcp: clean up use of jiffies in tcp_rcv_rtt_measure() Clean up a reference to jiffies in tcp_rcv_rtt_measure() that should instead reference tcp_time_stamp. Since the result of the subtraction is passed into a function taking u32, this should not change any behavior (and indeed the generated assembly does not change on x86_64). However, it seems worth cleaning this up for consistency and clarity (and perhaps to avoid bugs if this is copied and pasted somewhere else). Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ff364065376..2a702e3a4ae6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -495,7 +495,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) goto new_measure; if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; - tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1); + tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1); new_measure: tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; -- cgit v1.2.2 From cde2e9a651b76d8db36ae94cd0febc82b637e5dd Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 27 Apr 2012 10:11:48 +0000 Subject: drop_monitor: fix sleeping in invalid context warning Eric Dumazet pointed out this warning in the drop_monitor protocol to me: [ 38.352571] BUG: sleeping function called from invalid context at kernel/mutex.c:85 [ 38.352576] in_atomic(): 1, irqs_disabled(): 0, pid: 4415, name: dropwatch [ 38.352580] Pid: 4415, comm: dropwatch Not tainted 3.4.0-rc2+ #71 [ 38.352582] Call Trace: [ 38.352592] [] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352599] [] __might_sleep+0xca/0xf0 [ 38.352606] [] mutex_lock+0x26/0x50 [ 38.352610] [] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352616] [] tracepoint_probe_register+0x29/0x90 [ 38.352621] [] set_all_monitor_traces+0x105/0x170 [ 38.352625] [] net_dm_cmd_trace+0x2a/0x40 [ 38.352630] [] genl_rcv_msg+0x21a/0x2b0 [ 38.352636] [] ? zone_statistics+0x99/0xc0 [ 38.352640] [] ? genl_rcv+0x30/0x30 [ 38.352645] [] netlink_rcv_skb+0xa9/0xd0 [ 38.352649] [] genl_rcv+0x20/0x30 [ 38.352653] [] netlink_unicast+0x1ae/0x1f0 [ 38.352658] [] netlink_sendmsg+0x2b6/0x310 [ 38.352663] [] sock_sendmsg+0x10f/0x130 [ 38.352668] [] ? move_addr_to_kernel+0x60/0xb0 [ 38.352673] [] ? verify_iovec+0x64/0xe0 [ 38.352677] [] __sys_sendmsg+0x386/0x390 [ 38.352682] [] ? handle_mm_fault+0x139/0x210 [ 38.352687] [] ? do_page_fault+0x1ec/0x4f0 [ 38.352693] [] ? set_next_entity+0x9d/0xb0 [ 38.352699] [] ? tty_ldisc_deref+0x9/0x10 [ 38.352703] [] ? pick_next_task_fair+0x63/0x140 [ 38.352708] [] sys_sendmsg+0x44/0x80 [ 38.352713] [] system_call_fastpath+0x16/0x1b It stems from holding a spinlock (trace_state_lock) while attempting to register or unregister tracepoint hooks, making in_atomic() true in this context, leading to the warning when the tracepoint calls might_sleep() while its taking a mutex. Since we only use the trace_state_lock to prevent trace protocol state races, as well as hardware stat list updates on an rcu write side, we can just convert the spinlock to a mutex to avoid this problem. Signed-off-by: Neil Horman Reported-by: Eric Dumazet CC: David Miller Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 5c3c81a609e5..a221a5bbecf7 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -42,7 +42,7 @@ static void send_dm_alert(struct work_struct *unused); * netlink alerts */ static int trace_state = TRACE_OFF; -static DEFINE_SPINLOCK(trace_state_lock); +static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { struct work_struct dm_alert_work; @@ -214,7 +214,7 @@ static int set_all_monitor_traces(int state) struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *temp; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); if (state == trace_state) { rc = -EAGAIN; @@ -253,7 +253,7 @@ static int set_all_monitor_traces(int state) rc = -EINPROGRESS; out_unlock: - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); return rc; } @@ -296,12 +296,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_add_rcu(&new_stat->list, &hw_stats_list); - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; case NETDEV_UNREGISTER: - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { new_stat->dev = NULL; @@ -312,7 +312,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, } } } - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; } out: -- cgit v1.2.2 From 3885ca785a3618593226687ced84f3f336dc3860 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 27 Apr 2012 10:11:49 +0000 Subject: drop_monitor: Make updating data->skb smp safe Eric Dumazet pointed out to me that the drop_monitor protocol has some holes in its smp protections. Specifically, its possible to replace data->skb while its being written. This patch corrects that by making data->skb an rcu protected variable. That will prevent it from being overwritten while a tracepoint is modifying it. Signed-off-by: Neil Horman Reported-by: Eric Dumazet CC: David Miller Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 70 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index a221a5bbecf7..7592943513e3 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { struct work_struct dm_alert_work; - struct sk_buff *skb; + struct sk_buff __rcu *skb; atomic_t dm_hit_count; struct timer_list send_timer; }; @@ -73,35 +73,58 @@ static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); +static int initialized = 0; static void reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; + struct sk_buff *skb; + struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1); al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); al += sizeof(struct nlattr); - data->skb = genlmsg_new(al, GFP_KERNEL); - genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - atomic_set(&data->dm_hit_count, dm_hit_limit); + skb = genlmsg_new(al, GFP_KERNEL); + + if (skb) { + genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + msg = nla_data(nla); + memset(msg, 0, al); + } else if (initialized) + schedule_work_on(smp_processor_id(), &data->dm_alert_work); + + /* + * Don't need to lock this, since we are guaranteed to only + * run this on a single cpu at a time. + * Note also that we only update data->skb if the old and new skb + * pointers don't match. This ensures that we don't continually call + * synchornize_rcu if we repeatedly fail to alloc a new netlink message. + */ + if (skb != oskb) { + rcu_assign_pointer(data->skb, skb); + + synchronize_rcu(); + + atomic_set(&data->dm_hit_count, dm_hit_limit); + } + } static void send_dm_alert(struct work_struct *unused) { struct sk_buff *skb; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); /* * Grab the skb we're about to send */ - skb = data->skb; + skb = rcu_dereference_protected(data->skb, 1); /* * Replace it with a new one @@ -111,8 +134,10 @@ static void send_dm_alert(struct work_struct *unused) /* * Ship it! */ - genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + if (skb) + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + put_cpu_var(dm_cpu_data); } /* @@ -123,9 +148,11 @@ static void send_dm_alert(struct work_struct *unused) */ static void sched_send_work(unsigned long unused) { - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + + schedule_work_on(smp_processor_id(), &data->dm_alert_work); - schedule_work(&data->dm_alert_work); + put_cpu_var(dm_cpu_data); } static void trace_drop_common(struct sk_buff *skb, void *location) @@ -134,9 +161,16 @@ static void trace_drop_common(struct sk_buff *skb, void *location) struct nlmsghdr *nlh; struct nlattr *nla; int i; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct sk_buff *dskb; + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + rcu_read_lock(); + dskb = rcu_dereference(data->skb); + + if (!dskb) + goto out; + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { /* * we're already at zero, discard this hit @@ -144,7 +178,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) goto out; } - nlh = (struct nlmsghdr *)data->skb->data; + nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); for (i = 0; i < msg->entries; i++) { @@ -158,7 +192,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) /* * We need to create a new entry */ - __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); msg->points[msg->entries].count = 1; @@ -170,6 +204,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location) } out: + rcu_read_unlock(); + put_cpu_var(dm_cpu_data); return; } @@ -375,6 +411,8 @@ static int __init init_net_drop_monitor(void) data->send_timer.function = sched_send_work; } + initialized = 1; + goto out; out_unreg: -- cgit v1.2.2 From 4b984cd50bc1b6d492175cd77bfabb78e76ffa67 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 26 Apr 2012 09:45:34 +0200 Subject: ipvs: null check of net->ipvs in lblc(r) shedulers Avoid crash when registering shedulers after the IPVS core initialization for netns fails. Do this by checking for present core (net->ipvs). Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblc.c | 3 +++ net/netfilter/ipvs/ip_vs_lblcr.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 0f16283fd058..caa43704e55e 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -551,6 +551,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; + if (!net_eq(net, &init_net)) { ipvs->lblc_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index eec797f8cce7..548bf37aa29e 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -745,6 +745,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; + if (!net_eq(net, &init_net)) { ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), -- cgit v1.2.2 From 582b8e3eadaec77788c1aa188081a8d5059c42a6 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 26 Apr 2012 09:45:35 +0200 Subject: ipvs: take care of return value from protocol init_netns ip_vs_create_timeout_table() can return NULL All functions protocol init_netns is affected of this patch. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto.c | 11 +++++++++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 5 ++++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 ++++- net/netfilter/ipvs/ip_vs_proto_udp.c | 5 ++++- 4 files changed, 21 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index a62360e29037..ed835e67a07e 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -78,8 +78,15 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) ipvs->proto_data_table[hash] = pd; atomic_set(&pd->appcnt, 0); /* Init app counter */ - if (pp->init_netns != NULL) - pp->init_netns(net, pd); + if (pp->init_netns != NULL) { + int ret = pp->init_netns(net, pd); + if (ret) { + /* unlink an free proto data */ + ipvs->proto_data_table[hash] = pd->next; + kfree(pd); + return ret; + } + } return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1fbf7a2816f5..9f3fb751c491 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1090,7 +1090,7 @@ out: * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1098,6 +1098,9 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index ef8641f7af83..cd609cc62721 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -677,7 +677,7 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -685,7 +685,10 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; pd->tcp_state_table = tcp_states; + return 0; } static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index f4b7262896bb..2fedb2dcb3d1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -467,7 +467,7 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; } -static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -475,6 +475,9 @@ static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) -- cgit v1.2.2 From 8537de8a7ab6681cc72fb0411ab1ba7fdba62dd0 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 26 Apr 2012 07:47:44 +0200 Subject: ipvs: kernel oops - do_ip_vs_get_ctl Change order of init so netns init is ready when register ioctl and netlink. Ver2 Whitespace fixes and __init added. Reported-by: "Ryan O'Hara" Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 9 +++++++ net/netfilter/ipvs/ip_vs_ctl.c | 52 ++++++++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 260b9ef88775..00bdb1d9d690 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1995,10 +1995,18 @@ static int __init ip_vs_init(void) goto cleanup_dev; } + ret = ip_vs_register_nl_ioctl(); + if (ret < 0) { + pr_err("can't register netlink/ioctl.\n"); + goto cleanup_hooks; + } + pr_info("ipvs loaded.\n"); return ret; +cleanup_hooks: + nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: @@ -2014,6 +2022,7 @@ exit: static void __exit ip_vs_cleanup(void) { + ip_vs_unregister_nl_ioctl(); nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 376d2b12d581..f5589987fc80 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3750,21 +3750,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -int __init ip_vs_control_init(void) +int __init ip_vs_register_nl_ioctl(void) { - int idx; int ret; - EnterFunction(2); - - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); - } - - smp_wmb(); /* Do we really need it now ? */ - ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); @@ -3776,28 +3765,47 @@ int __init ip_vs_control_init(void) pr_err("cannot register Generic Netlink interface.\n"); goto err_genl; } - - ret = register_netdevice_notifier(&ip_vs_dst_notifier); - if (ret < 0) - goto err_notf; - - LeaveFunction(2); return 0; -err_notf: - ip_vs_genl_unregister(); err_genl: nf_unregister_sockopt(&ip_vs_sockopts); err_sock: return ret; } +void ip_vs_unregister_nl_ioctl(void) +{ + ip_vs_genl_unregister(); + nf_unregister_sockopt(&ip_vs_sockopts); +} + +int __init ip_vs_control_init(void) +{ + int idx; + int ret; + + EnterFunction(2); + + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + INIT_LIST_HEAD(&ip_vs_svc_table[idx]); + INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + } + + smp_wmb(); /* Do we really need it now ? */ + + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + return ret; + + LeaveFunction(2); + return 0; +} + void ip_vs_control_cleanup(void) { EnterFunction(2); unregister_netdevice_notifier(&ip_vs_dst_notifier); - ip_vs_genl_unregister(); - nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } -- cgit v1.2.2 From 6cf51852486af3d79f57bf46d00209a14244dbaa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 Apr 2012 02:00:50 +0200 Subject: netfilter: xt_CT: fix wrong checking in the timeout assignment path The current checking always succeeded. We have to check the first character of the string to check that it's empty, thus, skipping the timeout path. This fixes the use of the CT target without the timeout option. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 59530e93fa58..3746d8b9a478 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -227,7 +227,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT - if (info->timeout) { + if (info->timeout[0]) { typeof(nf_ct_timeout_find_get_hook) timeout_find_get; struct nf_conn_timeout *timeout_ext; -- cgit v1.2.2 From cbbb34498f8b2b26cbdc79532c8a2ee5cd1e756a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 11:52:40 -0400 Subject: SUNRPC: RPC client must use the current utsname hostname string Now that the rpc client is namespace aware, it needs to use the utsname of the process that created it instead of using the init_utsname. Both rpc_new_client and rpc_clone_client need to be fixed. Signed-off-by: Trond Myklebust Cc: Stanislav Kinsbursky --- net/sunrpc/clnt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d127bd747527..adf2990acebf 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -279,6 +279,14 @@ void rpc_clients_notifier_unregister(void) return rpc_pipefs_notifier_unregister(&rpc_clients_block); } +static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) +{ + clnt->cl_nodelen = strlen(nodename); + if (clnt->cl_nodelen > UNX_MAXNODENAME) + clnt->cl_nodelen = UNX_MAXNODENAME; + memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); +} + static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { const struct rpc_program *program = args->program; @@ -359,10 +367,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru } /* save the nodename */ - clnt->cl_nodelen = strlen(init_utsname()->nodename); - if (clnt->cl_nodelen > UNX_MAXNODENAME) - clnt->cl_nodelen = UNX_MAXNODENAME; - memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen); + rpc_clnt_set_nodename(clnt, utsname()->nodename); rpc_register_client(clnt); return clnt; @@ -521,6 +526,7 @@ rpc_clone_client(struct rpc_clnt *clnt) err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; + rpc_clnt_set_nodename(new, utsname()->nodename); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); atomic_inc(&clnt->cl_count); -- cgit v1.2.2 From 1cebce36d660c83bd1353e41f3e66abd4686f215 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 30 Apr 2012 06:00:18 +0000 Subject: tcp: fix infinite cwnd in tcp_complete_cwr() When the cwnd reduction is done, ssthresh may be infinite if TCP enters CWR via ECN or F-RTO. If cwnd is not undone, i.e., undo_marker is set, tcp_complete_cwr() falsely set cwnd to the infinite ssthresh value. The correct operation is to keep cwnd intact because it has been updated in ECN or F-RTO. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2a702e3a4ae6..d99efd7dfb6c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2868,11 +2868,14 @@ static inline void tcp_complete_cwr(struct sock *sk) /* Do not moderate cwnd if it's already undone in cwr or recovery. */ if (tp->undo_marker) { - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - else /* PRR */ + tp->snd_cwnd_stamp = tcp_time_stamp; + } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) { + /* PRR algorithm. */ tp->snd_cwnd = tp->snd_ssthresh; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_time_stamp; + } } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } -- cgit v1.2.2 From 66f2c99af3d6f2d0aa1120884cf1c60613ef61c0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 29 Apr 2012 15:44:16 +0200 Subject: mac80211: fix AP mode EAP tx for VLAN stations EAP frames for stations in an AP VLAN are sent on the main AP interface to avoid race conditions wrt. moving stations. For that to work properly, sta_info_get_bss must be used instead of sta_info_get when sending EAP packets. Previously this was only done for cooked monitor injected packets, so this patch adds a check for tx->skb->protocol to the same place. Signed-off-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 782a60198df4..e76facc69e95 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1158,7 +1158,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) return TX_DROP; - } else if (info->flags & IEEE80211_TX_CTL_INJECTED) { + } else if (info->flags & IEEE80211_TX_CTL_INJECTED || + tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } if (!tx->sta) -- cgit v1.2.2 From 116a0fc31c6c9b8fc821be5a96e5bf0b43260131 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Apr 2012 09:08:22 +0000 Subject: netem: fix possible skb leak skb_checksum_help(skb) can return an error, we must free skb in this case. qdisc_drop(skb, sch) can also be feeded with a NULL skb (if skb_unshare() failed), so lets use this generic helper. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5da548fa7ae9..ebd22966f748 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -408,10 +408,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) { - sch->qstats.drops++; - return NET_XMIT_DROP; - } + skb_checksum_help(skb))) + return qdisc_drop(skb, sch); skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } -- cgit v1.2.2 From 4fdcfa12843bca38d0c9deff70c8720e4e8f515f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 1 May 2012 08:18:02 +0000 Subject: drop_monitor: prevent init path from scheduling on the wrong cpu I just noticed after some recent updates, that the init path for the drop monitor protocol has a minor error. drop monitor maintains a per cpu structure, that gets initalized from a single cpu. Normally this is fine, as the protocol isn't in use yet, but I recently made a change that causes a failed skb allocation to reschedule itself . Given the current code, the implication is that this workqueue reschedule will take place on the wrong cpu. If drop monitor is used early during the boot process, its possible that two cpus will access a single per-cpu structure in parallel, possibly leading to data corruption. This patch fixes the situation, by storing the cpu number that a given instance of this per-cpu data should be accessed from. In the case of a need for a reschedule, the cpu stored in the struct is assigned the rescheule, rather than the currently executing cpu Tested successfully by myself. Signed-off-by: Neil Horman CC: David Miller Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 7592943513e3..a7cad741df01 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -49,6 +49,7 @@ struct per_cpu_dm_data { struct sk_buff __rcu *skb; atomic_t dm_hit_count; struct timer_list send_timer; + int cpu; }; struct dm_hw_stat_delta { @@ -73,7 +74,6 @@ static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); -static int initialized = 0; static void reset_per_cpu_data(struct per_cpu_dm_data *data) { @@ -96,8 +96,8 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data) sizeof(struct net_dm_alert_msg)); msg = nla_data(nla); memset(msg, 0, al); - } else if (initialized) - schedule_work_on(smp_processor_id(), &data->dm_alert_work); + } else + schedule_work_on(data->cpu, &data->dm_alert_work); /* * Don't need to lock this, since we are guaranteed to only @@ -121,6 +121,8 @@ static void send_dm_alert(struct work_struct *unused) struct sk_buff *skb; struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + WARN_ON_ONCE(data->cpu != smp_processor_id()); + /* * Grab the skb we're about to send */ @@ -404,14 +406,14 @@ static int __init init_net_drop_monitor(void) for_each_present_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); - reset_per_cpu_data(data); + data->cpu = cpu; INIT_WORK(&data->dm_alert_work, send_dm_alert); init_timer(&data->send_timer); data->send_timer.data = cpu; data->send_timer.function = sched_send_work; + reset_per_cpu_data(data); } - initialized = 1; goto out; -- cgit v1.2.2 From 84768edbb2721637620b2d84501bb0d5aed603f1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 2 May 2012 03:58:43 +0000 Subject: net: l2tp: unlock socket lock before returning from l2tp_ip_sendmsg l2tp_ip_sendmsg could return without releasing socket lock, making it all the way to userspace, and generating the following warning: [ 130.891594] ================================================ [ 130.894569] [ BUG: lock held when returning to user space! ] [ 130.897257] 3.4.0-rc5-next-20120501-sasha #104 Tainted: G W [ 130.900336] ------------------------------------------------ [ 130.902996] trinity/8384 is leaving the kernel with locks still held! [ 130.906106] 1 lock held by trinity/8384: [ 130.907924] #0: (sk_lock-AF_INET){+.+.+.}, at: [] l2tp_ip_sendmsg+0x2f/0x550 Introduced by commit 2f16270 ("l2tp: Fix locking in l2tp_ip.c"). Signed-off-by: Sasha Levin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 585d93ecee2d..6274f0be82b0 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -442,8 +442,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m daddr = lip->l2tp_addr.s_addr; } else { + rc = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; + goto out; daddr = inet->inet_daddr; connected = 1; -- cgit v1.2.2 From b49960a05e32121d29316cfdf653894b88ac9190 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 May 2012 02:28:41 +0000 Subject: tcp: change tcp_adv_win_scale and tcp_rmem[2] tcp_adv_win_scale default value is 2, meaning we expect a good citizen skb to have skb->len / skb->truesize ratio of 75% (3/4) In 2.6 kernels we (mis)accounted for typical MSS=1460 frame : 1536 + 64 + 256 = 1856 'estimated truesize', and 1856 * 3/4 = 1392. So these skbs were considered as not bloated. With recent truesize fixes, a typical MSS=1460 frame truesize is now the more precise : 2048 + 256 = 2304. But 2304 * 3/4 = 1728. So these skb are not good citizen anymore, because 1460 < 1728 (GRO can escape this problem because it build skbs with a too low truesize.) This also means tcp advertises a too optimistic window for a given allocated rcvspace : When receiving frames, sk_rmem_alloc can hit sk_rcvbuf limit and we call tcp_prune_queue()/tcp_collapse() too often, especially when application is slow to drain its receive queue or in case of losses (netperf is fast, scp is slow). This is a major latency source. We should adjust the len/truesize ratio to 50% instead of 75% This patch : 1) changes tcp_adv_win_scale default to 1 instead of 2 2) increase tcp_rmem[2] limit from 4MB to 6MB to take into account better truesize tracking and to allow autotuning tcp receive window to reach same value than before. Note that same amount of kernel memory is consumed compared to 2.6 kernels. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 9 +++++---- net/ipv4/tcp_input.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8bb6adeb62c0..1272a88c2a63 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3243,7 +3243,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long limit; - int max_share, cnt; + int max_rshare, max_wshare, cnt; unsigned int i; unsigned long jiffy = jiffies; @@ -3303,15 +3303,16 @@ void __init tcp_init(void) tcp_init_mem(&init_net); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); - max_share = min(4UL*1024*1024, limit); + max_wshare = min(4UL*1024*1024, limit); + max_rshare = min(6UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_share); + sysctl_tcp_wmem[2] = max(64*1024, max_wshare); sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; - sysctl_tcp_rmem[2] = max(87380, max_share); + sysctl_tcp_rmem[2] = max(87380, max_rshare); pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d99efd7dfb6c..257b61789eeb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,7 @@ int sysctl_tcp_ecn __read_mostly = 2; EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; -int sysctl_tcp_adv_win_scale __read_mostly = 2; +int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); int sysctl_tcp_stdurg __read_mostly; -- cgit v1.2.2 From 7bdf7415a6b8ec31f86b3ad3eaa241257ecb7c4c Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Thu, 3 May 2012 11:47:08 -0400 Subject: auth_gss: the list of pseudoflavors not being parsed correctly gss_mech_list_pseudoflavors() parses a list of registered mechanisms. On that list contains a list of pseudo flavors which was not being parsed correctly, causing only the first pseudo flavor to be found. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_mech_switch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index ca8cad8251c7..782bfe1b6465 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -242,12 +242,13 @@ EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) { struct gss_api_mech *pos = NULL; - int i = 0; + int j, i = 0; spin_lock(®istered_mechs_lock); list_for_each_entry(pos, ®istered_mechs, gm_list) { - array_ptr[i] = pos->gm_pfs->pseudoflavor; - i++; + for (j=0; j < pos->gm_pf_num; j++) { + array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; + } } spin_unlock(®istered_mechs_lock); return i; -- cgit v1.2.2 From 4cb6e116bb97c8b87a1f4f95e99d0c8dda2a6e9b Mon Sep 17 00:00:00 2001 From: Ansis Atteka Date: Thu, 3 May 2012 18:40:38 -0700 Subject: openvswitch: Release rtnl_lock if ovs_vport_cmd_build_info() failed. This patch fixes a possible lock-up bug where rtnl_lock might not get released. Signed-off-by: Ansis Atteka Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e44e631ea952..4cb615d46363 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1641,10 +1641,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { - err = PTR_ERR(reply); netlink_set_err(init_net.genl_sock, 0, - ovs_dp_vport_multicast_group.id, err); - return 0; + ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); + goto exit_unlock; } genl_notify(reply, genl_info_net(info), info->snd_pid, -- cgit v1.2.2 From 072ae6314a191e3a9fc309b1e4e539ac7abc48ad Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 7 May 2012 17:21:53 -0700 Subject: openvswitch: Validation of IPv6 set port action uses IPv4 header When the kernel validates set TCP/UDP port actions, it looks at the ports in the existing flow to make sure that the L4 header exists. However, these actions always use the IPv4 version of the struct. Following patch fixes this by checking for flow ip protocol first. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4cb615d46363..777716bc80f7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -421,6 +421,19 @@ static int validate_sample(const struct nlattr *attr, return validate_actions(actions, key, depth + 1); } +static int validate_tp_port(const struct sw_flow_key *flow_key) +{ + if (flow_key->eth.type == htons(ETH_P_IP)) { + if (flow_key->ipv4.tp.src && flow_key->ipv4.tp.dst) + return 0; + } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { + if (flow_key->ipv6.tp.src && flow_key->ipv6.tp.dst) + return 0; + } + + return -EINVAL; +} + static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key) { @@ -462,18 +475,13 @@ static int validate_set(const struct nlattr *a, if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst) - return -EINVAL; - - break; + return validate_tp_port(flow_key); case OVS_KEY_ATTR_UDP: if (flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst) - return -EINVAL; - break; + return validate_tp_port(flow_key); default: return -EINVAL; -- cgit v1.2.2 From dccd9ecc374462e5d6a5b8f8110415a86c2213d8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 May 2012 22:16:32 -0400 Subject: ipv4: Do not use dead fib_info entries. Due to RCU lookups and RCU based release, fib_info objects can be found during lookup which have fi->fib_dead set. We must ignore these entries, otherwise we risk dereferencing the parts of the entry which are being torn down. Reported-by: Yevgen Pronenko Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index bce36f1a37b4..30b88d7b4bd6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1370,6 +1370,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; + if (fi->fib_dead) + continue; if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); -- cgit v1.2.2 From 59b9997baba5242997ddc7bd96b1391f5275a5a4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 May 2012 23:03:34 -0400 Subject: Revert "net: maintain namespace isolation between vlan and real device" This reverts commit 8a83a00b0735190384a348156837918271034144. It causes regressions for S390 devices, because it does an unconditional DST drop on SKBs for vlans and the QETH device needs the neighbour entry hung off the DST for certain things on transmit. Arnd can't remember exactly why he even needed this change. Conflicts: drivers/net/macvlan.c net/8021q/vlan_dev.c net/core/dev.c Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 36 +++++------------------------------- 2 files changed, 6 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9988d4abb372..9757c193c86b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -157,7 +157,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb_set_dev(skb, vlan_dev_priv(dev)->real_dev); + skb->dev = vlan_dev_priv(dev)->real_dev; len = skb->len; if (netpoll_tx_running(dev)) return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); diff --git a/net/core/dev.c b/net/core/dev.c index 9bb8f87c4cda..99e1d759f41e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1617,10 +1617,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) return NET_RX_DROP; } skb->skb_iif = 0; - skb_set_dev(skb, dev); + skb->dev = dev; + skb_dst_drop(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1869,36 +1873,6 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); -/** - * skb_dev_set -- assign a new device to a buffer - * @skb: buffer for the new device - * @dev: network device - * - * If an skb is owned by a device already, we have to reset - * all data private to the namespace a device belongs to - * before assigning it a new device. - */ -#ifdef CONFIG_NET_NS -void skb_set_dev(struct sk_buff *skb, struct net_device *dev) -{ - skb_dst_drop(skb); - if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { - secpath_reset(skb); - nf_reset(skb); - skb_init_secmark(skb); - skb->mark = 0; - skb->priority = 0; - skb->nf_trace = 0; - skb->ipvs_property = 0; -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#endif - } - skb->dev = dev; -} -EXPORT_SYMBOL(skb_set_dev); -#endif /* CONFIG_NET_NS */ - static void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features = 0; -- cgit v1.2.2 From c57b54684060c8aced64a5b78ff69ff289af97b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 May 2012 13:29:51 +0000 Subject: pktgen: fix crash at module unload commit 7d3d43dab4e9 (net: In unregister_netdevice_notifier unregister the netdevices.) makes pktgen crashing at module unload. [ 296.820578] BUG: spinlock bad magic on CPU#6, rmmod/3267 [ 296.820719] lock: ffff880310c38000, .magic: ffff8803, .owner: /-1, .owner_cpu: -1 [ 296.820943] Pid: 3267, comm: rmmod Not tainted 3.4.0-rc5+ #254 [ 296.821079] Call Trace: [ 296.821211] [] spin_dump+0x8a/0x8f [ 296.821345] [] spin_bug+0x21/0x26 [ 296.821507] [] do_raw_spin_lock+0x131/0x140 [ 296.821648] [] _raw_spin_lock+0x1e/0x20 [ 296.821786] [] __pktgen_NN_threads+0x4d/0x140 [pktgen] [ 296.821928] [] pktgen_device_event+0x10d/0x1e0 [pktgen] [ 296.822073] [] unregister_netdevice_notifier+0x7f/0x100 [ 296.822216] [] pg_cleanup+0x48/0x73 [pktgen] [ 296.822357] [] sys_delete_module+0x17e/0x2a0 [ 296.822502] [] system_call_fastpath+0x16/0x1b Hold the pktgen_thread_lock while splicing pktgen_threads, and test pktgen_exiting in pktgen_device_event() to make unload faster. Signed-off-by: Eric Dumazet Cc: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/pktgen.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4d8ce93cd503..77a59980b579 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1931,7 +1931,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -3755,12 +3755,18 @@ static void __exit pg_cleanup(void) { struct pktgen_thread *t; struct list_head *q, *n; + struct list_head list; /* Stop all interfaces & threads */ pktgen_exiting = true; - list_for_each_safe(q, n, &pktgen_threads) { + mutex_lock(&pktgen_thread_lock); + list_splice(&list, &pktgen_threads); + mutex_unlock(&pktgen_thread_lock); + + list_for_each_safe(q, n, &list) { t = list_entry(q, struct pktgen_thread, th_list); + list_del(&t->th_list); kthread_stop(t->tsk); kfree(t); } -- cgit v1.2.2 From e0268868ba064980488fc8c194db3d8e9fb2959c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 4 May 2012 05:24:54 +0000 Subject: sctp: check cached dst before using it dst_check() will take care of SA (and obsolete field), hence IPsec rekeying scenario is taken into account. Signed-off-by: Nicolas Dichtel Acked-by: Vlad Yaseivch Signed-off-by: David S. Miller --- net/sctp/output.c | 4 +--- net/sctp/transport.c | 17 ----------------- 2 files changed, 1 insertion(+), 20 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 817174eb5f41..8fc4dcd294ab 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -377,9 +377,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) */ skb_set_owner_w(nskb, sk); - /* The 'obsolete' field of dst is set to 2 when a dst is freed. */ - if (!dst || (dst->obsolete > 1)) { - dst_release(dst); + if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { sctp_assoc_sync_pmtu(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3889330b7b04..b026ba0c6992 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -226,23 +226,6 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -/* this is a complete rip-off from __sk_dst_check - * the cookie is always 0 since this is how it's used in the - * pmtu code - */ -static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) -{ - struct dst_entry *dst = t->dst; - - if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) { - dst_release(t->dst); - t->dst = NULL; - return NULL; - } - - return dst; -} - void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst; -- cgit v1.2.2 From bda14606a3c055dbbccd998fa91eb87c4c7b2027 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 13 May 2012 10:35:40 -0700 Subject: sunrpc: fix kernel-doc warnings Fix kernel-doc warnings in sunrpc/rpc_pipe.c and sunrpc/rpcb_clnt.c: Warning(net/sunrpc/rpcb_clnt.c:428): No description found for parameter 'net' Warning(net/sunrpc/rpcb_clnt.c:567): No description found for parameter 'net' Warning(net/sunrpc/rpc_pipe.c:133): No description found for parameter 'pipe' Warning(net/sunrpc/rpc_pipe.c:133): Excess function parameter 'inode' description in 'rpc_queue_upcall' Warning(net/sunrpc/rpc_pipe.c:839): No description found for parameter 'pipe' Warning(net/sunrpc/rpc_pipe.c:839): Excess function parameter 'ops' description in 'rpc_mkpipe_dentry' Warning(net/sunrpc/rpc_pipe.c:839): Excess function parameter 'flags' description in 'rpc_mkpipe_dentry' Warning(net/sunrpc/rpc_pipe.c:949): No description found for parameter 'dentry' Warning(net/sunrpc/rpc_pipe.c:949): Excess function parameter 'clnt' description in 'rpc_remove_client_dir' Signed-off-by: Randy Dunlap Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 8 +++----- net/sunrpc/rpcb_clnt.c | 2 ++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 3b62cf288031..7fb49c53ed61 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -120,7 +120,7 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); /** * rpc_queue_upcall - queue an upcall message to userspace - * @inode: inode of upcall pipe on which to queue given message + * @pipe: upcall pipe on which to queue given message * @msg: message to queue * * Call with an @inode created by rpc_mkpipe() to queue an upcall. @@ -819,9 +819,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * @parent: dentry of directory to create new "pipe" in * @name: name of pipe * @private: private data to associate with the pipe, for the caller's use - * @ops: operations defining the behavior of the pipe: upcall, downcall, - * release_pipe, open_pipe, and destroy_msg. - * @flags: rpc_pipe flags + * @pipe: &rpc_pipe containing input parameters * * Data is made available for userspace to read by calls to * rpc_queue_upcall(). The actual reads will result in calls to @@ -943,7 +941,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() - * @clnt: rpc client + * @dentry: dentry for the pipe */ int rpc_remove_client_dir(struct dentry *dentry) { diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 78ac39fd9fe7..3c0653439f3d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -394,6 +394,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) /** * rpcb_register - set or unset a port registration with the local rpcbind svc + * @net: target network namespace * @prog: RPC program number to bind * @vers: RPC version number to bind * @prot: transport protocol to register @@ -521,6 +522,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, /** * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @net: target network namespace * @program: RPC program number of service to (un)register * @version: RPC version number of service to (un)register * @address: address family, IP address, and port to (un)register -- cgit v1.2.2 From 6b34309936ed5c85cbe5868655814065f42c2f38 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 May 2012 13:30:35 -0400 Subject: sunrpc: suppress page allocation warnings in xprt_alloc_slot() It's easily possible for these allocations to fail since we're using GFP_NOWAIT here. We don't want to spam the logs with warnings about that though. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0cbcd1ab49ab..b239e75c483f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -979,7 +979,7 @@ static void xprt_alloc_slot(struct rpc_task *task) list_del(&req->rq_list); goto out_init_req; } - req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT); + req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN); if (!IS_ERR(req)) goto out_init_req; switch (PTR_ERR(req)) { -- cgit v1.2.2 From 1afeaf5c29aa07db25760d2fbed5c08a3aec3498 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 May 2012 12:12:53 -0400 Subject: sunrpc: fix loss of task->tk_status after rpc_delay call in xprt_alloc_slot xprt_alloc_slot will call rpc_delay() to make the task wait a bit before retrying when it gets back an -ENOMEM error from xprt_dynamic_alloc_slot. The problem is that rpc_delay will clear the task->tk_status, causing call_reserveresult to abort the task. The solution is simply to let call_reserveresult handle the ENOMEM error directly. Reported-by: Jeff Layton Cc: stable@vger.kernel.org [>= 3.1] Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 ++ net/sunrpc/xprt.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index adf2990acebf..25302c802460 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1288,6 +1288,8 @@ call_reserveresult(struct rpc_task *task) } switch (status) { + case -ENOMEM: + rpc_delay(task, HZ >> 2); case -EAGAIN: /* woken up; retry */ task->tk_action = call_reserve; return; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b239e75c483f..d7ccd7923eab 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -984,15 +984,16 @@ static void xprt_alloc_slot(struct rpc_task *task) goto out_init_req; switch (PTR_ERR(req)) { case -ENOMEM: - rpc_delay(task, HZ >> 2); dprintk("RPC: dynamic allocation of request slot " "failed! Retrying\n"); + task->tk_status = -ENOMEM; break; case -EAGAIN: rpc_sleep_on(&xprt->backlog, task, NULL); dprintk("RPC: waiting for request slot\n"); + default: + task->tk_status = -EAGAIN; } - task->tk_status = -EAGAIN; return; out_init_req: task->tk_status = 0; -- cgit v1.2.2