author		Linus Torvalds <torvalds@linux-foundation.org>	2015-07-31 20:10:56 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-07-31 20:10:56 -0400
commit		7c764cec3703583247c4ab837c652975a3d41f4b (patch)
tree		bd7149e43ee1d3465f7f3db5ed1df53c06eae62c /net
parent		acea568fa9eaeffbf949d15b2f7c9c346e16aae3 (diff)
parent		ea111545843e657504193e4f726e1116b96b8141 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:

 1) Must teardown SR-IOV before unregistering netdev in igb driver, from Alex Williamson.

 2) Fix ipv6 route unreachable crash in IPVS, from Alex Gartrell.

 3) Default route selection in ipv4 should take the prefix length, table ID, and TOS into account, from Julian Anastasov.

 4) sch_plug must have a reset method in order to purge all buffered packets when the qdisc is reset, likewise for sch_choke, from WANG Cong.

 5) Fix deadlock and races in slave_changelink/br_setport in bridging. From Nikolay Aleksandrov.

 6) mlx4 bug fixes (wrong index in port event propagation to VFs, overzealous BUG_ON assertion, etc.) from Ido Shamay, Jack Morgenstein, and Or Gerlitz.

 7) Turn off klog message about SCTP userspace interface compat that makes no sense at all, from Daniel Borkmann.

 8) Fix unbounded restarts of inet frag eviction process, causing NMI watchdog soft lockup messages, from Florian Westphal.

 9) Suspend/resume fixes for r8152 from Hayes Wang.

10) Fix busy loop when MSG_WAITALL|MSG_PEEK is used in TCP recv, from Sabrina Dubroca.

11) Fix performance regression when removing a lot of routes from the ipv4 routing tables, from Alexander Duyck.

12) Fix device leak in AF_PACKET, from Lars Westerhoff.

13) AF_PACKET also has a header length comparison bug due to signedness, from Alexander Drozdov.

14) Fix bug in EBPF tail call generation on x86, from Daniel Borkmann.

15) Memory leaks, TSO stats, watchdog timeout and other fixes to thunderx driver from Sunil Goutham and Thanneeru Srinivasulu.

16) act_bpf can leak memory when replacing programs, from Daniel Borkmann.

17) WOL packet fixes in gianfar driver, from Claudiu Manoil.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (79 commits)
  stmmac: fix missing MODULE_LICENSE in stmmac_platform
  gianfar: Enable device wakeup when appropriate
  gianfar: Fix suspend/resume for wol magic packet
  gianfar: Fix warning when CONFIG_PM off
  act_pedit: check binding before calling tcf_hash_release()
  net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket
  net: sched: fix refcount imbalance in actions
  r8152: reset device when tx timeout
  r8152: add pre_reset and post_reset
  qlcnic: Fix corruption while copying
  act_bpf: fix memory leaks when replacing bpf programs
  net: thunderx: Fix for crash while BGX teardown
  net: thunderx: Add PCI driver shutdown routine
  net: thunderx: Fix crash when changing rss with mutliple traffic flows
  net: thunderx: Set watchdog timeout value
  net: thunderx: Wakeup TXQ only if CQE_TX are processed
  net: thunderx: Suppress alloc_pages() failure warnings
  net: thunderx: Fix TSO packet statistic
  net: thunderx: Fix memory leak when changing queue count
  net: thunderx: Fix RQ_DROP miscalculation
  ...
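Fix 10 is easiest to see from the caller's side: before this pull, a recv() with MSG_PEEK|MSG_WAITALL could spin, because the socket wait condition only checked that the receive queue was non-empty, and a peeked skb keeps it non-empty forever. A minimal userspace sketch of the triggering call pattern (hypothetical loopback port, error handling trimmed):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
        struct sockaddr_in sa = { 0 };
        char buf[4096];
        ssize_t n;
        int fd;

        fd = socket(AF_INET, SOCK_STREAM, 0);
        if (fd < 0)
            return 1;
        sa.sin_family = AF_INET;
        sa.sin_port = htons(12345);    /* hypothetical local service */
        sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
            return 1;
        /* Ask for more bytes than are queued: MSG_PEEK leaves the data
         * on the queue, so the old "queue non-empty" wakeup condition
         * stayed true and the kernel busy-looped instead of sleeping
         * for the rest.
         */
        n = recv(fd, buf, sizeof(buf), MSG_PEEK | MSG_WAITALL);
        printf("peeked %zd bytes\n", n);
        close(fd);
        return 0;
    }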
Diffstat (limited to 'net')
-rw-r--r--	net/bluetooth/smp.c	4
-rw-r--r--	net/bridge/br_forward.c	29
-rw-r--r--	net/bridge/br_mdb.c	1
-rw-r--r--	net/bridge/br_multicast.c	50
-rw-r--r--	net/bridge/br_netlink.c	10
-rw-r--r--	net/bridge/br_stp.c	5
-rw-r--r--	net/bridge/br_stp_if.c	13
-rw-r--r--	net/bridge/br_stp_timer.c	4
-rw-r--r--	net/core/netclassid_cgroup.c	3
-rw-r--r--	net/core/sock.c	8
-rw-r--r--	net/dccp/proto.c	2
-rw-r--r--	net/ieee802154/6lowpan/reassembly.c	6
-rw-r--r--	net/ipv4/arp.c	16
-rw-r--r--	net/ipv4/devinet.c	1
-rw-r--r--	net/ipv4/fib_lookup.h	1
-rw-r--r--	net/ipv4/fib_semantics.c	41
-rw-r--r--	net/ipv4/fib_trie.c	7
-rw-r--r--	net/ipv4/inet_fragment.c	40
-rw-r--r--	net/ipv4/ip_fragment.c	12
-rw-r--r--	net/ipv4/route.c	2
-rw-r--r--	net/ipv4/tcp.c	11
-rw-r--r--	net/ipv6/ndisc.c	6
-rw-r--r--	net/ipv6/netfilter/nf_conntrack_reasm.c	6
-rw-r--r--	net/ipv6/reassembly.c	8
-rw-r--r--	net/llc/af_llc.c	4
-rw-r--r--	net/netfilter/ipvs/ip_vs_core.c	16
-rw-r--r--	net/netfilter/ipvs/ip_vs_ctl.c	78
-rw-r--r--	net/netfilter/ipvs/ip_vs_sched.c	12
-rw-r--r--	net/netfilter/ipvs/ip_vs_sync.c	2
-rw-r--r--	net/netfilter/ipvs/ip_vs_xmit.c	41
-rw-r--r--	net/netfilter/nf_conntrack_core.c	67
-rw-r--r--	net/netfilter/nf_conntrack_expect.c	3
-rw-r--r--	net/netfilter/nf_conntrack_netlink.c	5
-rw-r--r--	net/netfilter/nf_synproxy_core.c	7
-rw-r--r--	net/netfilter/xt_CT.c	8
-rw-r--r--	net/netfilter/xt_IDLETIMER.c	1
-rw-r--r--	net/packet/af_packet.c	11
-rw-r--r--	net/sched/act_api.c	11
-rw-r--r--	net/sched/act_bpf.c	53
-rw-r--r--	net/sched/act_pedit.c	5
-rw-r--r--	net/sched/sch_choke.c	13
-rw-r--r--	net/sched/sch_plug.c	1
-rw-r--r--	net/sctp/socket.c	6
43 files changed, 402 insertions, 228 deletions
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 3d0f7d2a0616..ad82324f710f 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2312,6 +2312,10 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 		return 1;
 
 	chan = conn->smp;
+	if (!chan) {
+		BT_ERR("SMP security requested but not available");
+		return 1;
+	}
 
 	if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED))
 		return 1;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 0ff6e1bbca91..fa7bfced888e 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -37,15 +37,30 @@ static inline int should_deliver(const struct net_bridge_port *p,
 
 int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
 {
-	if (!is_skb_forwardable(skb->dev, skb)) {
-		kfree_skb(skb);
-	} else {
+	if (!is_skb_forwardable(skb->dev, skb))
+		goto drop;
+
 	skb_push(skb, ETH_HLEN);
 	br_drop_fake_rtable(skb);
 	skb_sender_cpu_clear(skb);
-	dev_queue_xmit(skb);
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (skb->protocol == htons(ETH_P_8021Q) ||
+	     skb->protocol == htons(ETH_P_8021AD))) {
+		int depth;
+
+		if (!__vlan_get_protocol(skb, skb->protocol, &depth))
+			goto drop;
+
+		skb_set_network_header(skb, depth);
 	}
 
+	dev_queue_xmit(skb);
+
+	return 0;
+
+drop:
+	kfree_skb(skb);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
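For context on the br_forward.c change: with CHECKSUM_PARTIAL the stack later computes the checksum from an offset derived from the network header, so for a VLAN-tagged frame that header must point past the tag. A tiny standalone illustration of the offset arithmetic that skb_set_network_header(skb, depth) accounts for (the constants mirror the kernel's; the program itself is only an illustration):

    #include <stdio.h>

    #define ETH_HLEN   14  /* bytes in an Ethernet header */
    #define VLAN_HLEN   4  /* bytes added by one 802.1Q tag */

    int main(void)
    {
        /* One 802.1Q tag pushes the IP header VLAN_HLEN bytes deeper;
         * __vlan_get_protocol() reports this total depth in the fix.
         */
        unsigned int depth = ETH_HLEN + VLAN_HLEN;

        printf("network header offset with one VLAN tag: %u\n", depth);
        return 0;
    }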
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 1198a3dbad95..c94321955db7 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -445,6 +445,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 	if (p->port->state == BR_STATE_DISABLED)
 		goto unlock;
 
+	entry->state = p->state;
 	rcu_assign_pointer(*pp, p->next);
 	hlist_del_init(&p->mglist);
 	del_timer(&p->timer);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 79db489cdade..0b39dcc65b94 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1416,8 +1416,7 @@ br_multicast_leave_group(struct net_bridge *br,
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
-	    (port && port->state == BR_STATE_DISABLED) ||
-	    timer_pending(&other_query->timer))
+	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
 	mdb = mlock_dereference(br->mdb, br);
@@ -1425,6 +1424,31 @@ br_multicast_leave_group(struct net_bridge *br,
 	if (!mp)
 		goto out;
 
+	if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
+		struct net_bridge_port_group __rcu **pp;
+
+		for (pp = &mp->ports;
+		     (p = mlock_dereference(*pp, br)) != NULL;
+		     pp = &p->next) {
+			if (p->port != port)
+				continue;
+
+			rcu_assign_pointer(*pp, p->next);
+			hlist_del_init(&p->mglist);
+			del_timer(&p->timer);
+			call_rcu_bh(&p->rcu, br_multicast_free_pg);
+			br_mdb_notify(br->dev, port, group, RTM_DELMDB);
+
+			if (!mp->ports && !mp->mglist &&
+			    netif_running(br->dev))
+				mod_timer(&mp->timer, jiffies);
+		}
+		goto out;
+	}
+
+	if (timer_pending(&other_query->timer))
+		goto out;
+
 	if (br->multicast_querier) {
 		__br_multicast_send_query(br, port, &mp->addr);
 
@@ -1450,28 +1474,6 @@ br_multicast_leave_group(struct net_bridge *br,
 		}
 	}
 
-	if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
-		struct net_bridge_port_group __rcu **pp;
-
-		for (pp = &mp->ports;
-		     (p = mlock_dereference(*pp, br)) != NULL;
-		     pp = &p->next) {
-			if (p->port != port)
-				continue;
-
-			rcu_assign_pointer(*pp, p->next);
-			hlist_del_init(&p->mglist);
-			del_timer(&p->timer);
-			call_rcu_bh(&p->rcu, br_multicast_free_pg);
-			br_mdb_notify(br->dev, port, group, RTM_DELMDB);
-
-			if (!mp->ports && !mp->mglist &&
-			    netif_running(br->dev))
-				mod_timer(&mp->timer, jiffies);
-		}
-		goto out;
-	}
-
 	now = jiffies;
 	time = now + br->multicast_last_member_count *
 		     br->multicast_last_member_interval;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 364bdc98bd9b..3da5525eb8a2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -693,9 +693,17 @@ static int br_port_slave_changelink(struct net_device *brdev,
 				       struct nlattr *tb[],
 				       struct nlattr *data[])
 {
+	struct net_bridge *br = netdev_priv(brdev);
+	int ret;
+
 	if (!data)
 		return 0;
-	return br_setport(br_port_get_rtnl(dev), data);
+
+	spin_lock_bh(&br->lock);
+	ret = br_setport(br_port_get_rtnl(dev), data);
+	spin_unlock_bh(&br->lock);
+
+	return ret;
 }
 
 static int br_port_fill_slave_info(struct sk_buff *skb,
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b4b6dab9c285..ed74ffaa851f 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -209,8 +209,9 @@ void br_transmit_config(struct net_bridge_port *p)
 		br_send_config_bpdu(p, &bpdu);
 		p->topology_change_ack = 0;
 		p->config_pending = 0;
-		mod_timer(&p->hold_timer,
-			  round_jiffies(jiffies + BR_HOLD_TIME));
+		if (p->br->stp_enabled == BR_KERNEL_STP)
+			mod_timer(&p->hold_timer,
+				  round_jiffies(jiffies + BR_HOLD_TIME));
 	}
 }
 
216 217
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a2730e7196cd..4ca449a16132 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -48,7 +48,8 @@ void br_stp_enable_bridge(struct net_bridge *br)
 	struct net_bridge_port *p;
 
 	spin_lock_bh(&br->lock);
-	mod_timer(&br->hello_timer, jiffies + br->hello_time);
+	if (br->stp_enabled == BR_KERNEL_STP)
+		mod_timer(&br->hello_timer, jiffies + br->hello_time);
 	mod_timer(&br->gc_timer, jiffies + HZ/10);
 
 	br_config_bpdu_generation(br);
@@ -127,6 +128,7 @@ static void br_stp_start(struct net_bridge *br)
 	int r;
 	char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
 	char *envp[] = { NULL };
+	struct net_bridge_port *p;
 
 	r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
 
@@ -140,6 +142,10 @@ static void br_stp_start(struct net_bridge *br)
 	if (r == 0) {
 		br->stp_enabled = BR_USER_STP;
 		br_debug(br, "userspace STP started\n");
+		/* Stop hello and hold timers */
+		del_timer(&br->hello_timer);
+		list_for_each_entry(p, &br->port_list, list)
+			del_timer(&p->hold_timer);
 	} else {
 		br->stp_enabled = BR_KERNEL_STP;
 		br_debug(br, "using kernel STP\n");
@@ -156,12 +162,17 @@ static void br_stp_stop(struct net_bridge *br)
 	int r;
 	char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
 	char *envp[] = { NULL };
+	struct net_bridge_port *p;
 
 	if (br->stp_enabled == BR_USER_STP) {
 		r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
 		br_info(br, "userspace STP stopped, return code %d\n", r);
 
 		/* To start timers on any ports left in blocking */
+		mod_timer(&br->hello_timer, jiffies + br->hello_time);
+		list_for_each_entry(p, &br->port_list, list)
+			mod_timer(&p->hold_timer,
+				  round_jiffies(jiffies + BR_HOLD_TIME));
 		spin_lock_bh(&br->lock);
 		br_port_state_selection(br);
 		spin_unlock_bh(&br->lock);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 7caf7fae2d5b..5f0f5af0ec35 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -40,7 +40,9 @@ static void br_hello_timer_expired(unsigned long arg)
 	if (br->dev->flags & IFF_UP) {
 		br_config_bpdu_generation(br);
 
-		mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time));
+		if (br->stp_enabled != BR_USER_STP)
+			mod_timer(&br->hello_timer,
+				  round_jiffies(jiffies + br->hello_time));
 	}
 	spin_unlock(&br->lock);
 }
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 1f2a126f4ffa..6441f47b1a8f 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -23,7 +23,8 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
 
 struct cgroup_cls_state *task_cls_state(struct task_struct *p)
 {
-	return css_cls_state(task_css(p, net_cls_cgrp_id));
+	return css_cls_state(task_css_check(p, net_cls_cgrp_id,
+					    rcu_read_lock_bh_held()));
 }
 EXPORT_SYMBOL_GPL(task_cls_state);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 08f16db46070..193901d09757 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1497,7 +1497,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 	sock_copy(newsk, sk);
 
 	/* SANITY */
-	get_net(sock_net(newsk));
+	if (likely(newsk->sk_net_refcnt))
+		get_net(sock_net(newsk));
 	sk_node_init(&newsk->sk_node);
 	sock_lock_init(newsk);
 	bh_lock_sock(newsk);
@@ -1967,20 +1968,21 @@ static void __release_sock(struct sock *sk)
  * sk_wait_data - wait for data to arrive at sk_receive_queue
  * @sk: sock to wait on
  * @timeo: for how long
+ * @skb: last skb seen on sk_receive_queue
  *
  * Now socket state including sk->sk_err is changed only under lock,
  * hence we may omit checks after joining wait queue.
  * We check receive queue before schedule() only as optimization;
  * it is very likely that release_sock() added new data.
  */
-int sk_wait_data(struct sock *sk, long *timeo)
+int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
 {
 	int rc;
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
+	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	finish_wait(sk_sleep(sk), &wait);
 	return rc;
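The sk_wait_data() signature change above is the heart of fix 10: the wait condition becomes "the queue's tail is no longer the skb I last saw" instead of "the queue is non-empty". A toy model of the two predicates (plain C stand-ins, not kernel types):

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct skb { struct skb *next; };
    struct rx_queue { struct skb *head, *tail; };

    /* Old condition: any queued skb wakes the waiter -- a peeked skb
     * keeps this true, so MSG_PEEK|MSG_WAITALL never slept.
     */
    static bool ready_old(const struct rx_queue *q)
    {
        return q->head != NULL;
    }

    /* New condition: wake only when the tail moved past the last skb
     * the caller has already seen (NULL when it saw an empty queue).
     */
    static bool ready_new(const struct rx_queue *q, const struct skb *last)
    {
        return q->tail != last;
    }

    int main(void)
    {
        struct skb one = { NULL };
        struct rx_queue q = { &one, &one };

        assert(ready_old(&q));         /* would busy-loop on a peeked skb */
        assert(!ready_new(&q, &one));  /* sleeps until new data arrives */
        return 0;
    }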
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 52a94016526d..b5cf13a28009 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -886,7 +886,7 @@ verify_sock_status:
 			break;
 		}
 
-		sk_wait_data(sk, &timeo);
+		sk_wait_data(sk, &timeo, NULL);
 		continue;
 	found_ok_skb:
 		if (len > skb->len)
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f46e4d1306f2..214d44aef35b 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -207,7 +207,7 @@ found:
 	} else {
 		fq->q.meat += skb->len;
 	}
-	add_frag_mem_limit(&fq->q, skb->truesize);
+	add_frag_mem_limit(fq->q.net, skb->truesize);
 
 	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    fq->q.meat == fq->q.len) {
@@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
 		clone->data_len = clone->len;
 		head->data_len -= clone->len;
 		head->len -= clone->len;
-		add_frag_mem_limit(&fq->q, clone->truesize);
+		add_frag_mem_limit(fq->q.net, clone->truesize);
 	}
 
 	WARN_ON(head == NULL);
@@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
 		}
 		fp = next;
 	}
-	sub_frag_mem_limit(&fq->q, sum_truesize);
+	sub_frag_mem_limit(fq->q.net, sum_truesize);
 
 	head->next = NULL;
 	head->dev = dev;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 933a92820d26..6c8b1fbafce8 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1017,14 +1017,16 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 
 	neigh = neigh_lookup(&arp_tbl, &ip, dev);
 	if (neigh) {
-		read_lock_bh(&neigh->lock);
-		memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
-		r->arp_flags = arp_state_to_flags(neigh);
-		read_unlock_bh(&neigh->lock);
-		r->arp_ha.sa_family = dev->type;
-		strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+		if (!(neigh->nud_state & NUD_NOARP)) {
+			read_lock_bh(&neigh->lock);
+			memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+			r->arp_flags = arp_state_to_flags(neigh);
+			read_unlock_bh(&neigh->lock);
+			r->arp_ha.sa_family = dev->type;
+			strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+			err = 0;
+		}
 		neigh_release(neigh);
-		err = 0;
 	}
 	return err;
 }
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e813196c91c7..2d9cb1748f81 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -882,7 +882,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 		queue_delayed_work(system_power_efficient_wq,
 				   &check_lifetime_work, 0);
 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
-		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 	}
 	return 0;
 }
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c6211ed60b03..9c02920725db 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -13,6 +13,7 @@ struct fib_alias {
 	u8 fa_state;
 	u8 fa_slen;
 	u32 tb_id;
+	s16 fa_default;
 	struct rcu_head rcu;
 };
 
18 19
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c7358ea4ae93..3a06586b170c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1202,23 +1202,40 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
 }
 
 /* Must be invoked inside of an RCU protected region. */
-void fib_select_default(struct fib_result *res)
+void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 {
 	struct fib_info *fi = NULL, *last_resort = NULL;
 	struct hlist_head *fa_head = res->fa_head;
 	struct fib_table *tb = res->table;
+	u8 slen = 32 - res->prefixlen;
 	int order = -1, last_idx = -1;
-	struct fib_alias *fa;
+	struct fib_alias *fa, *fa1 = NULL;
+	u32 last_prio = res->fi->fib_priority;
+	u8 last_tos = 0;
 
 	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
 
+		if (fa->fa_slen != slen)
+			continue;
+		if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+			continue;
+		if (fa->tb_id != tb->tb_id)
+			continue;
+		if (next_fi->fib_priority > last_prio &&
+		    fa->fa_tos == last_tos) {
+			if (last_tos)
+				continue;
+			break;
+		}
+		if (next_fi->fib_flags & RTNH_F_DEAD)
+			continue;
+		last_tos = fa->fa_tos;
+		last_prio = next_fi->fib_priority;
+
 		if (next_fi->fib_scope != res->scope ||
 		    fa->fa_type != RTN_UNICAST)
 			continue;
-
-		if (next_fi->fib_priority > res->fi->fib_priority)
-			break;
 		if (!next_fi->fib_nh[0].nh_gw ||
 		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
 			continue;
@@ -1228,10 +1245,11 @@ void fib_select_default(struct fib_result *res)
 		if (!fi) {
 			if (next_fi != res->fi)
 				break;
+			fa1 = fa;
 		} else if (!fib_detect_death(fi, order, &last_resort,
-					     &last_idx, tb->tb_default)) {
+					     &last_idx, fa1->fa_default)) {
 			fib_result_assign(res, fi);
-			tb->tb_default = order;
+			fa1->fa_default = order;
 			goto out;
 		}
 		fi = next_fi;
@@ -1239,20 +1257,21 @@ void fib_select_default(struct fib_result *res)
 	}
 
 	if (order <= 0 || !fi) {
-		tb->tb_default = -1;
+		if (fa1)
+			fa1->fa_default = -1;
 		goto out;
 	}
 
 	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
-			      tb->tb_default)) {
+			      fa1->fa_default)) {
 		fib_result_assign(res, fi);
-		tb->tb_default = order;
+		fa1->fa_default = order;
 		goto out;
 	}
 
 	if (last_idx >= 0)
 		fib_result_assign(res, last_resort);
-	tb->tb_default = last_idx;
+	fa1->fa_default = last_idx;
 out:
 	return;
 }
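The reworked fib_select_default() above only considers aliases that match the original lookup, which is fix 3 (prefix length, table ID and TOS now constrain default route selection). A standalone restatement of that filter (field names mirror the diff; the struct is illustrative, not the kernel's fib_alias):

    #include <stdbool.h>
    #include <stdint.h>

    struct alias {
        uint8_t  fa_slen;   /* 32 - prefix length */
        uint8_t  fa_tos;    /* 0 means "any TOS" */
        uint32_t tb_id;     /* owning table */
    };

    static bool matches_lookup(const struct alias *fa, uint8_t slen,
                               uint8_t flow_tos, uint32_t tb_id)
    {
        if (fa->fa_slen != slen)
            return false;   /* different prefix length */
        if (fa->fa_tos && fa->fa_tos != flow_tos)
            return false;   /* TOS-specific alias, wrong TOS */
        if (fa->tb_id != tb_id)
            return false;   /* alias belongs to another table */
        return true;
    }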
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 15d32612e3c6..37c4bb89a708 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1171,6 +1171,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 		new_fa->fa_state = state & ~FA_S_ACCESSED;
 		new_fa->fa_slen = fa->fa_slen;
 		new_fa->tb_id = tb->tb_id;
+		new_fa->fa_default = -1;
 
 		err = switchdev_fib_ipv4_add(key, plen, fi,
 					     new_fa->fa_tos,
@@ -1222,6 +1223,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	new_fa->fa_state = 0;
 	new_fa->fa_slen = slen;
 	new_fa->tb_id = tb->tb_id;
+	new_fa->fa_default = -1;
 
 	/* (Optionally) offload fib entry to switch hardware. */
 	err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
@@ -1791,8 +1793,6 @@ void fib_table_flush_external(struct fib_table *tb)
 		if (hlist_empty(&n->leaf)) {
 			put_child_root(pn, n->key, NULL);
 			node_free(n);
-		} else {
-			leaf_pull_suffix(pn, n);
 		}
 	}
 }
@@ -1862,8 +1862,6 @@ int fib_table_flush(struct fib_table *tb)
 		if (hlist_empty(&n->leaf)) {
 			put_child_root(pn, n->key, NULL);
 			node_free(n);
-		} else {
-			leaf_pull_suffix(pn, n);
 		}
 	}
 
@@ -1990,7 +1988,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
 		return NULL;
 
 	tb->tb_id = id;
-	tb->tb_default = -1;
 	tb->tb_num_default = 0;
 	tb->tb_data = (alias ? alias->__data : tb->__data);
 
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5e346a082e5f..d0a7c0319e3d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
 	unsigned int evicted = 0;
 	HLIST_HEAD(expired);
 
-evict_again:
 	spin_lock(&hb->chain_lock);
 
 	hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
 		if (!inet_fragq_should_evict(fq))
 			continue;
 
-		if (!del_timer(&fq->timer)) {
-			/* q expiring right now thus increment its refcount so
-			 * it won't be freed under us and wait until the timer
-			 * has finished executing then destroy it
-			 */
-			atomic_inc(&fq->refcnt);
-			spin_unlock(&hb->chain_lock);
-			del_timer_sync(&fq->timer);
-			inet_frag_put(fq, f);
-			goto evict_again;
-		}
+		if (!del_timer(&fq->timer))
+			continue;
 
-		fq->flags |= INET_FRAG_EVICTED;
-		hlist_del(&fq->list);
-		hlist_add_head(&fq->list, &expired);
+		hlist_add_head(&fq->list_evictor, &expired);
 		++evicted;
 	}
 
 	spin_unlock(&hb->chain_lock);
 
-	hlist_for_each_entry_safe(fq, n, &expired, list)
+	hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
 		f->frag_expire((unsigned long) fq);
 
 	return evicted;
@@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 	int i;
 
 	nf->low_thresh = 0;
-	local_bh_disable();
 
 evict_again:
+	local_bh_disable();
 	seq = read_seqbegin(&f->rnd_seqlock);
 
 	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
 		inet_evict_bucket(f, &f->hash[i]);
 
-	if (read_seqretry(&f->rnd_seqlock, seq))
-		goto evict_again;
-
 	local_bh_enable();
+	cond_resched();
+
+	if (read_seqretry(&f->rnd_seqlock, seq) ||
+	    percpu_counter_sum(&nf->mem))
+		goto evict_again;
 
 	percpu_counter_destroy(&nf->mem);
 }
@@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 	struct inet_frag_bucket *hb;
 
 	hb = get_frag_bucket_locked(fq, f);
-	if (!(fq->flags & INET_FRAG_EVICTED))
-		hlist_del(&fq->list);
+	hlist_del(&fq->list);
+	fq->flags |= INET_FRAG_COMPLETE;
 	spin_unlock(&hb->chain_lock);
 }
 
291 281
@@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
 		fq_unlink(fq, f);
 		atomic_dec(&fq->refcnt);
-		fq->flags |= INET_FRAG_COMPLETE;
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
@@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
 		fp = xp;
 	}
 	sum = sum_truesize + f->qsize;
-	sub_frag_mem_limit(q, sum);
 
 	if (f->destructor)
 		f->destructor(q);
 	kmem_cache_free(f->frags_cachep, q);
+
+	sub_frag_mem_limit(nf, sum);
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
@@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 
 	q->net = nf;
 	f->constructor(q, arg);
-	add_frag_mem_limit(q, f->qsize);
+	add_frag_mem_limit(nf, f->qsize);
 
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
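The inet_frags_exit_net() change above is fix 8: the old code retried the whole eviction with bottom halves disabled, which could run long enough to trigger NMI watchdog soft-lockup warnings. The new shape re-enables softirqs and yields between passes. A hedged standalone sketch of that loop structure (the helpers are stubs standing in for the kernel primitives):

    #include <stdbool.h>
    #include <stdio.h>

    /* Stubs marking where local_bh_disable()/local_bh_enable()/
     * cond_resched() sit in the kernel version of this loop.
     */
    static void bh_disable(void) { }
    static void bh_enable(void)  { }
    static void resched_point(void) { }

    static int remaining = 3;  /* pretend eviction work left per pass */

    static bool evict_one_pass(void)
    {
        return --remaining > 0;  /* true while work remains */
    }

    int main(void)
    {
        bool again;

        do {
            bh_disable();
            again = evict_one_pass();
            bh_enable();
            /* The fix: a scheduling point between passes instead of
             * restarting with bottom halves still disabled.
             */
            resched_point();
        } while (again);

        puts("drained");
        return 0;
    }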
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 31f71b15cfba..921138f6c97c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg)
 	ipq_kill(qp);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 
-	if (!(qp->q.flags & INET_FRAG_EVICTED)) {
+	if (!inet_frag_evicting(&qp->q)) {
 		struct sk_buff *head = qp->q.fragments;
 		const struct iphdr *iph;
 		int err;
@@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp)
 		kfree_skb(fp);
 		fp = xp;
 	} while (fp);
-	sub_frag_mem_limit(&qp->q, sum_truesize);
+	sub_frag_mem_limit(qp->q.net, sum_truesize);
 
 	qp->q.flags = 0;
 	qp->q.len = 0;
@@ -455,7 +455,7 @@ found:
 			qp->q.fragments = next;
 
 		qp->q.meat -= free_it->len;
-		sub_frag_mem_limit(&qp->q, free_it->truesize);
+		sub_frag_mem_limit(qp->q.net, free_it->truesize);
 		kfree_skb(free_it);
 	}
 }
@@ -479,7 +479,7 @@ found:
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
-	add_frag_mem_limit(&qp->q, skb->truesize);
+	add_frag_mem_limit(qp->q.net, skb->truesize);
 	if (offset == 0)
 		qp->q.flags |= INET_FRAG_FIRST_IN;
 
@@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(&qp->q, clone->truesize);
+		add_frag_mem_limit(qp->q.net, clone->truesize);
 	}
 
 	skb_push(head, head->data - skb_network_header(head));
@@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		}
 		fp = next;
 	}
-	sub_frag_mem_limit(&qp->q, sum_truesize);
+	sub_frag_mem_limit(qp->q.net, sum_truesize);
 
 	head->next = NULL;
 	head->dev = dev;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d0362a2de3d3..e681b852ced1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2176,7 +2176,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 	if (!res.prefixlen &&
 	    res.table->tb_num_default > 1 &&
 	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
-		fib_select_default(&res);
+		fib_select_default(fl4, &res);
 
 	if (!fl4->saddr)
 		fl4->saddr = FIB_RES_PREFSRC(net, res);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7f4056785acc..45534a5ab430 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -780,7 +780,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 			ret = -EAGAIN;
 			break;
 		}
-		sk_wait_data(sk, &timeo);
+		sk_wait_data(sk, &timeo, NULL);
 		if (signal_pending(current)) {
 			ret = sock_intr_errno(timeo);
 			break;
@@ -1575,7 +1575,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	int target;		/* Read at least this many bytes */
 	long timeo;
 	struct task_struct *user_recv = NULL;
-	struct sk_buff *skb;
+	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
@@ -1635,7 +1635,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 
 		/* Next get a buffer. */
 
+		last = skb_peek_tail(&sk->sk_receive_queue);
 		skb_queue_walk(&sk->sk_receive_queue, skb) {
+			last = skb;
 			/* Now that we have two receive queues this
 			 * shouldn't happen.
 			 */
@@ -1754,8 +1756,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
 			lock_sock(sk);
-		} else
-			sk_wait_data(sk, &timeo);
+		} else {
+			sk_wait_data(sk, &timeo, last);
+		}
 
 		if (user_recv) {
 			int chunk;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0a05b35a90fc..c53331cfed95 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1650,6 +1650,7 @@ int ndisc_rcv(struct sk_buff *skb)
 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_change_info *change_info;
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
 
@@ -1664,6 +1665,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 		ndisc_send_unsol_na(dev);
 		in6_dev_put(idev);
 		break;
+	case NETDEV_CHANGE:
+		change_info = ptr;
+		if (change_info->flags_changed & IFF_NOARP)
+			neigh_changeaddr(&nd_tbl, dev);
+		break;
 	case NETDEV_DOWN:
 		neigh_ifdown(&nd_tbl, dev);
 		fib6_run_gc(0, net, false);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6f187c8d8a1b..6d02498172c1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -348,7 +348,7 @@ found:
 	fq->ecn |= ecn;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
-	add_frag_mem_limit(&fq->q, skb->truesize);
+	add_frag_mem_limit(fq->q.net, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 		clone->ip_summed = head->ip_summed;
 
 		NFCT_FRAG6_CB(clone)->orig = NULL;
-		add_frag_mem_limit(&fq->q, clone->truesize);
+		add_frag_mem_limit(fq->q.net, clone->truesize);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 		head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 	}
-	sub_frag_mem_limit(&fq->q, head->truesize);
+	sub_frag_mem_limit(fq->q.net, head->truesize);
 
 	head->ignore_df = 1;
 	head->next = NULL;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 8ffa2c8cce77..f1159bb76e0a 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
 
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
 
-	if (fq->q.flags & INET_FRAG_EVICTED)
+	if (inet_frag_evicting(&fq->q))
 		goto out_rcu_unlock;
 
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
@@ -330,7 +330,7 @@ found:
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
 	fq->ecn |= ecn;
-	add_frag_mem_limit(&fq->q, skb->truesize);
+	add_frag_mem_limit(fq->q.net, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(&fq->q, clone->truesize);
+		add_frag_mem_limit(fq->q.net, clone->truesize);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		}
 		fp = next;
 	}
-	sub_frag_mem_limit(&fq->q, sum_truesize);
+	sub_frag_mem_limit(fq->q.net, sum_truesize);
 
 	head->next = NULL;
 	head->dev = dev;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8fd9febaa5ba..8dab4e569571 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo)
 		if (signal_pending(current))
 			break;
 		rc = 0;
-		if (sk_wait_data(sk, &timeo))
+		if (sk_wait_data(sk, &timeo, NULL))
 			break;
 	}
 	return rc;
@@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 			release_sock(sk);
 			lock_sock(sk);
 		} else
-			sk_wait_data(sk, &timeo);
+			sk_wait_data(sk, &timeo, NULL);
 
 		if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) {
 			net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n",
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5d2b806a862e..38fbc194b9cb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	 * return *ignored=0 i.e. ICMP and NF_DROP
 	 */
 	sched = rcu_dereference(svc->scheduler);
-	dest = sched->schedule(svc, skb, iph);
+	if (sched) {
+		/* read svc->sched_data after svc->scheduler */
+		smp_rmb();
+		dest = sched->schedule(svc, skb, iph);
+	} else {
+		dest = NULL;
+	}
 	if (!dest) {
 		IP_VS_DBG(1, "p-schedule: no dest found.\n");
 		kfree(param.pe_data);
@@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	}
 
 	sched = rcu_dereference(svc->scheduler);
-	dest = sched->schedule(svc, skb, iph);
+	if (sched) {
+		/* read svc->sched_data after svc->scheduler */
+		smp_rmb();
+		dest = sched->schedule(svc, skb, iph);
+	} else {
+		dest = NULL;
+	}
 	if (dest == NULL) {
 		IP_VS_DBG(1, "Schedule: no dest found.\n");
 		return NULL;
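The smp_rmb() added at both call sites pairs with the scheduler-bind path, which is expected to publish svc->sched_data before svc->scheduler. A generic sketch of that publish/consume ordering, using C11 atomics in place of the kernel's RCU primitives (an acquire load subsumes the read barrier; all names here are illustrative):

    #include <stdatomic.h>
    #include <stdio.h>

    struct scheduler { const char *(*schedule)(void *data); };

    static void *sched_data;
    static _Atomic(struct scheduler *) cur_sched;

    /* Writer: make the data visible before publishing the pointer
     * (in-kernel, rcu_assign_pointer() provides the release ordering).
     */
    static void bind_scheduler(struct scheduler *s, void *data)
    {
        sched_data = data;
        atomic_store_explicit(&cur_sched, s, memory_order_release);
    }

    /* Reader: load the pointer first; the acquire ordering ensures the
     * later read of sched_data is not reordered before it, which is the
     * role the diff's rcu_dereference() + smp_rmb() pairing plays.
     */
    static const char *schedule_packet(void)
    {
        struct scheduler *s =
            atomic_load_explicit(&cur_sched, memory_order_acquire);

        if (!s)
            return NULL;  /* no scheduler bound: no destination */
        return s->schedule(sched_data);
    }

    static const char *pick(void *data) { return data; }

    int main(void)
    {
        static char dest[] = "10.0.0.1";  /* illustrative destination */
        struct scheduler rr = { pick };

        bind_scheduler(&rr, dest);
        printf("dest: %s\n", schedule_packet());
        return 0;
    }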
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 285eae3a1454..24c554201a76 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	__ip_vs_dst_cache_reset(dest);
 	spin_unlock_bh(&dest->dst_lock);
 
-	sched = rcu_dereference_protected(svc->scheduler, 1);
 	if (add) {
 		ip_vs_start_estimator(svc->net, &dest->stats);
 		list_add_rcu(&dest->n_list, &svc->destinations);
 		svc->num_dests++;
-		if (sched->add_dest)
+		sched = rcu_dereference_protected(svc->scheduler, 1);
+		if (sched && sched->add_dest)
 			sched->add_dest(svc, dest);
 	} else {
-		if (sched->upd_dest)
+		sched = rcu_dereference_protected(svc->scheduler, 1);
+		if (sched && sched->upd_dest)
 			sched->upd_dest(svc, dest);
 	}
 }
@@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
 		struct ip_vs_scheduler *sched;
 
 		sched = rcu_dereference_protected(svc->scheduler, 1);
-		if (sched->del_dest)
+		if (sched && sched->del_dest)
 			sched->del_dest(svc, dest);
 	}
 }
@@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	ip_vs_use_count_inc();
 
 	/* Lookup the scheduler by 'u->sched_name' */
-	sched = ip_vs_scheduler_get(u->sched_name);
-	if (sched == NULL) {
-		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
-		ret = -ENOENT;
-		goto out_err;
+	if (strcmp(u->sched_name, "none")) {
+		sched = ip_vs_scheduler_get(u->sched_name);
+		if (!sched) {
+			pr_info("Scheduler module ip_vs_%s not found\n",
+				u->sched_name);
+			ret = -ENOENT;
+			goto out_err;
+		}
 	}
 
 	if (u->pe_name && *u->pe_name) {
@@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	spin_lock_init(&svc->stats.lock);
 
 	/* Bind the scheduler */
-	ret = ip_vs_bind_scheduler(svc, sched);
-	if (ret)
-		goto out_err;
-	sched = NULL;
+	if (sched) {
+		ret = ip_vs_bind_scheduler(svc, sched);
+		if (ret)
+			goto out_err;
+		sched = NULL;
+	}
 
 	/* Bind the ct retriever */
 	RCU_INIT_POINTER(svc->pe, pe);
@@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 static int
 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 {
-	struct ip_vs_scheduler *sched, *old_sched;
+	struct ip_vs_scheduler *sched = NULL, *old_sched;
 	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
 	int ret = 0;
 
 	/*
 	 * Lookup the scheduler, by 'u->sched_name'
 	 */
-	sched = ip_vs_scheduler_get(u->sched_name);
-	if (sched == NULL) {
-		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
-		return -ENOENT;
+	if (strcmp(u->sched_name, "none")) {
+		sched = ip_vs_scheduler_get(u->sched_name);
+		if (!sched) {
+			pr_info("Scheduler module ip_vs_%s not found\n",
+				u->sched_name);
+			return -ENOENT;
+		}
 	}
 	old_sched = sched;
 
@@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 
 	old_sched = rcu_dereference_protected(svc->scheduler, 1);
 	if (sched != old_sched) {
+		if (old_sched) {
+			ip_vs_unbind_scheduler(svc, old_sched);
+			RCU_INIT_POINTER(svc->scheduler, NULL);
+			/* Wait all svc->sched_data users */
+			synchronize_rcu();
+		}
 		/* Bind the new scheduler */
-		ret = ip_vs_bind_scheduler(svc, sched);
-		if (ret) {
-			old_sched = sched;
-			goto out;
+		if (sched) {
+			ret = ip_vs_bind_scheduler(svc, sched);
+			if (ret) {
+				ip_vs_scheduler_put(sched);
+				goto out;
+			}
 		}
-		/* Unbind the old scheduler on success */
-		ip_vs_unbind_scheduler(svc, old_sched);
 	}
 
 	/*
@@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 	const struct ip_vs_iter *iter = seq->private;
 	const struct ip_vs_dest *dest;
 	struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+	char *sched_name = sched ? sched->name : "none";
 
 	if (iter->table == ip_vs_svc_table) {
 #ifdef CONFIG_IP_VS_IPV6
@@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 				   ip_vs_proto_name(svc->protocol),
 				   &svc->addr.in6,
 				   ntohs(svc->port),
-				   sched->name);
+				   sched_name);
 		else
 #endif
 			seq_printf(seq, "%s %08X:%04X %s %s ",
 				   ip_vs_proto_name(svc->protocol),
 				   ntohl(svc->addr.ip),
 				   ntohs(svc->port),
-				   sched->name,
+				   sched_name,
 				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 	} else {
 		seq_printf(seq, "FWM %08X %s %s",
-			   svc->fwmark, sched->name,
+			   svc->fwmark, sched_name,
 			   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 	}
 
@@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
 	struct ip_vs_scheduler *sched;
 	struct ip_vs_kstats kstats;
+	char *sched_name;
 
 	sched = rcu_dereference_protected(src->scheduler, 1);
+	sched_name = sched ? sched->name : "none";
 	dst->protocol = src->protocol;
 	dst->addr = src->addr.ip;
 	dst->port = src->port;
 	dst->fwmark = src->fwmark;
-	strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
+	strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
 	dst->flags = src->flags;
 	dst->timeout = src->timeout / HZ;
 	dst->netmask = src->netmask;
@@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	struct ip_vs_flags flags = { .flags = svc->flags,
 				     .mask = ~0 };
 	struct ip_vs_kstats kstats;
+	char *sched_name;
 
 	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
 	if (!nl_service)
@@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	}
 
 	sched = rcu_dereference_protected(svc->scheduler, 1);
+	sched_name = sched ? sched->name : "none";
 	pe = rcu_dereference_protected(svc->pe, 1);
-	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
+	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
 	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
 	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
 	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 199760c71f39..7e8141647943 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
 
 	if (sched->done_service)
 		sched->done_service(svc);
-	/* svc->scheduler can not be set to NULL */
+	/* svc->scheduler can be set to NULL only by caller */
 }
 
 
@@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
 
 void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
 {
-	struct ip_vs_scheduler *sched;
+	struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+	char *sched_name = sched ? sched->name : "none";
 
-	sched = rcu_dereference(svc->scheduler);
 	if (svc->fwmark) {
 		IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
-			     sched->name, svc->fwmark, svc->fwmark, msg);
+			     sched_name, svc->fwmark, svc->fwmark, msg);
 #ifdef CONFIG_IP_VS_IPV6
 	} else if (svc->af == AF_INET6) {
 		IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
-			     sched->name, ip_vs_proto_name(svc->protocol),
+			     sched_name, ip_vs_proto_name(svc->protocol),
 			     &svc->addr.in6, ntohs(svc->port), msg);
 #endif
 	} else {
 		IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
-			     sched->name, ip_vs_proto_name(svc->protocol),
+			     sched_name, ip_vs_proto_name(svc->protocol),
 			     &svc->addr.ip, ntohs(svc->port), msg);
 	}
 }
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index b08ba9538d12..d99ad93eb855 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
 			pkts = atomic_add_return(1, &cp->in_pkts);
 		else
 			pkts = sysctl_sync_threshold(ipvs);
-		ip_vs_sync_conn(net, cp->control, pkts);
+		ip_vs_sync_conn(net, cp, pkts);
 	}
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index bf66a8657a5f..258a0b0e82a2 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
 
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = daddr;
-	fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
 	fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
 			   FLOWI_FLAG_KNOWN_NH : 0;
 
@@ -505,6 +504,13 @@ err_put:
 	return -1;
 
 err_unreach:
+	/* The ip6_link_failure function requires the dev field to be set
+	 * in order to get the net (further for the sake of fwmark
+	 * reflection).
+	 */
+	if (!skb->dev)
+		skb->dev = skb_dst(skb)->dev;
+
 	dst_link_failure(skb);
 	return -1;
 }
@@ -523,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
 	if (ret == NF_ACCEPT) {
 		nf_reset(skb);
 		skb_forward_csum(skb);
+		if (!skb->sk)
+			skb_sender_cpu_clear(skb);
 	}
 	return ret;
 }
 
+/* In the event of a remote destination, it's possible that we would have
+ * matches against an old socket (particularly a TIME-WAIT socket). This
+ * causes havoc down the line (ip_local_out et. al. expect regular sockets
+ * and invalid memory accesses will happen) so simply drop the association
+ * in this case.
+ */
+static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb)
+{
+	/* If dev is set, the packet came from the LOCAL_IN callback and
+	 * not from a local TCP socket.
+	 */
+	if (skb->dev)
+		skb_orphan(skb);
+}
+
 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
 static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
 					 struct ip_vs_conn *cp, int local)
@@ -538,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
 		ip_vs_notrack(skb);
 	else
 		ip_vs_update_conntrack(skb, cp, 1);
+
+	/* Remove the early_demux association unless it's bound for the
+	 * exact same port and address on this host after translation.
+	 */
+	if (!local || cp->vport != cp->dport ||
+	    !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
+		ip_vs_drop_early_demux_sk(skb);
+
 	if (!local) {
 		skb_forward_csum(skb);
+		if (!skb->sk)
+			skb_sender_cpu_clear(skb);
 		NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
 			NULL, skb_dst(skb)->dev, dst_output_sk);
 	} else
 		ret = NF_ACCEPT;
+
 	return ret;
 }
 
@@ -557,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
 	if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
 		ip_vs_notrack(skb);
 	if (!local) {
+		ip_vs_drop_early_demux_sk(skb);
 		skb_forward_csum(skb);
+		if (!skb->sk)
+			skb_sender_cpu_clear(skb);
 		NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
 			NULL, skb_dst(skb)->dev, dst_output_sk);
 	} else
@@ -845,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
 	struct ipv6hdr *old_ipv6h = NULL;
 #endif
 
+	ip_vs_drop_early_demux_sk(skb);
+
 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
 		new_skb = skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb)
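
The ip_vs_drop_early_demux_sk() calls threaded through this file all address one hazard: a forwarded skb can still carry a socket reference attached by early demux, possibly to a TIME-WAIT socket, and the output path (ip_local_out() and friends) would then treat it as a full socket. The guard, condensed from the hunks above:

	/* Sketch: skb->dev is only set for packets that entered via the
	 * LOCAL_IN hook, so any attached sk must be an early-demux match
	 * rather than a genuinely local socket; drop it before output.
	 */
	if (skb->dev)
		skb_orphan(skb);	/* releases skb->sk */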
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 13fad8668f83..651039ad1681 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
 	spin_unlock(&pcpu->lock);
 }
 
+/* Released via destroy_conntrack() */
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
+{
+	struct nf_conn *tmpl;
+
+	tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL);
+	if (tmpl == NULL)
+		return NULL;
+
+	tmpl->status = IPS_TEMPLATE;
+	write_pnet(&tmpl->ct_net, net);
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	if (zone) {
+		struct nf_conntrack_zone *nf_ct_zone;
+
+		nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC);
+		if (!nf_ct_zone)
+			goto out_free;
+		nf_ct_zone->id = zone;
+	}
+#endif
+	atomic_set(&tmpl->ct_general.use, 0);
+
+	return tmpl;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+out_free:
+	kfree(tmpl);
+	return NULL;
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
+
+static void nf_ct_tmpl_free(struct nf_conn *tmpl)
+{
+	nf_ct_ext_destroy(tmpl);
+	nf_ct_ext_free(tmpl);
+	kfree(tmpl);
+}
+
 static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
@@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
 	NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
+	if (unlikely(nf_ct_is_template(ct))) {
+		nf_ct_tmpl_free(ct);
+		return;
+	}
 	rcu_read_lock();
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	if (l4proto && l4proto->destroy)
@@ -540,28 +584,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
-/* deletion from this larval template list happens via nf_ct_put() */
-void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
-{
-	struct ct_pcpu *pcpu;
-
-	__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
-	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
-	nf_conntrack_get(&tmpl->ct_general);
-
-	/* add this conntrack to the (per cpu) tmpl list */
-	local_bh_disable();
-	tmpl->cpu = smp_processor_id();
-	pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
-
-	spin_lock(&pcpu->lock);
-	/* Overload tuple linked list to put us in template list. */
-	hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-				 &pcpu->tmpl);
-	spin_unlock_bh(&pcpu->lock);
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
-
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
@@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net)
 		spin_lock_init(&pcpu->lock);
 		INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
 		INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
-		INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
 	}
 
 	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
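
Taken together, the conntrack hunks above retire the per-CPU template list: templates become plain refcounted objects created by nf_ct_tmpl_alloc(), and destroy_conntrack() now routes them to nf_ct_tmpl_free() when the last reference drops. The caller-side lifecycle this implies (mirrored by the synproxy and xt_CT hunks below; error handling trimmed) looks roughly like:

	/* Sketch: allocate a template, mark it confirmed so it never
	 * reaches the hash table, and hold a reference until nf_ct_put().
	 * Note that nf_ct_tmpl_alloc() returns NULL on failure.
	 */
	struct nf_conn *tmpl = nf_ct_tmpl_alloc(net, 0 /* zone */, GFP_KERNEL);

	if (!tmpl)
		return -ENOMEM;
	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
	nf_conntrack_get(&tmpl->ct_general);
	/* ... final nf_ct_put() ends up in nf_ct_tmpl_free() */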
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 7a17070c5dab..b45a4223cb05 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
 			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
 	}
 
-	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
+	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
+	       nf_ct_zone(a->master) == nf_ct_zone(b->master);
 }
 
 static inline int expect_matches(const struct nf_conntrack_expect *a,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d1c23940a86a..6b8b0abbfab4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	}
 
 	err = nf_ct_expect_related_report(exp, portid, report);
-	if (err < 0)
-		goto err_exp;
-
-	return 0;
-err_exp:
 	nf_ct_expect_put(exp);
 err_ct:
 	nf_ct_put(ct);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 789feeae6c44..71f1e9fdfa18 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net)
 static int __net_init synproxy_net_init(struct net *net)
 {
 	struct synproxy_net *snet = synproxy_pernet(net);
-	struct nf_conntrack_tuple t;
 	struct nf_conn *ct;
 	int err = -ENOMEM;
 
-	memset(&t, 0, sizeof(t));
-	ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
+	ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
 	if (IS_ERR(ct)) {
 		err = PTR_ERR(ct);
 		goto err1;
@@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
 	if (!nfct_synproxy_ext_add(ct))
 		goto err2;
 
-	nf_conntrack_tmpl_insert(net, ct);
+	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	nf_conntrack_get(&ct->ct_general);
 	snet->tmpl = ct;
 
 	snet->stats = alloc_percpu(struct synproxy_stats);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 75747aecdebe..c6630030c912 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -184,7 +184,6 @@ out:
 static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 			  struct xt_ct_target_info_v1 *info)
 {
-	struct nf_conntrack_tuple t;
 	struct nf_conn *ct;
 	int ret = -EOPNOTSUPP;
 
@@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 	if (ret < 0)
 		goto err1;
 
-	memset(&t, 0, sizeof(t));
-	ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
+	ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
 	ret = PTR_ERR(ct);
 	if (IS_ERR(ct))
 		goto err2;
@@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 		if (ret < 0)
 			goto err3;
 	}
-
-	nf_conntrack_tmpl_insert(par->net, ct);
+	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	nf_conntrack_get(&ct->ct_general);
 out:
 	info->ct = ct;
 	return 0;
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index f407ebc13481..29d2c31f406c 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
 		goto out;
 	}
 
+	sysfs_attr_init(&info->timer->attr.attr);
 	info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
 	if (!info->timer->attr.attr.name) {
 		ret = -ENOMEM;
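
sysfs_attr_init() is mandatory for dynamically allocated sysfs attributes: it assigns the attribute its own lockdep class, and kernels built with CONFIG_DEBUG_LOCK_ALLOC warn at registration time when it is missing. A generic sketch, with "my_attr" as an illustrative name:

	struct device_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);

	if (!attr)
		return -ENOMEM;
	sysfs_attr_init(&attr->attr);	/* lockdep key for this dynamic attribute */
	attr->attr.name = kstrdup("my_attr", GFP_KERNEL);
	attr->attr.mode = 0444;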
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c9e8741226c6..ed458b315ef4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2403,7 +2403,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		}
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
 					  addr, hlen);
-		if (tp_len > dev->mtu + dev->hard_header_len) {
+		if (likely(tp_len >= 0) &&
+		    tp_len > dev->mtu + dev->hard_header_len) {
 			struct ethhdr *ehdr;
 			/* Earlier code assumed this would be a VLAN pkt,
 			 * double-check this now that we have the actual
@@ -2784,7 +2785,7 @@ static int packet_release(struct socket *sock)
 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
 {
 	struct packet_sock *po = pkt_sk(sk);
-	const struct net_device *dev_curr;
+	struct net_device *dev_curr;
 	__be16 proto_curr;
 	bool need_rehook;
 
@@ -2808,15 +2809,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
 
 		po->num = proto;
 		po->prot_hook.type = proto;
-
-		if (po->prot_hook.dev)
-			dev_put(po->prot_hook.dev);
-
 		po->prot_hook.dev = dev;
 
 		po->ifindex = dev ? dev->ifindex : 0;
 		packet_cached_dev_assign(po, dev);
 	}
+	if (dev_curr)
+		dev_put(dev_curr);
 
 	if (proto == 0 || !need_rehook)
 		goto out_unlock;
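
Two independent af_packet fixes land here. In tpacket_snd(), tpacket_fill_skb() can return a negative error, and the old comparison promoted it to a huge unsigned value, so error frames wrongly took the oversized-packet branch; gating on tp_len >= 0 restores the intended flow. In packet_do_bind(), the old device reference was dropped only when the protocol hook was re-registered, leaking a netdev refcount on other paths. The shape of that fix, sketched with the names from the hunk:

	/* Sketch: remember the previously hooked device before the swap,
	 * then release it exactly once on every path.
	 */
	struct net_device *dev_curr = po->prot_hook.dev;

	po->prot_hook.dev = dev;	/* dev's reference moves into the hook */
	if (dev_curr)
		dev_put(dev_curr);	/* balances the earlier dev_hold() */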
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index af427a3dbcba..43ec92680ae8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a)
 }
 EXPORT_SYMBOL(tcf_hash_destroy);
 
-int tcf_hash_release(struct tc_action *a, int bind)
+int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
 {
 	struct tcf_common *p = a->priv;
 	int ret = 0;
@@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind)
 	if (p) {
 		if (bind)
 			p->tcfc_bindcnt--;
-		else if (p->tcfc_bindcnt > 0)
+		else if (strict && p->tcfc_bindcnt > 0)
 			return -EPERM;
 
 		p->tcfc_refcnt--;
@@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind)
 			ret = 1;
 		}
 	}
+
 	return ret;
 }
-EXPORT_SYMBOL(tcf_hash_release);
+EXPORT_SYMBOL(__tcf_hash_release);
 
 static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 			   struct tc_action *a)
@@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
 		head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
 		hlist_for_each_entry_safe(p, n, head, tcfc_head) {
 			a->priv = p;
-			ret = tcf_hash_release(a, 0);
+			ret = __tcf_hash_release(a, false, true);
 			if (ret == ACT_P_DELETED) {
 				module_put(a->ops->owner);
 				n_i++;
@@ -408,7 +409,7 @@ int tcf_action_destroy(struct list_head *actions, int bind)
 	int ret = 0;
 
 	list_for_each_entry_safe(a, tmp, actions, list) {
-		ret = tcf_hash_release(a, bind);
+		ret = __tcf_hash_release(a, bind, true);
 		if (ret == ACT_P_DELETED)
 			module_put(a->ops->owner);
 		else if (ret < 0)
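
__tcf_hash_release() folds the old semantics into one helper: bind drops a bind reference, while strict decides whether a nonzero bind count fails the release with -EPERM. The matching header change is outside net/ and not shown here, but the call sites above imply a non-strict wrapper along these lines (an assumption, not part of this diff):

	/* Assumed wrapper implied by the renamed export above. */
	static inline int tcf_hash_release(struct tc_action *a, bool bind)
	{
		return __tcf_hash_release(a, bind, false);
	}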
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1df78289e248..d0edeb7a1950 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -27,9 +27,10 @@
 struct tcf_bpf_cfg {
 	struct bpf_prog *filter;
 	struct sock_filter *bpf_ops;
-	char *bpf_name;
+	const char *bpf_name;
 	u32 bpf_fd;
 	u16 bpf_num_ops;
+	bool is_ebpf;
 };
 
 static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
@@ -207,6 +208,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 	cfg->bpf_ops = bpf_ops;
 	cfg->bpf_num_ops = bpf_num_ops;
 	cfg->filter = fp;
+	cfg->is_ebpf = false;
 
 	return 0;
 }
@@ -241,18 +243,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 	cfg->bpf_fd = bpf_fd;
 	cfg->bpf_name = name;
 	cfg->filter = fp;
+	cfg->is_ebpf = true;
 
 	return 0;
 }
 
+static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
+{
+	if (cfg->is_ebpf)
+		bpf_prog_put(cfg->filter);
+	else
+		bpf_prog_destroy(cfg->filter);
+
+	kfree(cfg->bpf_ops);
+	kfree(cfg->bpf_name);
+}
+
+static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
+				  struct tcf_bpf_cfg *cfg)
+{
+	cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
+	cfg->filter = prog->filter;
+
+	cfg->bpf_ops = prog->bpf_ops;
+	cfg->bpf_name = prog->bpf_name;
+}
+
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action *act,
 			int replace, int bind)
 {
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+	struct tcf_bpf_cfg cfg, old;
 	struct tc_act_bpf *parm;
 	struct tcf_bpf *prog;
-	struct tcf_bpf_cfg cfg;
 	bool is_bpf, is_ebpf;
 	int ret;
 
@@ -301,6 +325,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	prog = to_bpf(act);
 	spin_lock_bh(&prog->tcf_lock);
 
+	if (ret != ACT_P_CREATED)
+		tcf_bpf_prog_fill_cfg(prog, &old);
+
 	prog->bpf_ops = cfg.bpf_ops;
 	prog->bpf_name = cfg.bpf_name;
 
@@ -316,32 +343,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	if (ret == ACT_P_CREATED)
 		tcf_hash_insert(act);
+	else
+		tcf_bpf_cfg_cleanup(&old);
 
 	return ret;
 
 destroy_fp:
-	if (is_ebpf)
-		bpf_prog_put(cfg.filter);
-	else
-		bpf_prog_destroy(cfg.filter);
-
-	kfree(cfg.bpf_ops);
-	kfree(cfg.bpf_name);
-
+	tcf_bpf_cfg_cleanup(&cfg);
 	return ret;
 }
 
 static void tcf_bpf_cleanup(struct tc_action *act, int bind)
 {
-	const struct tcf_bpf *prog = act->priv;
-
-	if (tcf_bpf_is_ebpf(prog))
-		bpf_prog_put(prog->filter);
-	else
-		bpf_prog_destroy(prog->filter);
+	struct tcf_bpf_cfg tmp;
 
-	kfree(prog->bpf_ops);
-	kfree(prog->bpf_name);
+	tcf_bpf_prog_fill_cfg(act->priv, &tmp);
+	tcf_bpf_cfg_cleanup(&tmp);
 }
 
 static struct tc_action_ops act_bpf_ops __read_mostly = {
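
The act_bpf rework closes a leak on program replace: the old filter used to be overwritten in place and never freed. The fix is a snapshot-then-free pattern: capture the outgoing configuration under tcf_lock, publish the replacement, and clean up only after the swap. Condensed from the hunks above:

	struct tcf_bpf_cfg old;

	spin_lock_bh(&prog->tcf_lock);
	if (ret != ACT_P_CREATED)
		tcf_bpf_prog_fill_cfg(prog, &old);	/* snapshot old program */
	prog->bpf_ops = cfg.bpf_ops;			/* publish replacement */
	prog->bpf_name = cfg.bpf_name;
	spin_unlock_bh(&prog->tcf_lock);

	if (ret == ACT_P_CREATED)
		tcf_hash_insert(act);
	else
		tcf_bpf_cfg_cleanup(&old);		/* free old program last */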
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 17e6d6669c7f..ff8b466a73f6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -68,13 +68,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 		}
 		ret = ACT_P_CREATED;
 	} else {
-		p = to_pedit(a);
-		tcf_hash_release(a, bind);
 		if (bind)
 			return 0;
+		tcf_hash_release(a, bind);
 		if (!ovr)
 			return -EEXIST;
-
+		p = to_pedit(a);
 		if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
 			keys = kmalloc(ksize, GFP_KERNEL);
 			if (keys == NULL)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 93d5742dc7e0..6a783afe4960 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -385,6 +385,19 @@ static void choke_reset(struct Qdisc *sch)
 {
 	struct choke_sched_data *q = qdisc_priv(sch);
 
+	while (q->head != q->tail) {
+		struct sk_buff *skb = q->tab[q->head];
+
+		q->head = (q->head + 1) & q->tab_mask;
+		if (!skb)
+			continue;
+		qdisc_qstats_backlog_dec(sch, skb);
+		--sch->q.qlen;
+		qdisc_drop(skb, sch);
+	}
+
+	memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+	q->head = q->tail = 0;
 	red_restart(&q->vars);
 }
 
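
choke_reset() previously only restarted the RED state, so packets queued in the ring survived a reset with stale qlen/backlog accounting. The new loop drains the ring; because the table size is a power of two, tab_mask = size - 1 lets the head index wrap without a modulo:

	/* Index arithmetic used above: advance and wrap in one step. */
	q->head = (q->head + 1) & q->tab_mask;	/* same as (q->head + 1) % size */

sch_plug below takes the simpler route of wiring .reset to the generic qdisc_reset_queue(), which purges the backlog the same way.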
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 89f8fcf73f18..ade9445a55ab 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -216,6 +216,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
 	.peek = qdisc_peek_head,
 	.init = plug_init,
 	.change = plug_change,
+	.reset = qdisc_reset_queue,
 	.owner = THIS_MODULE,
 };
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1425ec2bbd5a..17bef01b9aa3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2200,12 +2200,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
 	if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen))
 		return -EFAULT;
 
-	if (sctp_sk(sk)->subscribe.sctp_data_io_event)
-		pr_warn_ratelimited(DEPRECATED "%s (pid %d) "
-			"Requested SCTP_SNDRCVINFO event.\n"
-			"Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n",
-			current->comm, task_pid_nr(current));
-
 	/* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT,
 	 * if there is no data to be sent or retransmit, the stack will
 	 * immediately send up this notification.